Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 29
-rw-r--r--  lib/CodeGen/AllocationOrder.cpp | 2
-rw-r--r--  lib/CodeGen/AllocationOrder.h | 3
-rw-r--r--  lib/CodeGen/Analysis.cpp | 73
-rw-r--r--  lib/CodeGen/AsmPrinter/ARMException.cpp | 19
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 302
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 45
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 6
-rw-r--r--  lib/CodeGen/AsmPrinter/CMakeLists.txt | 11
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 42
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.h | 87
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 287
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 290
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 6
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 299
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 68
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 427
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 56
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp | 46
-rw-r--r--  lib/CodeGen/AsmPrinter/LLVMBuild.txt | 22
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 129
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 27
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp | 2
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 17
-rw-r--r--  lib/CodeGen/CodePlacementOpt.cpp | 9
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 72
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.h | 3
-rw-r--r--  lib/CodeGen/DFAPacketizer.cpp | 223
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 31
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 703
-rw-r--r--  lib/CodeGen/ELF.h | 227
-rw-r--r--  lib/CodeGen/ELFCodeEmitter.cpp | 205
-rw-r--r--  lib/CodeGen/ELFCodeEmitter.h | 78
-rw-r--r--  lib/CodeGen/ELFWriter.cpp | 1105
-rw-r--r--  lib/CodeGen/ELFWriter.h | 251
-rw-r--r--  lib/CodeGen/EdgeBundles.cpp | 2
-rw-r--r--  lib/CodeGen/ExecutionDepsFix.cpp | 526
-rw-r--r--  lib/CodeGen/ExpandISelPseudos.cpp | 14
-rw-r--r--  lib/CodeGen/ExpandPostRAPseudos.cpp | 12
-rw-r--r--  lib/CodeGen/GCMetadata.cpp | 6
-rw-r--r--  lib/CodeGen/GCStrategy.cpp | 160
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 132
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 191
-rw-r--r--  lib/CodeGen/InterferenceCache.cpp | 35
-rw-r--r--  lib/CodeGen/InterferenceCache.h | 15
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 5
-rw-r--r--  lib/CodeGen/JITCodeEmitter.cpp | 14
-rw-r--r--  lib/CodeGen/LLVMBuild.txt | 25
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 414
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp | 6
-rw-r--r--  lib/CodeGen/LexicalScopes.cpp | 2
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 23
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 36
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 2012
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.cpp | 2
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.h | 2
-rw-r--r--  lib/CodeGen/LiveRangeCalc.cpp | 4
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 110
-rw-r--r--  lib/CodeGen/LiveRangeEdit.h | 206
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 114
-rw-r--r--  lib/CodeGen/LocalStackSlotAllocation.cpp | 10
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 219
-rw-r--r--  lib/CodeGen/MachineBlockFrequencyInfo.cpp | 2
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp | 1001
-rw-r--r--  lib/CodeGen/MachineBranchProbabilityInfo.cpp | 65
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 108
-rw-r--r--  lib/CodeGen/MachineCodeEmitter.cpp | 14
-rw-r--r--  lib/CodeGen/MachineCopyPropagation.cpp | 340
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 93
-rw-r--r--  lib/CodeGen/MachineFunctionAnalysis.cpp | 5
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 254
-rw-r--r--  lib/CodeGen/MachineInstrBundle.cpp | 278
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 608
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 18
-rw-r--r--  lib/CodeGen/MachinePassRegistry.cpp | 1
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 65
-rw-r--r--  lib/CodeGen/MachineSSAUpdater.cpp | 7
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 614
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 182
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 462
-rw-r--r--  lib/CodeGen/ObjectCodeEmitter.cpp | 141
-rw-r--r--  lib/CodeGen/OptimizePHIs.cpp | 9
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 13
-rw-r--r--  lib/CodeGen/Passes.cpp | 607
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 35
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 256
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 36
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 41
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.h | 4
-rw-r--r--  lib/CodeGen/PseudoSourceValue.cpp | 2
-rw-r--r--  lib/CodeGen/RegAllocBase.cpp | 280
-rw-r--r--  lib/CodeGen/RegAllocBase.h | 36
-rw-r--r--  lib/CodeGen/RegAllocBasic.cpp | 316
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 321
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 110
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 1543
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 173
-rw-r--r--  lib/CodeGen/RegisterClassInfo.cpp | 16
-rw-r--r--  lib/CodeGen/RegisterClassInfo.h | 2
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 252
-rw-r--r--  lib/CodeGen/RegisterCoalescer.h | 6
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 67
-rw-r--r--  lib/CodeGen/RenderMachineFunction.cpp | 19
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 62
-rw-r--r--  lib/CodeGen/ScheduleDAGEmit.cpp | 68
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 654
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.h | 212
-rw-r--r--  lib/CodeGen/ScheduleDAGPrinter.cpp | 24
-rw-r--r--  lib/CodeGen/ScoreboardHazardRecognizer.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/CMakeLists.txt | 14
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1120
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 215
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 67
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 117
-rw-r--r--  lib/CodeGen/SelectionDAG/LLVMBuild.txt | 22
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 1251
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 134
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 20
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 13
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 140
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 163
-rw-r--r--  lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 657
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 30
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 650
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 130
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 40
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp (renamed from lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp) | 85
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1050
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 629
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 18
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 631
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 268
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 215
-rw-r--r--  lib/CodeGen/ShadowStackGC.cpp | 3
-rw-r--r--  lib/CodeGen/ShrinkWrapping.cpp | 7
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 775
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 12
-rw-r--r--  lib/CodeGen/Spiller.cpp | 81
-rw-r--r--  lib/CodeGen/Spiller.h | 1
-rw-r--r--  lib/CodeGen/SplitKit.cpp | 85
-rw-r--r--  lib/CodeGen/SplitKit.h | 15
-rw-r--r--  lib/CodeGen/Splitter.cpp | 827
-rw-r--r--  lib/CodeGen/Splitter.h | 101
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 7
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 358
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 18
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 39
-rw-r--r--  lib/CodeGen/TargetFrameLoweringImpl.cpp | 45
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp | 106
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 123
-rw-r--r--  lib/CodeGen/TargetOptionsImpl.cpp | 52
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 525
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 168
-rw-r--r--  lib/CodeGen/VirtRegMap.h | 335
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 2633
-rw-r--r--  lib/CodeGen/VirtRegRewriter.h | 32
159 files changed, 15273 insertions(+), 17695 deletions(-)
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 25842a7876a2..822a564441ac 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -148,7 +148,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
assert(State == NULL);
State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
- bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+ bool IsReturnBlock = (!BB->empty() && BB->back().isReturn());
std::vector<unsigned> &KillIndices = State->GetKillIndices();
std::vector<unsigned> &DefIndices = State->GetDefIndices();
@@ -157,7 +157,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
- for (const unsigned *Alias = TRI->getOverlaps(*I);
+ for (const uint16_t *Alias = TRI->getOverlaps(*I);
unsigned Reg = *Alias; ++Alias) {
State->UnionGroups(Reg, 0);
KillIndices[Reg] = BB->size();
@@ -173,7 +173,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
- for (const unsigned *Alias = TRI->getOverlaps(*I);
+ for (const uint16_t *Alias = TRI->getOverlaps(*I);
unsigned Reg = *Alias; ++Alias) {
State->UnionGroups(Reg, 0);
KillIndices[Reg] = BB->size();
@@ -186,10 +186,10 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo *MFI = MF.getFrameInfo();
BitVector Pristine = MFI->getPristineRegs(BB);
- for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
- for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ for (const uint16_t *Alias = TRI->getOverlaps(Reg);
unsigned AliasReg = *Alias; ++Alias) {
State->UnionGroups(AliasReg, 0);
KillIndices[AliasReg] = BB->size();
@@ -265,7 +265,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
IsImplicitDefUse(MI, MO)) {
const unsigned Reg = MO.getReg();
PassthruRegs.insert(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
PassthruRegs.insert(*Subreg);
}
@@ -333,7 +333,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
}
// Repeat for subregisters.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
unsigned SubregReg = *Subreg;
if (!State->IsLive(SubregReg)) {
@@ -384,7 +384,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
TII->isPredicated(MI)) {
DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
State->UnionGroups(Reg, 0);
@@ -392,7 +392,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// Any aliased that are live at this point are completely or
// partially defined here, so group those aliases with Reg.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
if (State->IsLive(AliasReg)) {
State->UnionGroups(Reg, AliasReg);
@@ -423,7 +423,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
continue;
// Update def for Reg and aliases.
- for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ for (const uint16_t *Alias = TRI->getOverlaps(Reg);
unsigned AliasReg = *Alias; ++Alias)
DefIndices[AliasReg] = Count;
}
@@ -451,8 +451,8 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// instruction which may not be executed. The second R6 def may or may not
// re-define R6 so it's not safe to change it since the last R6 use cannot be
// changed.
- bool Special = MI->getDesc().isCall() ||
- MI->getDesc().hasExtraSrcRegAllocReq() ||
+ bool Special = MI->isCall() ||
+ MI->hasExtraSrcRegAllocReq() ||
TII->isPredicated(MI);
// Scan the register uses for this instruction and update
@@ -678,7 +678,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
goto next_super_reg;
} else {
bool found = false;
- for (const unsigned *Alias = TRI->getAliasSet(NewReg);
+ for (const uint16_t *Alias = TRI->getAliasSet(NewReg);
*Alias; ++Alias) {
unsigned AliasReg = *Alias;
if (State->IsLive(AliasReg) ||
@@ -780,6 +780,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
I != E; --Count) {
MachineInstr *MI = --I;
+ if (MI->isDebugValue())
+ continue;
+
DEBUG(dbgs() << "Anti: ");
DEBUG(MI->dump());
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 1005f102bea6..87f64311a655 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -41,7 +41,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
if (HintPair.first) {
const TargetRegisterInfo &TRI = VRM.getTargetRegInfo();
// The remaining allocation order may depend on the hint.
- ArrayRef<unsigned> Order =
+ ArrayRef<uint16_t> Order =
TRI.getRawAllocationOrder(RC, HintPair.first, Hint,
VRM.getMachineFunction());
if (Order.empty())
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index d1e48a1f2e96..0ce7e0c3b5f6 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -34,8 +34,7 @@ public:
/// AllocationOrder - Create a new AllocationOrder for VirtReg.
/// @param VirtReg Virtual register to allocate for.
/// @param VRM Virtual register map for function.
- /// @param ReservedRegs Set of reserved registers as returned by
- /// TargetRegisterInfo::getReservedRegs().
+ /// @param RegClassInfo Information about reserved and allocatable registers.
AllocationOrder(unsigned VirtReg,
const VirtRegMap &VRM,
const RegisterClassInfo &RegClassInfo);
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index fafc01044d4f..00874d411378 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -1,4 +1,4 @@
-//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===//
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
@@ -149,33 +150,37 @@ llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
/// consideration of global floating-point math flags.
///
ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
- ISD::CondCode FPC, FOC;
switch (Pred) {
- case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
- case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
- case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
- case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
- case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
- case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
- case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
- case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break;
- case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break;
- case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
- case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
- case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
- case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
- case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
- case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
- case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
- default:
- llvm_unreachable("Invalid FCmp predicate opcode!");
- FOC = FPC = ISD::SETFALSE;
- break;
+ case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
+ case FCmpInst::FCMP_OEQ: return ISD::SETOEQ;
+ case FCmpInst::FCMP_OGT: return ISD::SETOGT;
+ case FCmpInst::FCMP_OGE: return ISD::SETOGE;
+ case FCmpInst::FCMP_OLT: return ISD::SETOLT;
+ case FCmpInst::FCMP_OLE: return ISD::SETOLE;
+ case FCmpInst::FCMP_ONE: return ISD::SETONE;
+ case FCmpInst::FCMP_ORD: return ISD::SETO;
+ case FCmpInst::FCMP_UNO: return ISD::SETUO;
+ case FCmpInst::FCMP_UEQ: return ISD::SETUEQ;
+ case FCmpInst::FCMP_UGT: return ISD::SETUGT;
+ case FCmpInst::FCMP_UGE: return ISD::SETUGE;
+ case FCmpInst::FCMP_ULT: return ISD::SETULT;
+ case FCmpInst::FCMP_ULE: return ISD::SETULE;
+ case FCmpInst::FCMP_UNE: return ISD::SETUNE;
+ case FCmpInst::FCMP_TRUE: return ISD::SETTRUE;
+ default: llvm_unreachable("Invalid FCmp predicate opcode!");
+ }
+}
+
+ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
+ case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
+ case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
+ case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
+ case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
+ case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
+ default: return CC;
}
- if (NoNaNsFPMath)
- return FOC;
- else
- return FPC;
}
/// getICmpCondCode - Return the ISD condition code corresponding to
@@ -195,7 +200,6 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
case ICmpInst::ICMP_UGT: return ISD::SETUGT;
default:
llvm_unreachable("Invalid ICmp predicate opcode!");
- return ISD::SETNE;
}
}
@@ -221,12 +225,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// longjmp on x86), it can end up causing miscompilation that has not
// been fully understood.
if (!Ret &&
- (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;
+ (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
+ !isa<UnreachableInst>(Term))) return false;
// If I will have a chain, make sure no other instruction that will have a
// chain interposes between I and the return.
if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
- !I->isSafeToSpeculativelyExecute())
+ !isSafeToSpeculativelyExecute(I))
for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
--BBI) {
if (&*BBI == I)
@@ -235,7 +240,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
if (isa<DbgInfoIntrinsic>(BBI))
continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
- !BBI->isSafeToSpeculativelyExecute())
+ !isSafeToSpeculativelyExecute(BBI))
return false;
}
@@ -250,7 +255,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
const Function *F = ExitBB->getParent();
- unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
return false;
@@ -285,12 +290,12 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
}
bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
- const TargetLowering &TLI) {
+ SDValue &Chain, const TargetLowering &TLI) {
const Function *F = DAG.getMachineFunction().getFunction();
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
- unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
if (CallerRetAttr & ~Attribute::NoAlias)
return false;
@@ -299,5 +304,5 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
return false;
// Check if the only use is a function return node.
- return TLI.isUsedByReturnOnly(Node);
+ return TLI.isUsedByReturnOnly(Node, Chain);
}
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 3f2387325360..b60fda86a6ba 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -29,6 +29,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/ADT/SmallString.h"
@@ -36,6 +37,12 @@
#include "llvm/ADT/Twine.h"
using namespace llvm;
+cl::opt<bool>
+EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden,
+ cl::desc("Generate ARM EHABI tables with unwinding descriptors"),
+ cl::init(false));
+
+
ARMException::ARMException(AsmPrinter *A)
: DwarfException(A),
shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
@@ -72,13 +79,15 @@ void ARMException::EndFunction() {
Asm->OutStreamer.EmitPersonality(PerSym);
}
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
+ if (EnableARMEHABIDescriptors) {
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
- Asm->OutStreamer.EmitHandlerData();
+ Asm->OutStreamer.EmitHandlerData();
- // Emit actual exception table
- EmitExceptionTable();
+ // Emit actual exception table
+ EmitExceptionTable();
+ }
}
Asm->OutStreamer.EmitFnEnd();
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 1999f3608788..b0b2ff4882af 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -100,6 +100,7 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
OutStreamer(Streamer),
LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
DD = 0; DE = 0; MMI = 0; LI = 0;
+ CurrentFnSym = CurrentFnSymForSize = 0;
GCMetadataPrinters = 0;
VerboseAsm = Streamer.isVerboseAsm();
}
@@ -613,6 +614,10 @@ bool AsmPrinter::needsSEHMoves() {
MF->getFunction()->needsUnwindTableEntry();
}
+bool AsmPrinter::needsRelocationsForDwarfStringPool() const {
+ return MAI->doesDwarfUseRelocationsForStringPool();
+}
+
void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
MCSymbol *Label = MI.getOperand(0).getMCSymbol();
@@ -732,6 +737,18 @@ void AsmPrinter::EmitFunctionBody() {
OutStreamer.EmitRawText(StringRef("\tnop\n"));
}
+ const Function *F = MF->getFunction();
+ for (Function::const_iterator i = F->begin(), e = F->end(); i != e; ++i) {
+ const BasicBlock *BB = i;
+ if (!BB->hasAddressTaken())
+ continue;
+ MCSymbol *Sym = GetBlockAddressSymbol(BB);
+ if (Sym->isDefined())
+ continue;
+ OutStreamer.AddComment("Address of block that was removed by CodeGen");
+ OutStreamer.EmitLabel(Sym);
+ }
+
// Emit target-specific gunk after the function body.
EmitFunctionBodyEnd();
@@ -745,7 +762,8 @@ void AsmPrinter::EmitFunctionBody() {
const MCExpr *SizeExp =
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
- MCSymbolRefExpr::Create(CurrentFnSym, OutContext),
+ MCSymbolRefExpr::Create(CurrentFnSymForSize,
+ OutContext),
OutContext);
OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
}
@@ -780,7 +798,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
- for (const unsigned *SR = TRI->getSuperRegisters(MLoc.getReg());
+ for (const uint16_t *SR = TRI->getSuperRegisters(MLoc.getReg());
*SR && Reg < 0; ++SR) {
Reg = TRI->getDwarfRegNum(*SR, false);
// FIXME: Get the bit range this register uses of the superregister
@@ -841,6 +859,12 @@ bool AsmPrinter::doFinalization(Module &M) {
EmitVisibility(Name, V, false);
}
+ // Emit module flags.
+ SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+ M.getModuleFlagsMetadata(ModuleFlags);
+ if (!ModuleFlags.empty())
+ getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM);
+
// Finalize debug and EH information.
if (DE) {
{
@@ -929,6 +953,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
// Get the function symbol.
CurrentFnSym = Mang->getSymbol(MF.getFunction());
+ CurrentFnSymForSize = CurrentFnSym;
if (isVerbose())
LI = &getAnalysis<MachineLoopInfo>();
@@ -1120,7 +1145,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MCExpr *Value = 0;
switch (MJTI->getEntryKind()) {
case MachineJumpTableInfo::EK_Inline:
- llvm_unreachable("Cannot emit EK_Inline jump table entry"); break;
+ llvm_unreachable("Cannot emit EK_Inline jump table entry");
case MachineJumpTableInfo::EK_Custom32:
Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
OutContext);
@@ -1139,6 +1164,15 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
return;
}
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress: {
+ // EK_GPRel64BlockAddress - Each entry is an address of block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gpdword LBB123
+ MCSymbol *MBBSym = MBB->getSymbol();
+ OutStreamer.EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ return;
+ }
+
case MachineJumpTableInfo::EK_LabelDifference32: {
// EK_LabelDifference32 - Each entry is the address of the block minus
// the address of the jump table. This is used for PIC jump tables where
@@ -1191,12 +1225,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
assert(GV->hasInitializer() && "Not a special LLVM global!");
- const TargetData *TD = TM.getTargetData();
- unsigned Align = Log2_32(TD->getPointerPrefAlignment());
if (GV->getName() == "llvm.global_ctors") {
- OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection());
- EmitAlignment(Align);
- EmitXXStructorList(GV->getInitializer());
+ EmitXXStructorList(GV->getInitializer(), /* isCtor */ true);
if (TM.getRelocationModel() == Reloc::Static &&
MAI->hasStaticCtorDtorReferenceInStaticMode()) {
@@ -1208,9 +1238,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
}
if (GV->getName() == "llvm.global_dtors") {
- OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection());
- EmitAlignment(Align);
- EmitXXStructorList(GV->getInitializer());
+ EmitXXStructorList(GV->getInitializer(), /* isCtor */ false);
if (TM.getRelocationModel() == Reloc::Static &&
MAI->hasStaticCtorDtorReferenceInStaticMode()) {
@@ -1240,7 +1268,7 @@ void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
}
}
-typedef std::pair<int, Constant*> Structor;
+typedef std::pair<unsigned, Constant*> Structor;
static bool priority_order(const Structor& lhs, const Structor& rhs) {
return lhs.first < rhs.first;
@@ -1248,7 +1276,7 @@ static bool priority_order(const Structor& lhs, const Structor& rhs) {
/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
/// priority.
-void AsmPrinter::EmitXXStructorList(const Constant *List) {
+void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
// Should be an array of '{ int, void ()* }' structs. The first value is the
// init priority.
if (!isa<ConstantArray>(List)) return;
@@ -1274,19 +1302,20 @@ void AsmPrinter::EmitXXStructorList(const Constant *List) {
CS->getOperand(1)));
}
- // Emit the function pointers in reverse priority order.
- switch (MAI->getStructorOutputOrder()) {
- case Structors::None:
- break;
- case Structors::PriorityOrder:
- std::sort(Structors.begin(), Structors.end(), priority_order);
- break;
- case Structors::ReversePriorityOrder:
- std::sort(Structors.rbegin(), Structors.rend(), priority_order);
- break;
+ // Emit the function pointers in the target-specific order
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+ std::stable_sort(Structors.begin(), Structors.end(), priority_order);
+ for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
+ const MCSection *OutputSection =
+ (isCtor ?
+ getObjFileLowering().getStaticCtorSection(Structors[i].first) :
+ getObjFileLowering().getStaticDtorSection(Structors[i].first));
+ OutStreamer.SwitchSection(OutputSection);
+ if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection())
+ EmitAlignment(Align);
+ EmitXXStructor(Structors[i].second);
}
- for (unsigned i = 0, e = Structors.size(); i != e; ++i)
- EmitGlobalConstant(Structors[i].second);
}
//===--------------------------------------------------------------------===//
@@ -1423,7 +1452,6 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
if (CE == 0) {
llvm_unreachable("Unknown constant value to lower!");
- return MCConstantExpr::Create(0, Ctx);
}
switch (CE->getOpcode()) {
@@ -1445,7 +1473,6 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
!AP.MF ? 0 : AP.MF->getFunction()->getParent());
report_fatal_error(OS.str());
}
- return MCConstantExpr::Create(0, Ctx);
case Instruction::GetElementPtr: {
const TargetData &TD = *AP.TM.getTargetData();
// Generate a symbolic expression for the byte address
@@ -1543,6 +1570,19 @@ static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
/// byte value. If it is not a repeated sequence, return -1.
+static int isRepeatedByteSequence(const ConstantDataSequential *V) {
+ StringRef Data = V->getRawDataValues();
+ assert(!Data.empty() && "Empty aggregates should be CAZ node");
+ char C = Data[0];
+ for (unsigned i = 1, e = Data.size(); i != e; ++i)
+ if (Data[i] != C) return -1;
+ return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1.
+}
+
+
+/// isRepeatedByteSequence - Determine whether the given value is
+/// composed of a repeated sequence of identical bytes and return the
+/// byte value. If it is not a repeated sequence, return -1.
static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
@@ -1568,8 +1608,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
// Make sure all array elements are sequences of the same repeated
// byte.
- if (CA->getNumOperands() == 0) return -1;
-
+ assert(CA->getNumOperands() != 0 && "Should be a CAZ");
int Byte = isRepeatedByteSequence(CA->getOperand(0), TM);
if (Byte == -1) return -1;
@@ -1580,37 +1619,92 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
}
return Byte;
}
+
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V))
+ return isRepeatedByteSequence(CDS);
return -1;
}
-static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
- AsmPrinter &AP) {
- if (AddrSpace != 0 || !CA->isString()) {
- // Not a string. Print the values in successive locations.
-
- // See if we can aggregate some values. Make sure it can be
- // represented as a series of bytes of the constant value.
- int Value = isRepeatedByteSequence(CA, AP.TM);
-
- if (Value != -1) {
- uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType());
- AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
+ unsigned AddrSpace,AsmPrinter &AP){
+
+ // See if we can aggregate this into a .fill, if so, emit it as such.
+ int Value = isRepeatedByteSequence(CDS, AP.TM);
+ if (Value != -1) {
+ uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CDS->getType());
+ // Don't emit a 1-byte object as a .fill.
+ if (Bytes > 1)
+ return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+ }
+
+ // If this can be emitted with .ascii/.asciz, emit it as such.
+ if (CDS->isString())
+ return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace);
+
+ // Otherwise, emit the values in successive locations.
+ unsigned ElementByteSize = CDS->getElementByteSize();
+ if (isa<IntegerType>(CDS->getElementType())) {
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
+ CDS->getElementAsInteger(i));
+ AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i),
+ ElementByteSize, AddrSpace);
+ }
+ } else if (ElementByteSize == 4) {
+ // FP Constants are printed as integer constants to avoid losing
+ // precision.
+ assert(CDS->getElementType()->isFloatTy());
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union {
+ float F;
+ uint32_t I;
+ };
+
+ F = CDS->getElementAsFloat(i);
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << "float " << F << '\n';
+ AP.OutStreamer.EmitIntValue(I, 4, AddrSpace);
}
- else {
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+ } else {
+ assert(CDS->getElementType()->isDoubleTy());
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union {
+ double F;
+ uint64_t I;
+ };
+
+ F = CDS->getElementAsDouble(i);
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << "double " << F << '\n';
+ AP.OutStreamer.EmitIntValue(I, 8, AddrSpace);
}
- return;
}
- // Otherwise, it can be emitted as .ascii.
- SmallVector<char, 128> TmpVec;
- TmpVec.reserve(CA->getNumOperands());
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- TmpVec.push_back(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+ const TargetData &TD = *AP.TM.getTargetData();
+ unsigned Size = TD.getTypeAllocSize(CDS->getType());
+ unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) *
+ CDS->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer.EmitZeros(Padding, AddrSpace);
- AP.OutStreamer.EmitBytes(StringRef(TmpVec.data(), TmpVec.size()), AddrSpace);
+}
+
+static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ // See if we can aggregate some values. Make sure it can be
+ // represented as a series of bytes of the constant value.
+ int Value = isRepeatedByteSequence(CA, AP.TM);
+
+ if (Value != -1) {
+ uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType());
+ AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+ }
+ else {
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+ }
}
static void EmitGlobalConstantVector(const ConstantVector *CV,
@@ -1656,29 +1750,44 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS,
static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
AsmPrinter &AP) {
- // FP Constants are printed as integer constants to avoid losing
- // precision.
- if (CFP->getType()->isDoubleTy()) {
+ if (CFP->getType()->isHalfTy()) {
if (AP.isVerbose()) {
- double Val = CFP->getValueAPF().convertToDouble();
- AP.OutStreamer.GetCommentOS() << "double " << Val << '\n';
+ SmallString<10> Str;
+ CFP->getValueAPF().toString(Str);
+ AP.OutStreamer.GetCommentOS() << "half " << Str << '\n';
}
-
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(Val, 2, AddrSpace);
return;
}
if (CFP->getType()->isFloatTy()) {
if (AP.isVerbose()) {
float Val = CFP->getValueAPF().convertToFloat();
- AP.OutStreamer.GetCommentOS() << "float " << Val << '\n';
+ uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.GetCommentOS() << "float " << Val << '\n'
+ << " (" << format("0x%x", IntVal) << ")\n";
}
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace);
return;
}
+ // FP Constants are printed as integer constants to avoid losing
+ // precision.
+ if (CFP->getType()->isDoubleTy()) {
+ if (AP.isVerbose()) {
+ double Val = CFP->getValueAPF().convertToDouble();
+ uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.GetCommentOS() << "double " << Val << '\n'
+ << " (" << format("0x%lx", IntVal) << ")\n";
+ }
+
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ return;
+ }
+
if (CFP->getType()->isX86_FP80Ty()) {
// all long double variants are printed as hex
// API needed to prevent premature destruction
@@ -1742,20 +1851,20 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
AsmPrinter &AP) {
- if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) {
- uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ const TargetData *TD = AP.TM.getTargetData();
+ uint64_t Size = TD->getTypeAllocSize(CV->getType());
+ if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
return AP.OutStreamer.EmitZeros(Size, AddrSpace);
- }
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
switch (Size) {
case 1:
case 2:
case 4:
case 8:
if (AP.isVerbose())
- AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
+ AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
+ CI->getZExtValue());
AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
return;
default:
@@ -1764,29 +1873,45 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
}
}
- if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
- return EmitGlobalConstantArray(CVA, AddrSpace, AP);
-
- if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
- return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
-
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
return EmitGlobalConstantFP(CFP, AddrSpace, AP);
if (isa<ConstantPointerNull>(CV)) {
- unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
return;
}
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
+ return EmitGlobalConstantDataSequential(CDS, AddrSpace, AP);
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
+ return EmitGlobalConstantArray(CVA, AddrSpace, AP);
+
+ if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+ return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
+ // vectors).
+ if (CE->getOpcode() == Instruction::BitCast)
+ return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP);
+
+ if (Size > 8) {
+ // If the constant expression's size is greater than 64-bits, then we have
+ // to emit the value in chunks. Try to constant fold the value and emit it
+ // that way.
+ Constant *New = ConstantFoldConstantExpression(CE, TD);
+ if (New && New != CE)
+ return EmitGlobalConstantImpl(New, AddrSpace, AP);
+ }
+ }
+
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
return EmitGlobalConstantVector(V, AddrSpace, AP);
-
+
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// thread the streamer with EmitValue.
- AP.OutStreamer.EmitValue(LowerConstant(CV, AP),
- AP.TM.getTargetData()->getTypeAllocSize(CV->getType()),
- AddrSpace);
+ AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace);
}
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
@@ -1953,7 +2078,7 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
// Emit an alignment directive for this block, if needed.
if (unsigned Align = MBB->getAlignment())
- EmitAlignment(Log2_32(Align));
+ EmitAlignment(Align);
// If the block has its address taken, emit any labels that were used to
// reference the block. It is possible that there is more than one label
@@ -1970,27 +2095,22 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
OutStreamer.EmitLabel(Syms[i]);
}
+ // Print some verbose block comments.
+ if (isVerbose()) {
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName())
+ OutStreamer.AddComment("%" + BB->getName());
+ EmitBasicBlockLoopComments(*MBB, LI, *this);
+ }
+
// Print the main label for the block.
if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
- if (const BasicBlock *BB = MBB->getBasicBlock())
- if (BB->hasName())
- OutStreamer.AddComment("%" + BB->getName());
-
- EmitBasicBlockLoopComments(*MBB, LI, *this);
-
// NOTE: Want this comment at start of line, don't emit with AddComment.
OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
Twine(MBB->getNumber()) + ":");
}
} else {
- if (isVerbose()) {
- if (const BasicBlock *BB = MBB->getBasicBlock())
- if (BB->hasName())
- OutStreamer.AddComment("%" + BB->getName());
- EmitBasicBlockLoopComments(*MBB, LI, *this);
- }
-
OutStreamer.EmitLabel(MBB->getSymbol());
}
}
@@ -2048,7 +2168,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
MachineInstr &MI = *II;
// If it is not a simple branch, we are in a table somewhere.
- if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch())
+ if (!MI.isBranch() || MI.isIndirectBranch())
return false;
// If we are the operands of one of the branches, this is not
@@ -2090,6 +2210,4 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
}
report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
- return 0;
}
-
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 4d6c28118427..90d511cbab0a 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -25,6 +25,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -35,23 +36,8 @@ using namespace llvm;
void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
-
- if (MAI->hasLEB128()) {
- OutStreamer.EmitSLEB128IntValue(Value);
- return;
- }
- // If we don't have .sleb128, emit as .bytes.
- int Sign = Value >> (8 * sizeof(Value) - 1);
- bool IsMore;
-
- do {
- unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
- Value >>= 7;
- IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
- if (IsMore) Byte |= 0x80;
- OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
- } while (IsMore);
+ OutStreamer.EmitSLEB128IntValue(Value);
}
/// EmitULEB128 - emit the specified signed leb128 value.
@@ -60,25 +46,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
- // FIXME: Should we add a PadTo option to the streamer?
- if (MAI->hasLEB128() && PadTo == 0) {
- OutStreamer.EmitULEB128IntValue(Value);
- return;
- }
-
- // If we don't have .uleb128 or we want to emit padding, emit as .bytes.
- do {
- unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
- Value >>= 7;
- if (Value || PadTo != 0) Byte |= 0x80;
- OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
- } while (Value);
-
- if (PadTo) {
- if (PadTo > 1)
- OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/);
- OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/);
- }
+ OutStreamer.EmitULEB128IntValue(Value, 0/*addrspace*/, PadTo);
}
/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
@@ -143,7 +111,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
return 0;
switch (Encoding & 0x07) {
- default: assert(0 && "Invalid encoded value.");
+ default: llvm_unreachable("Invalid encoded value.");
case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize();
case dwarf::DW_EH_PE_udata2: return 2;
case dwarf::DW_EH_PE_udata4: return 4;
@@ -177,9 +145,8 @@ void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
const MCSymbol *SectionLabel) const {
// On COFF targets, we have to emit the special .secrel32 directive.
- if (const char *SecOffDir = MAI->getDwarfSectionOffsetDirective()) {
- // FIXME: MCize.
- OutStreamer.EmitRawText(SecOffDir + Twine(Label->getName()));
+ if (MAI->getDwarfSectionOffsetDirective()) {
+ OutStreamer.EmitCOFFSecRel32(Label);
return;
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 8eda889155a2..d60585465be0 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -326,7 +326,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
}
- if (OpNo >= MI->getNumOperands()) {
+ // We may have a location metadata attached to the end of the
+ // instruction, and at no point should see metadata at any
+ // other point while processing. It's an error if so.
+ if (OpNo >= MI->getNumOperands() ||
+ MI->getOperand(OpNo).isMetadata()) {
Error = true;
} else {
unsigned OpFlags = MI->getOperand(OpNo).getImm();
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index 67d927348b54..58fe2ed9d357 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_library(LLVMAsmPrinter
AsmPrinterDwarf.cpp
AsmPrinterInlineAsm.cpp
DIE.cpp
+ DwarfAccelTable.cpp
DwarfCFIException.cpp
DwarfCompileUnit.cpp
DwarfDebug.cpp
@@ -11,13 +12,3 @@ add_llvm_library(LLVMAsmPrinter
OcamlGCPrinter.cpp
Win64Exception.cpp
)
-
-add_llvm_library_dependencies(LLVMAsmPrinter
- LLVMAnalysis
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMCParser
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 9c1ce761b0c5..3776848e3f47 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -112,15 +112,6 @@ DIE::~DIE() {
delete Children[i];
}
-/// addSiblingOffset - Add a sibling offset field to the front of the DIE.
-///
-DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) {
- DIEInteger *DI = new (A) DIEInteger(0);
- Values.insert(Values.begin(), DI);
- Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
- return DI;
-}
-
#ifndef NDEBUG
void DIE::print(raw_ostream &O, unsigned IncIndent) {
IndentCount += IncIndent;
@@ -174,6 +165,7 @@ void DIE::dump() {
}
#endif
+void DIEValue::anchor() { }
#ifndef NDEBUG
void DIEValue::dump() {
@@ -223,33 +215,14 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize();
- default: llvm_unreachable("DIE Value form not supported yet"); break;
+ default: llvm_unreachable("DIE Value form not supported yet");
}
- return 0;
}
#ifndef NDEBUG
void DIEInteger::print(raw_ostream &O) {
- O << "Int: " << (int64_t)Integer
- << format(" 0x%llx", (unsigned long long)Integer);
-}
-#endif
-
-//===----------------------------------------------------------------------===//
-// DIEString Implementation
-//===----------------------------------------------------------------------===//
-
-/// EmitValue - Emit string value.
-///
-void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const {
- AP->OutStreamer.EmitBytes(Str, /*addrspace*/0);
- // Emit nul terminator.
- AP->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0);
-}
-
-#ifndef NDEBUG
-void DIEString::print(raw_ostream &O) {
- O << "Str: \"" << Str << "\"";
+ O << "Int: " << (int64_t)Integer << " 0x";
+ O.write_hex(Integer);
}
#endif
@@ -267,6 +240,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
///
unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_strp) return 4;
return AP->getTargetData().getPointerSize();
}
@@ -290,6 +264,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
///
unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_strp) return 4;
return AP->getTargetData().getPointerSize();
}
@@ -335,7 +310,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
///
void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
switch (Form) {
- default: assert(0 && "Improper form for block"); break;
+ default: llvm_unreachable("Improper form for block");
case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
@@ -355,9 +330,8 @@ unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const {
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size);
- default: llvm_unreachable("Improper form for block"); break;
+ default: llvm_unreachable("Improper form for block");
}
- return 0;
}
#ifndef NDEBUG
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 7d61f1edff4a..f93ea1b045b2 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -31,17 +31,17 @@ namespace llvm {
class DIEAbbrevData {
/// Attribute - Dwarf attribute code.
///
- unsigned Attribute;
+ uint16_t Attribute;
/// Form - Dwarf form code.
///
- unsigned Form;
+ uint16_t Form;
public:
- DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {}
+ DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {}
// Accessors.
- unsigned getAttribute() const { return Attribute; }
- unsigned getForm() const { return Form; }
+ uint16_t getAttribute() const { return Attribute; }
+ uint16_t getForm() const { return Form; }
/// Profile - Used to gather unique data for the abbreviation folding set.
///
@@ -54,41 +54,41 @@ namespace llvm {
class DIEAbbrev : public FoldingSetNode {
/// Tag - Dwarf tag code.
///
- unsigned Tag;
+ uint16_t Tag;
- /// Unique number for node.
+ /// ChildrenFlag - Dwarf children flag.
///
- unsigned Number;
+ uint16_t ChildrenFlag;
- /// ChildrenFlag - Dwarf children flag.
+ /// Unique number for node.
///
- unsigned ChildrenFlag;
+ unsigned Number;
/// Data - Raw data bytes for abbreviation.
///
SmallVector<DIEAbbrevData, 8> Data;
public:
- DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {}
+ DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {}
// Accessors.
- unsigned getTag() const { return Tag; }
+ uint16_t getTag() const { return Tag; }
unsigned getNumber() const { return Number; }
- unsigned getChildrenFlag() const { return ChildrenFlag; }
+ uint16_t getChildrenFlag() const { return ChildrenFlag; }
const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
- void setTag(unsigned T) { Tag = T; }
- void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; }
+ void setTag(uint16_t T) { Tag = T; }
+ void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; }
void setNumber(unsigned N) { Number = N; }
/// AddAttribute - Adds another set of attribute information to the
/// abbreviation.
- void AddAttribute(unsigned Attribute, unsigned Form) {
+ void AddAttribute(uint16_t Attribute, uint16_t Form) {
Data.push_back(DIEAbbrevData(Attribute, Form));
}
/// AddFirstAttribute - Adds a set of attribute information to the front
/// of the abbreviation.
- void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+ void AddFirstAttribute(uint16_t Attribute, uint16_t Form) {
Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
}
@@ -113,10 +113,6 @@ namespace llvm {
class DIE {
protected:
- /// Abbrev - Buffer for constructing abbreviation.
- ///
- DIEAbbrev Abbrev;
-
/// Offset - Offset in debug info section.
///
unsigned Offset;
@@ -125,6 +121,10 @@ namespace llvm {
///
unsigned Size;
+ /// Abbrev - Buffer for constructing abbreviation.
+ ///
+ DIEAbbrev Abbrev;
+
/// Children DIEs.
///
std::vector<DIE *> Children;
@@ -139,8 +139,8 @@ namespace llvm {
mutable unsigned IndentCount;
public:
explicit DIE(unsigned Tag)
- : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0),
- Size(0), Parent (0), IndentCount(0) {}
+ : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0),
+ IndentCount(0) {}
virtual ~DIE();
// Accessors.
@@ -163,16 +163,6 @@ namespace llvm {
Values.push_back(Value);
}
- /// SiblingOffset - Return the offset of the debug information entry's
- /// sibling.
- unsigned getSiblingOffset() const { return Offset + Size; }
-
- /// addSiblingOffset - Add a sibling offset field to the front of the DIE.
- /// The caller is responsible for deleting the return value at or after the
- /// same time it destroys this DIE.
- ///
- DIEValue *addSiblingOffset(BumpPtrAllocator &A);
-
/// addChild - Add a child to the DIE.
///
void addChild(DIE *Child) {
@@ -195,12 +185,12 @@ namespace llvm {
/// DIEValue - A debug information entry value.
///
class DIEValue {
+ virtual void anchor();
public:
enum {
isInteger,
isString,
isLabel,
- isSectionOffset,
isDelta,
isEntry,
isBlock
@@ -276,33 +266,6 @@ namespace llvm {
};
//===--------------------------------------------------------------------===//
- /// DIEString - A string value DIE. This DIE keeps string reference only.
- ///
- class DIEString : public DIEValue {
- const StringRef Str;
- public:
- explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {}
-
- /// EmitValue - Emit string value.
- ///
- virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
-
- /// SizeOf - Determine size of string value in bytes.
- ///
- virtual unsigned SizeOf(AsmPrinter *AP, unsigned /*Form*/) const {
- return Str.size() + sizeof(char); // sizeof('\0');
- }
-
- // Implement isa/cast/dyncast.
- static bool classof(const DIEString *) { return true; }
- static bool classof(const DIEValue *S) { return S->getType() == isString; }
-
-#ifndef NDEBUG
- virtual void print(raw_ostream &O);
-#endif
- };
-
- //===--------------------------------------------------------------------===//
/// DIELabel - A label expression DIE.
//
class DIELabel : public DIEValue {
@@ -359,7 +322,7 @@ namespace llvm {
};
//===--------------------------------------------------------------------===//
- /// DIEntry - A pointer to another debug information entry. An instance of
+ /// DIEEntry - A pointer to another debug information entry. An instance of
/// this class can also be used as a proxy for a debug information entry not
/// yet defined (ie. types.)
class DIEEntry : public DIEValue {
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
new file mode 100644
index 000000000000..660684d1bea5
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -0,0 +1,287 @@
+//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfAccelTable.h"
+#include "DwarfDebug.h"
+#include "DIE.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
+ switch (AT) {
+ case eAtomTypeNULL: return "eAtomTypeNULL";
+ case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset";
+ case eAtomTypeCUOffset: return "eAtomTypeCUOffset";
+ case eAtomTypeTag: return "eAtomTypeTag";
+ case eAtomTypeNameFlags: return "eAtomTypeNameFlags";
+ case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags";
+ }
+ llvm_unreachable("invalid AtomType!");
+}
+
+// The general case would need to have a less hard coded size for the
+// length of the HeaderData, however, if we're constructing based on a
+// single Atom then we know it will always be: 4 + 4 + 2 + 2.
+DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) :
+ Header(12),
+ HeaderData(atom) {
+}
+
+// The length of the header data is always going to be 4 + 4 + 4*NumAtoms.
+DwarfAccelTable::DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &atomList) :
+ Header(8 + (atomList.size() * 4)),
+ HeaderData(atomList) {
+}
+
+DwarfAccelTable::~DwarfAccelTable() {
+ for (size_t i = 0, e = Data.size(); i < e; ++i)
+ delete Data[i];
+ for (StringMap<DataArray>::iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI)
+ for (DataArray::iterator DI = EI->second.begin(),
+ DE = EI->second.end(); DI != DE; ++DI)
+ delete (*DI);
+}
+
+void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) {
+  // If the string is already in the map then append this DIE to its list,
+  // otherwise create a new entry.
+ DataArray &DIEs = Entries[Name];
+ DIEs.push_back(new HashDataContents(die, Flags));
+}
+
+void DwarfAccelTable::ComputeBucketCount(void) {
+ // First get the number of unique hashes.
+ std::vector<uint32_t> uniques(Data.size());
+ for (size_t i = 0, e = Data.size(); i < e; ++i)
+ uniques[i] = Data[i]->HashValue;
+ array_pod_sort(uniques.begin(), uniques.end());
+ std::vector<uint32_t>::iterator p =
+ std::unique(uniques.begin(), uniques.end());
+ uint32_t num = std::distance(uniques.begin(), p);
+
+  // Then compute the bucket count, minimum of 1 bucket.
+  if (num > 1024) Header.bucket_count = num/4;
+  else if (num > 16) Header.bucket_count = num/2;
+  else Header.bucket_count = num > 0 ? num : 1;
+
+ Header.hashes_count = num;
+}
+
+namespace {
+ // DIESorter - comparison predicate that sorts DIEs by their offset.
+ struct DIESorter {
+ bool operator()(const struct DwarfAccelTable::HashDataContents *A,
+ const struct DwarfAccelTable::HashDataContents *B) const {
+ return A->Die->getOffset() < B->Die->getOffset();
+ }
+ };
+}
+
+void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) {
+ // Create the individual hash data outputs.
+ for (StringMap<DataArray>::iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+ struct HashData *Entry = new HashData((*EI).getKeyData());
+
+ // Unique the entries.
+ std::stable_sort(EI->second.begin(), EI->second.end(), DIESorter());
+ EI->second.erase(std::unique(EI->second.begin(), EI->second.end()),
+ EI->second.end());
+
+ for (DataArray::const_iterator DI = EI->second.begin(),
+ DE = EI->second.end();
+ DI != DE; ++DI)
+ Entry->addData((*DI));
+ Data.push_back(Entry);
+ }
+
+ // Figure out how many buckets we need, then compute the bucket
+ // contents and the final ordering. We'll emit the hashes and offsets
+ // by doing a walk during the emission phase. We add temporary
+  // symbols to the data so that we can reference them when emitting the
+  // offsets; the symbols themselves are emitted along with the data.
+ ComputeBucketCount();
+
+ // Compute bucket contents and final ordering.
+ Buckets.resize(Header.bucket_count);
+ for (size_t i = 0, e = Data.size(); i < e; ++i) {
+ uint32_t bucket = Data[i]->HashValue % Header.bucket_count;
+ Buckets[bucket].push_back(Data[i]);
+ Data[i]->Sym = Asm->GetTempSymbol(Prefix, i);
+ }
+}
+
+// Emits the header for the table via the AsmPrinter.
+void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
+ Asm->OutStreamer.AddComment("Header Magic");
+ Asm->EmitInt32(Header.magic);
+ Asm->OutStreamer.AddComment("Header Version");
+ Asm->EmitInt16(Header.version);
+ Asm->OutStreamer.AddComment("Header Hash Function");
+ Asm->EmitInt16(Header.hash_function);
+ Asm->OutStreamer.AddComment("Header Bucket Count");
+ Asm->EmitInt32(Header.bucket_count);
+ Asm->OutStreamer.AddComment("Header Hash Count");
+ Asm->EmitInt32(Header.hashes_count);
+ Asm->OutStreamer.AddComment("Header Data Length");
+ Asm->EmitInt32(Header.header_data_len);
+ Asm->OutStreamer.AddComment("HeaderData Die Offset Base");
+ Asm->EmitInt32(HeaderData.die_offset_base);
+ Asm->OutStreamer.AddComment("HeaderData Atom Count");
+ Asm->EmitInt32(HeaderData.Atoms.size());
+ for (size_t i = 0; i < HeaderData.Atoms.size(); i++) {
+ Atom A = HeaderData.Atoms[i];
+ Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type));
+ Asm->EmitInt16(A.type);
+ Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form));
+ Asm->EmitInt16(A.form);
+ }
+}
+
+// Walk through and emit the buckets for the table. For each bucket we
+// emit the index of its first hash, or UINT32_MAX if the bucket is empty.
+void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
+ unsigned index = 0;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ Asm->OutStreamer.AddComment("Bucket " + Twine(i));
+ if (Buckets[i].size() != 0)
+ Asm->EmitInt32(index);
+ else
+ Asm->EmitInt32(UINT32_MAX);
+ index += Buckets[i].size();
+ }
+}
+
+// Walk through the buckets and emit the individual hashes for each
+// bucket.
+void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
+ Asm->EmitInt32((*HI)->HashValue);
+ }
+ }
+}
+
+// Walk through the buckets and emit the individual offsets for each
+// element in each bucket. This is done via a symbol subtraction from the
+// beginning of the section. The non-section symbol will be output later
+// when we emit the actual data.
+void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i));
+ MCContext &Context = Asm->OutStreamer.getContext();
+ const MCExpr *Sub =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context),
+ MCSymbolRefExpr::Create(SecBegin, Context),
+ Context);
+ Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0);
+ }
+ }
+}
+
+// Walk through the buckets and emit the full data for each element in
+// the bucket. For the string case emit the DIEs and the various offsets.
+// Terminate each HashData bucket with 0.
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
+ uint64_t PrevHash = UINT64_MAX;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ // Remember to emit the label for our offset.
+ Asm->OutStreamer.EmitLabel((*HI)->Sym);
+ Asm->OutStreamer.AddComment((*HI)->Str);
+ Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str),
+ D->getStringPool());
+ Asm->OutStreamer.AddComment("Num DIEs");
+ Asm->EmitInt32((*HI)->Data.size());
+ for (std::vector<struct HashDataContents*>::const_iterator
+ DI = (*HI)->Data.begin(), DE = (*HI)->Data.end();
+ DI != DE; ++DI) {
+ // Emit the DIE offset
+ Asm->EmitInt32((*DI)->Die->getOffset());
+ // If we have multiple Atoms emit that info too.
+ // FIXME: A bit of a hack, we either emit only one atom or all info.
+ if (HeaderData.Atoms.size() > 1) {
+ Asm->EmitInt16((*DI)->Die->getTag());
+ Asm->EmitInt8((*DI)->Flags);
+ }
+ }
+ // Emit a 0 to terminate the data unless we have a hash collision.
+ if (PrevHash != (*HI)->HashValue)
+ Asm->EmitInt32(0);
+ PrevHash = (*HI)->HashValue;
+ }
+ }
+}
+
+// Emit the entire data structure to the output file.
+void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin,
+ DwarfDebug *D) {
+ // Emit the header.
+ EmitHeader(Asm);
+
+ // Emit the buckets.
+ EmitBuckets(Asm);
+
+ // Emit the hashes.
+ EmitHashes(Asm);
+
+ // Emit the offsets.
+ EmitOffsets(Asm, SecBegin);
+
+ // Emit the hash data.
+ EmitData(Asm, D);
+}
+
+#ifndef NDEBUG
+void DwarfAccelTable::print(raw_ostream &O) {
+
+ Header.print(O);
+ HeaderData.print(O);
+
+ O << "Entries: \n";
+ for (StringMap<DataArray>::const_iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+ O << "Name: " << EI->getKeyData() << "\n";
+ for (DataArray::const_iterator DI = EI->second.begin(),
+ DE = EI->second.end();
+ DI != DE; ++DI)
+ (*DI)->print(O);
+ }
+
+ O << "Buckets and Hashes: \n";
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i)
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI)
+ (*HI)->print(O);
+
+ O << "Data: \n";
+ for (std::vector<HashData*>::const_iterator
+ DI = Data.begin(), DE = Data.end(); DI != DE; ++DI)
+ (*DI)->print(O);
+
+
+}
+#endif
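For orientation, here is a minimal sketch of how a producer could drive the interface implemented above; the helper name, the section handling and the single-atom configuration are illustrative assumptions, not code from this patch (the real call sites live in DwarfDebug.cpp):

// Illustrative only: collect names, finalize the layout, then emit.
static void emitNameAccelTable(AsmPrinter *Asm, DwarfDebug *DD,
                               const StringMap<std::vector<DIE*> > &Names,
                               const MCSection *Section, MCSymbol *SecBegin) {
  Asm->OutStreamer.SwitchSection(Section);
  Asm->OutStreamer.EmitLabel(SecBegin);

  // One atom: the DIE offset, encoded as 4-byte data.
  DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
                                           dwarf::DW_FORM_data4));
  for (StringMap<std::vector<DIE*> >::const_iterator
         I = Names.begin(), E = Names.end(); I != E; ++I)
    for (unsigned i = 0, e = I->second.size(); i != e; ++i)
      AT.AddName(I->getKeyData(), I->second[i]);

  AT.FinalizeTable(Asm, "Names");  // assigns hashes, buckets and temp symbols
  AT.Emit(Asm, SecBegin, DD);      // header, buckets, hashes, offsets, data
}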
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
new file mode 100644
index 000000000000..2278d4c784f4
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -0,0 +1,290 @@
+//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
+#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "DIE.h"
+#include <vector>
+#include <map>
+
+// The dwarf accelerator tables are indirect hash tables optimized
+// for null lookup rather than access to known data. They are output into
+// an on-disk format that looks like this:
+//
+// .-------------.
+// | HEADER |
+// |-------------|
+// | BUCKETS |
+// |-------------|
+// | HASHES |
+// |-------------|
+// | OFFSETS |
+// |-------------|
+// | DATA |
+// `-------------'
+//
+// where the header contains a magic number, version, type of hash function,
+// the number of buckets, total number of hashes, and room for a special
+// struct of data and the length of that struct.
+//
+// The buckets contain an index (e.g. 6) into the hashes array. The hashes
+// section contains all of the 32-bit hash values in contiguous memory, and
+// the offsets contain the offset into the data area for the particular
+// hash.
+//
+// For a lookup example, we could hash a function name and take it modulo the
+// number of buckets giving us our bucket. From there we take the bucket value
+// as an index into the hashes table and look at each successive hash as long
+// as the hash value is still the same modulo result (bucket value) as earlier.
+// If we have a match we look at that same entry in the offsets table and
+// grab the offset in the data for our final match.
+
+namespace llvm {
+
+class AsmPrinter;
+class DIE;
+class DwarfDebug;
+
+class DwarfAccelTable {
+
+ enum HashFunctionType {
+ eHashFunctionDJB = 0u
+ };
+
+ static uint32_t HashDJB (StringRef Str) {
+ uint32_t h = 5381;
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ h = ((h << 5) + h) + Str[i];
+ return h;
+ }
+
+ // Helper function to compute the number of buckets needed based on
+ // the number of unique hashes.
+ void ComputeBucketCount (void);
+
+ struct TableHeader {
+ uint32_t magic; // 'HASH' magic value to allow endian detection
+ uint16_t version; // Version number.
+ uint16_t hash_function; // The hash function enumeration that was used.
+ uint32_t bucket_count; // The number of buckets in this hash table.
+ uint32_t hashes_count; // The total number of unique hash values
+ // and hash data offsets in this table.
+ uint32_t header_data_len; // The bytes to skip to get to the hash
+ // indexes (buckets) for correct alignment.
+ // Also written to disk is the implementation specific header data.
+
+ static const uint32_t MagicHash = 0x48415348;
+
+ TableHeader (uint32_t data_len) :
+ magic (MagicHash), version (1), hash_function (eHashFunctionDJB),
+ bucket_count (0), hashes_count (0), header_data_len (data_len)
+ {}
+
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Magic: " << format("0x%x", magic) << "\n"
+ << "Version: " << version << "\n"
+ << "Hash Function: " << hash_function << "\n"
+ << "Bucket Count: " << bucket_count << "\n"
+ << "Header Data Length: " << header_data_len << "\n";
+ }
+ void dump() { print(dbgs()); }
+#endif
+ };
+
+public:
+ // The HeaderData describes the form of each set of data. In general this
+ // is as a list of atoms (atom_count) where each atom contains a type
+ // (AtomType type) of data, and an encoding form (form). In the case of
+ // data that is referenced via DW_FORM_ref_* the die_offset_base is
+ // used to describe the offset for all forms in the list of atoms.
+ // This also serves as a public interface of sorts.
+ // When written to disk this will have the form:
+ //
+ // uint32_t die_offset_base
+ // uint32_t atom_count
+ // atom_count Atoms
+ enum AtomType {
+ eAtomTypeNULL = 0u,
+ eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
+    eAtomTypeCUOffset = 2u, // DIE offset of the compile unit header that
+ // contains the item in question
+ eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as
+ // DW_FORM_data1 (if no tags exceed 255) or
+ // DW_FORM_data2.
+ eAtomTypeNameFlags = 4u, // Flags from enum NameFlags
+ eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags
+ };
+
+ enum TypeFlags {
+ eTypeFlagClassMask = 0x0000000fu,
+
+ // Always set for C++, only set for ObjC if this is the
+ // @implementation for a class.
+ eTypeFlagClassIsImplementation = ( 1u << 1 )
+ };
+
+ // Make these public so that they can be used as a general interface to
+ // the class.
+ struct Atom {
+ AtomType type; // enum AtomType
+ uint16_t form; // DWARF DW_FORM_ defines
+
+ Atom(AtomType type, uint16_t form) : type(type), form(form) {}
+ static const char * AtomTypeString(enum AtomType);
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Type: " << AtomTypeString(type) << "\n"
+ << "Form: " << dwarf::FormEncodingString(form) << "\n";
+ }
+ void dump() {
+ print(dbgs());
+ }
+#endif
+ };
+
+ private:
+ struct TableHeaderData {
+
+ uint32_t die_offset_base;
+ std::vector<Atom> Atoms;
+
+ TableHeaderData(std::vector<DwarfAccelTable::Atom> &AtomList,
+ uint32_t offset = 0) :
+ die_offset_base(offset) {
+ for (size_t i = 0, e = AtomList.size(); i != e; ++i)
+ Atoms.push_back(AtomList[i]);
+ }
+
+ TableHeaderData(DwarfAccelTable::Atom Atom, uint32_t offset = 0)
+ : die_offset_base(offset) {
+ Atoms.push_back(Atom);
+ }
+
+#ifndef NDEBUG
+ void print (raw_ostream &O) {
+ O << "die_offset_base: " << die_offset_base << "\n";
+ for (size_t i = 0; i < Atoms.size(); i++)
+ Atoms[i].print(O);
+ }
+ void dump() {
+ print(dbgs());
+ }
+#endif
+ };
+
+ // The data itself consists of a str_offset, a count of the DIEs in the
+ // hash and the offsets to the DIEs themselves.
+  // On disk each data section is terminated with a 0 KeyType marking the
+  // end of the hash chain.
+ // On output this looks like:
+ // uint32_t str_offset
+ // uint32_t hash_data_count
+ // HashData[hash_data_count]
+public:
+ struct HashDataContents {
+ DIE *Die; // Offsets
+ char Flags; // Specific flags to output
+
+ HashDataContents(DIE *D, char Flags) :
+ Die(D),
+ Flags(Flags) { }
+ #ifndef NDEBUG
+ void print(raw_ostream &O) const {
+ O << " Offset: " << Die->getOffset() << "\n";
+ O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n";
+ O << " Flags: " << Flags << "\n";
+ }
+ #endif
+ };
+private:
+ struct HashData {
+ StringRef Str;
+ uint32_t HashValue;
+ MCSymbol *Sym;
+ std::vector<struct HashDataContents*> Data; // offsets
+ HashData(StringRef S) : Str(S) {
+ HashValue = DwarfAccelTable::HashDJB(S);
+ }
+ void addData(struct HashDataContents *Datum) { Data.push_back(Datum); }
+ #ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Name: " << Str << "\n";
+ O << " Hash Value: " << format("0x%x", HashValue) << "\n";
+ O << " Symbol: " ;
+ if (Sym) Sym->print(O);
+ else O << "<none>";
+ O << "\n";
+ for (size_t i = 0; i < Data.size(); i++) {
+ O << " Offset: " << Data[i]->Die->getOffset() << "\n";
+ O << " Tag: " << dwarf::TagString(Data[i]->Die->getTag()) << "\n";
+ O << " Flags: " << Data[i]->Flags << "\n";
+ }
+ }
+ void dump() {
+ print(dbgs());
+ }
+ #endif
+ };
+
+ DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT
+ void operator=(const DwarfAccelTable&); // DO NOT IMPLEMENT
+
+ // Internal Functions
+ void EmitHeader(AsmPrinter *);
+ void EmitBuckets(AsmPrinter *);
+ void EmitHashes(AsmPrinter *);
+ void EmitOffsets(AsmPrinter *, MCSymbol *);
+ void EmitData(AsmPrinter *, DwarfDebug *D);
+
+ // Output Variables
+ TableHeader Header;
+ TableHeaderData HeaderData;
+ std::vector<HashData*> Data;
+
+ // String Data
+ typedef std::vector<struct HashDataContents*> DataArray;
+ typedef StringMap<DataArray> StringEntries;
+ StringEntries Entries;
+
+ // Buckets/Hashes/Offsets
+ typedef std::vector<HashData*> HashList;
+ typedef std::vector<HashList> BucketList;
+ BucketList Buckets;
+ HashList Hashes;
+
+ // Public Implementation
+ public:
+ DwarfAccelTable(DwarfAccelTable::Atom);
+ DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &);
+ ~DwarfAccelTable();
+ void AddName(StringRef, DIE*, char = 0);
+ void FinalizeTable(AsmPrinter *, const char *);
+ void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *);
+#ifndef NDEBUG
+ void print(raw_ostream &O);
+ void dump() { print(dbgs()); }
+#endif
+};
+
+}
+#endif
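To make the lookup procedure described in the file comment above concrete, here is a reader-side sketch; the AccelTableView structure and the final string comparison step are assumptions about a hypothetical consumer, not part of LLVM:

#include <vector>
#include <stdint.h>
#include "llvm/ADT/StringRef.h"
using namespace llvm;

// Hypothetical in-memory view of a table that has been read back in.
// Buckets hold indexes into Hashes; Offsets parallels Hashes.
struct AccelTableView {
  std::vector<uint32_t> Buckets;  // first hash index per bucket, UINT32_MAX if empty
  std::vector<uint32_t> Hashes;   // 32-bit DJB hashes, grouped by bucket
  std::vector<uint32_t> Offsets;  // offsets into the data area, parallel to Hashes
};

// Returns the data offset for Name, or UINT32_MAX if no hash matches.
static uint32_t lookup(const AccelTableView &T, StringRef Name) {
  uint32_t Hash = 5381;                      // DJB hash, as in HashDJB above
  for (unsigned i = 0, e = Name.size(); i != e; ++i)
    Hash = ((Hash << 5) + Hash) + Name[i];

  uint32_t Bucket = Hash % T.Buckets.size();
  uint32_t Index = T.Buckets[Bucket];
  if (Index == UINT32_MAX)
    return UINT32_MAX;                       // empty bucket

  // Scan successive hashes while they still map to this bucket.
  while (Index < T.Hashes.size() &&
         T.Hashes[Index] % T.Buckets.size() == Bucket) {
    if (T.Hashes[Index] == Hash)
      return T.Offsets[Index];               // caller still compares the string
    ++Index;
  }
  return UINT32_MAX;
}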
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 8ed4f4c43a7c..d975f1f97bea 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -142,12 +142,14 @@ void DwarfCFIException::EndFunction() {
Asm->OutStreamer.EmitCFIEndProc();
+ if (!shouldEmitPersonality)
+ return;
+
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
Asm->getFunctionNumber()));
// Map all labels and get rid of any dead landing pads.
MMI->TidyLandingPads();
- if (shouldEmitPersonality)
- EmitExceptionTable();
+ EmitExceptionTable();
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 6fe476d02ef7..69dc454ae1d7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -13,12 +13,14 @@
#define DEBUG_TYPE "dwarfdebug"
+#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "llvm/Constants.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -30,8 +32,9 @@
using namespace llvm;
/// CompileUnit - Compile unit constructor.
-CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW)
- : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
+CompileUnit::CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A,
+ DwarfDebug *DW)
+ : ID(I), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
}
@@ -67,12 +70,19 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute,
Die->addValue(Attribute, Form, Value);
}
-/// addString - Add a string attribute data and value. DIEString only
-/// keeps string reference.
-void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form,
- StringRef String) {
- DIEValue *Value = new (DIEValueAllocator) DIEString(String);
- Die->addValue(Attribute, Form, Value);
+/// addString - Add a string attribute data and value. We always emit a
+/// reference to the string pool instead of immediate strings so that DIEs have
+/// more predictable sizes.
+void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) {
+ MCSymbol *Symb = DD->getStringPoolEntry(String);
+ DIEValue *Value;
+ if (Asm->needsRelocationsForDwarfStringPool())
+ Value = new (DIEValueAllocator) DIELabel(Symb);
+ else {
+ MCSymbol *StringPool = DD->getStringPool();
+ Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
+ }
+ Die->addValue(Attribute, dwarf::DW_FORM_strp, Value);
}
/// addLabel - Add a Dwarf label attribute data and value.
@@ -98,7 +108,6 @@ void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form,
Die->addValue(Attribute, Form, createDIEEntry(Entry));
}
-
/// addBlock - Add block data.
///
void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
@@ -135,8 +144,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
unsigned Line = G.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(),
- G.getDirectory());
+ unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), G.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -148,14 +156,14 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
// Verify subprogram.
if (!SP.Verify())
return;
- // If the line number is 0, don't add it.
- if (SP.getLineNumber() == 0)
- return;
+ // If the line number is 0, don't add it.
unsigned Line = SP.getLineNumber();
- if (!SP.getContext().Verify())
+ if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory());
+
+ unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(),
+ SP.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -169,9 +177,28 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
return;
unsigned Line = Ty.getLineNumber();
- if (Line == 0 || !Ty.getContext().Verify())
+ if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory());
+ unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(),
+ Ty.getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
+ // Verify type.
+ if (!Ty.Verify())
+ return;
+
+ unsigned Line = Ty.getLineNumber();
+ if (Line == 0)
+ return;
+ DIFile File = Ty.getFile();
+ unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(),
+ File.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -458,7 +485,7 @@ static bool isTypeSigned(DIType Ty, int *SizeInBits) {
/// addConstantValue - Add constant value entry in variable DIE.
bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
DIType Ty) {
- assert (MO.isImm() && "Invalid machine operand!");
+ assert(MO.isImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
int SizeInBits = -1;
bool SignedConstant = isTypeSigned(Ty, &SizeInBits);
@@ -558,8 +585,8 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
Buffer.addChild(getOrCreateTemplateValueParameterDIE(
DITemplateValueParameter(Element)));
}
-
}
+
/// addToContextOwner - Add Die into the list of its context owner's children.
void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
if (Context.isType()) {
@@ -598,13 +625,29 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
assert(Ty.isDerivedType() && "Unknown kind of DIType");
constructTypeDIE(*TyDIE, DIDerivedType(Ty));
}
-
+ // If this is a named finished type then include it in the list of types
+ // for the accelerator tables.
+ if (!Ty.getName().empty() && !Ty.isForwardDecl()) {
+ bool IsImplementation = 0;
+ if (Ty.isCompositeType()) {
+ DICompositeType CT(Ty);
+      // A runtime language of 0 actually means C/C++, and any
+      // non-negative value is some version of Objective-C/C++.
+ IsImplementation = (CT.getRunTimeLang() == 0) ||
+ CT.isObjcClassComplete();
+ }
+ unsigned Flags = IsImplementation ?
+ DwarfAccelTable::eTypeFlagClassIsImplementation : 0;
+ addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags));
+ }
+
addToContextOwner(TyDIE, Ty.getContext());
return TyDIE;
}
/// addType - Add a new type attribute to the specified entity.
-void CompileUnit::addType(DIE *Entity, DIType Ty) {
+void CompileUnit::addType(DIE *Entity, DIType Ty,
+ unsigned Attribute) {
if (!Ty.Verify())
return;
@@ -612,7 +655,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) {
DIEEntry *Entry = getDIEEntry(Ty);
// If it exists then use the existing value.
if (Entry) {
- Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+ Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry);
return;
}
@@ -622,7 +665,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) {
// Set up proxy.
Entry = createDIEEntry(Buffer);
insertDIEEntry(Ty, Entry);
- Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+ Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry);
// If this is a complete composite type then include it in the
// list of global types.
@@ -662,7 +705,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
StringRef Name = BTy.getName();
// Add name if not anonymous or intermediate type.
if (!Name.empty())
- addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(&Buffer, dwarf::DW_AT_name, Name);
if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) {
Buffer.setTag(dwarf::DW_TAG_unspecified_type);
@@ -671,8 +714,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
}
Buffer.setTag(dwarf::DW_TAG_base_type);
- addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
- BTy.getEncoding());
+ addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy.getEncoding());
uint64_t Size = BTy.getSizeInBits() >> 3;
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
@@ -696,10 +739,10 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
// Add name if not anonymous or intermediate type.
if (!Name.empty())
- addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(&Buffer, dwarf::DW_AT_name, Name);
// Add size if non-zero (derived types might be zero-sized.)
- if (Size)
+ if (Size && Tag != dwarf::DW_TAG_pointer_type)
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
// Add source line info if available and TyDesc is not a forward declaration.
@@ -755,8 +798,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
Buffer.addChild(Arg);
}
}
- // Add prototype flag.
- if (isPrototyped)
+ // Add prototype flag if we're dealing with a C language and the
+ // function has been prototyped.
+ if (isPrototyped &&
+ (Language == dwarf::DW_LANG_C89 ||
+ Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
}
break;
@@ -779,13 +826,13 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DISubprogram SP(Element);
ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element));
if (SP.isProtected())
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (SP.isPrivate())
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
else
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (SP.isExplicit())
addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
@@ -793,15 +840,54 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
else if (Element.isVariable()) {
DIVariable DV(Element);
ElemDie = new DIE(dwarf::DW_TAG_variable);
- addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- DV.getName());
+ addString(ElemDie, dwarf::DW_AT_name, DV.getName());
addType(ElemDie, DV.getType());
addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
addSourceLine(ElemDie, DV);
- } else if (Element.isDerivedType())
- ElemDie = createMemberDIE(DIDerivedType(Element));
- else
+ } else if (Element.isDerivedType()) {
+ DIDerivedType DDTy(Element);
+ if (DDTy.getTag() == dwarf::DW_TAG_friend) {
+ ElemDie = new DIE(dwarf::DW_TAG_friend);
+ addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
+ } else
+ ElemDie = createMemberDIE(DIDerivedType(Element));
+ } else if (Element.isObjCProperty()) {
+ DIObjCProperty Property(Element);
+ ElemDie = new DIE(Property.getTag());
+ StringRef PropertyName = Property.getObjCPropertyName();
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
+ addType(ElemDie, Property.getType());
+ addSourceLine(ElemDie, Property);
+ StringRef GetterName = Property.getObjCPropertyGetterName();
+ if (!GetterName.empty())
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
+ StringRef SetterName = Property.getObjCPropertySetterName();
+ if (!SetterName.empty())
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
+ unsigned PropertyAttributes = 0;
+ if (Property.isReadOnlyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
+ if (Property.isReadWriteObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
+ if (Property.isAssignObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
+ if (Property.isRetainObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
+ if (Property.isCopyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
+ if (Property.isNonAtomicObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
+ if (PropertyAttributes)
+ addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ PropertyAttributes);
+
+ DIEEntry *Entry = getDIEEntry(Element);
+ if (!Entry) {
+ Entry = createDIEEntry(ElemDie);
+ insertDIEEntry(Element, Entry);
+ }
+ } else
continue;
Buffer.addChild(ElemDie);
}
@@ -809,11 +895,6 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (CTy.isAppleBlockExtension())
addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
- unsigned RLang = CTy.getRunTimeLang();
- if (RLang)
- addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
- dwarf::DW_FORM_data1, RLang);
-
DICompositeType ContainingType = CTy.getContainingType();
if (DIDescriptor(ContainingType).isCompositeType())
addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
@@ -827,7 +908,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type,
dwarf::DW_FORM_flag, 1);
- if (Tag == dwarf::DW_TAG_class_type)
+ // Add template parameters to a class, structure or union types.
+ // FIXME: The support isn't in the metadata for this yet.
+ if (Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_union_type)
addTemplateParams(Buffer, CTy.getTemplateParams());
break;
@@ -838,11 +923,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add name if not anonymous or intermediate type.
if (!Name.empty())
- addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(&Buffer, dwarf::DW_AT_name, Name);
if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
|| Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
- {
+ {
// Add size if non-zero (derived types might be zero-sized.)
if (Size)
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
@@ -857,6 +942,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add source line info if available.
if (!CTy.isForwardDecl())
addSourceLine(&Buffer, CTy);
+
+ // No harm in adding the runtime language to the declaration.
+ unsigned RLang = CTy.getRunTimeLang();
+ if (RLang)
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+ dwarf::DW_FORM_data1, RLang);
}
}
@@ -870,7 +961,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
addType(ParamDIE, TP.getType());
- addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName());
+ addString(ParamDIE, dwarf::DW_AT_name, TP.getName());
return ParamDIE;
}
@@ -885,7 +976,7 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV)
ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
addType(ParamDIE, TPV.getType());
if (!TPV.getName().empty())
- addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
+ addString(ParamDIE, dwarf::DW_AT_name, TPV.getName());
addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
TPV.getValue());
return ParamDIE;
@@ -898,8 +989,11 @@ DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) {
return NDie;
NDie = new DIE(dwarf::DW_TAG_namespace);
insertDIE(NS, NDie);
- if (!NS.getName().empty())
- addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
+ if (!NS.getName().empty()) {
+ addString(NDie, dwarf::DW_AT_name, NS.getName());
+ addAccelNamespace(NS.getName(), NDie);
+ } else
+ addAccelNamespace("(anonymous namespace)", NDie);
addSourceLine(NDie, NS);
addToContextOwner(NDie, NS.getContext());
return NDie;
@@ -921,6 +1015,12 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
if (SPDie)
return SPDie;
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ DIE *DeclDie = NULL;
+ if (SPDecl.isSubprogram()) {
+ DeclDie = getOrCreateSubprogramDIE(SPDecl);
+ }
+
SPDie = new DIE(dwarf::DW_TAG_subprogram);
// DW_TAG_inlined_subroutine may refer to this DIE.
@@ -932,25 +1032,36 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
// Add function template parameters.
addTemplateParams(*SPDie, SP.getTemplateParams());
+ // Unfortunately this code needs to stay here to work around
+ // a bug in older gdbs that requires the linkage name to resolve
+ // multiple template functions.
StringRef LinkageName = SP.getLinkageName();
if (!LinkageName.empty())
- addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
- dwarf::DW_FORM_string,
- getRealLinkageName(LinkageName));
+ addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
// If this DIE is going to refer declaration info using AT_specification
// then there is no need to add other attributes.
- if (SP.getFunctionDeclaration().isSubprogram())
+ if (DeclDie) {
+ // Refer function declaration directly.
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ DeclDie);
+
return SPDie;
+ }
// Constructors and operators for anonymous aggregates do not have names.
if (!SP.getName().empty())
- addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- SP.getName());
+ addString(SPDie, dwarf::DW_AT_name, SP.getName());
addSourceLine(SPDie, SP);
- if (SP.isPrototyped())
+ // Add the prototype if we have a prototype and we have a C like
+ // language.
+ if (SP.isPrototyped() &&
+ (Language == dwarf::DW_LANG_C89 ||
+ Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
// Add Return Type.
@@ -965,7 +1076,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
unsigned VK = SP.getVirtuality();
if (VK) {
- addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
+ addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
DIEBlock *Block = getDIEBlock();
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
@@ -1052,31 +1163,30 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
insertDIE(N, VariableDIE);
// Add name.
- addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- GV.getDisplayName());
+ addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
StringRef LinkageName = GV.getLinkageName();
bool isGlobalVariable = GV.getGlobal() != NULL;
if (!LinkageName.empty() && isGlobalVariable)
- addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
- dwarf::DW_FORM_string,
- getRealLinkageName(LinkageName));
+ addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
// Add type.
DIType GTy = GV.getType();
addType(VariableDIE, GTy);
// Add scoping info.
- if (!GV.isLocalToUnit()) {
+ if (!GV.isLocalToUnit())
addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
- // Expose as global.
- addGlobal(GV.getName(), VariableDIE);
- }
+
// Add line number info.
addSourceLine(VariableDIE, GV);
// Add to context owner.
DIDescriptor GVContext = GV.getContext();
addToContextOwner(VariableDIE, GVContext);
// Add location.
+ bool addToAccelTable = false;
+ DIE *VariableSpecDIE = NULL;
if (isGlobalVariable) {
+ addToAccelTable = true;
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
addLabel(Block, 0, dwarf::DW_FORM_udata,
@@ -1086,7 +1196,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
!GVContext.isFile() && !isSubprogramContext(GVContext)) {
// Create specification DIE.
- DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+ VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
dwarf::DW_FORM_ref4, VariableDIE);
addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
@@ -1095,11 +1205,12 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
addDie(VariableSpecDIE);
} else {
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
- }
+ }
} else if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(GV.getConstant()))
addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+ addToAccelTable = true;
// GV is a merged global.
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
Value *Ptr = CE->getOperand(0);
@@ -1114,6 +1225,16 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
+ if (addToAccelTable) {
+ DIE *AddrDIE = VariableSpecDIE ? VariableSpecDIE : VariableDIE;
+ addAccelName(GV.getName(), AddrDIE);
+
+ // If the linkage name is different than the name, go ahead and output
+ // that as well into the name table.
+ if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
+ addAccelName(GV.getLinkageName(), AddrDIE);
+ }
+
return;
}
@@ -1121,8 +1242,8 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
- int64_t L = SR.getLo();
- int64_t H = SR.getHi();
+ uint64_t L = SR.getLo();
+ uint64_t H = SR.getHi();
// The L value defines the lower bounds which is typically zero for C/C++. The
// H value is the upper bounds. Values are 64 bit. H - L + 1 is the size
@@ -1135,8 +1256,8 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy)
return;
}
if (L)
- addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
- addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+ addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
Buffer.addChild(DW_Subrange);
}
@@ -1175,7 +1296,7 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
StringRef Name = ETy.getName();
- addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(Enumerator, dwarf::DW_AT_name, Name);
int64_t Value = ETy.getEnumValue();
addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
return Enumerator;
@@ -1212,8 +1333,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
dwarf::DW_FORM_ref4, AbsDIE);
else {
- addString(VariableDie, dwarf::DW_AT_name,
- dwarf::DW_FORM_string, Name);
+ addString(VariableDie, dwarf::DW_AT_name, Name);
addSourceLine(VariableDie, DV->getVariable());
addType(VariableDie, DV->getType());
}
@@ -1308,7 +1428,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
DIE *MemberDie = new DIE(DT.getTag());
StringRef Name = DT.getName();
if (!Name.empty())
- addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(MemberDie, dwarf::DW_AT_name, Name);
addType(MemberDie, DT.getTypeDerivedFrom());
@@ -1366,32 +1486,35 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
if (DT.isProtected())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (DT.isPrivate())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
else
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (DT.isVirtual())
- addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
dwarf::DW_VIRTUALITY_virtual);
// Objective-C properties.
+ if (MDNode *PNode = DT.getObjCProperty())
+ if (DIEEntry *PropertyDie = getDIEEntry(PNode))
+ MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
+ PropertyDie);
+
+ // This is only for backward compatibility.
StringRef PropertyName = DT.getObjCPropertyName();
if (!PropertyName.empty()) {
- addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string,
- PropertyName);
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
StringRef GetterName = DT.getObjCPropertyGetterName();
if (!GetterName.empty())
- addString(MemberDie, dwarf::DW_AT_APPLE_property_getter,
- dwarf::DW_FORM_string, GetterName);
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
StringRef SetterName = DT.getObjCPropertySetterName();
if (!SetterName.empty())
- addString(MemberDie, dwarf::DW_AT_APPLE_property_setter,
- dwarf::DW_FORM_string, SetterName);
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
unsigned PropertyAttributes = 0;
if (DT.isReadOnlyObjCProperty())
PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 785926579fa4..45e407e27ffa 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -29,13 +29,17 @@ class ConstantInt;
class DbgVariable;
//===----------------------------------------------------------------------===//
-/// CompileUnit - This dwarf writer support class manages information associate
+/// CompileUnit - This dwarf writer support class manages information associated
/// with a source file.
class CompileUnit {
/// ID - File identifier for source.
///
unsigned ID;
+ /// Language - The DW_AT_language of the compile unit
+ ///
+ unsigned Language;
+
/// Die - Compile unit debug information entry.
///
const OwningPtr<DIE> CUDie;
@@ -56,14 +60,17 @@ class CompileUnit {
/// descriptors to debug information entries using a DIEEntry proxy.
DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
- /// Globals - A map of globally visible named entities for this unit.
- ///
- StringMap<DIE*> Globals;
-
/// GlobalTypes - A map of globally visible types for this unit.
///
StringMap<DIE*> GlobalTypes;
+ /// AccelNames - A map of names for the name accelerator table.
+ ///
+ StringMap<std::vector<DIE*> > AccelNames;
+ StringMap<std::vector<DIE*> > AccelObjC;
+ StringMap<std::vector<DIE*> > AccelNamespace;
+ StringMap<std::vector<std::pair<DIE*, unsigned> > > AccelTypes;
+
/// DIEBlocks - A list of all the DIEBlocks in use.
std::vector<DIEBlock *> DIEBlocks;
@@ -73,27 +80,56 @@ class CompileUnit {
DenseMap<DIE *, const MDNode *> ContainingTypeMap;
public:
- CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW);
+ CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW);
~CompileUnit();
// Accessors.
unsigned getID() const { return ID; }
+ unsigned getLanguage() const { return Language; }
DIE* getCUDie() const { return CUDie.get(); }
- const StringMap<DIE*> &getGlobals() const { return Globals; }
const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+ const StringMap<std::vector<DIE*> > &getAccelNames() const {
+ return AccelNames;
+ }
+ const StringMap<std::vector<DIE*> > &getAccelObjC() const {
+ return AccelObjC;
+ }
+ const StringMap<std::vector<DIE*> > &getAccelNamespace() const {
+ return AccelNamespace;
+ }
+ const StringMap<std::vector<std::pair<DIE*, unsigned > > >
+ &getAccelTypes() const {
+ return AccelTypes;
+ }
+
/// hasContent - Return true if this compile unit has something to write out.
///
bool hasContent() const { return !CUDie->getChildren().empty(); }
- /// addGlobal - Add a new global entity to the compile unit.
- ///
- void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
-
/// addGlobalType - Add a new global type to the compile unit.
///
void addGlobalType(DIType Ty);
+
+ /// addAccelName - Add a new name to the name accelerator table.
+ void addAccelName(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelNames[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelObjC(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelObjC[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelNamespace(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelNamespace[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) {
+ std::vector<std::pair<DIE*, unsigned > > &DIEs = AccelTypes[Name];
+ DIEs.push_back(Die);
+ }
+
/// getDIE - Returns the debug information entry map slot for the
/// specified debug variable.
DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
@@ -150,8 +186,7 @@ public:
/// addString - Add a string attribute data and value.
///
- void addString(DIE *Die, unsigned Attribute, unsigned Form,
- const StringRef Str);
+ void addString(DIE *Die, unsigned Attribute, const StringRef Str);
/// addLabel - Add a Dwarf label attribute data and value.
///
@@ -178,6 +213,7 @@ public:
void addSourceLine(DIE *Die, DISubprogram SP);
void addSourceLine(DIE *Die, DIType Ty);
void addSourceLine(DIE *Die, DINameSpace NS);
+ void addSourceLine(DIE *Die, DIObjCProperty Ty);
/// addAddress - Add an address attribute to a die based on the location
/// provided.
@@ -225,8 +261,10 @@ public:
/// addToContextOwner - Add Die into the list of its context owner's children.
void addToContextOwner(DIE *Die, DIDescriptor Context);
- /// addType - Add a new type attribute to the specified entity.
- void addType(DIE *Entity, DIType Ty);
+ /// addType - Add a new type attribute to the specified entity. This takes
+  /// an attribute parameter because DW_AT_friend attributes are also
+ /// type references.
+ void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type);
/// getOrCreateNameSpace - Create a DIE for DINameSpace.
DIE *getOrCreateNameSpace(DINameSpace NS);
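A rough sketch of how the per-unit accelerator maps declared above are meant to feed DwarfAccelTable; the helper name and the iteration are a plausible reconstruction under this patch's interfaces, while the real loops live in DwarfDebug.cpp:

// Hypothetical glue: pour one CompileUnit's type map into a types table.
// AccelTypes pairs each DIE with the eTypeFlagClassIsImplementation flags
// computed when the type DIE is created.
static void addCUTypes(CompileUnit *CU, DwarfAccelTable &AT) {
  const StringMap<std::vector<std::pair<DIE*, unsigned> > > &Types =
      CU->getAccelTypes();
  for (StringMap<std::vector<std::pair<DIE*, unsigned> > >::const_iterator
         I = Types.begin(), E = Types.end(); I != E; ++I)
    for (unsigned i = 0, e = I->second.size(); i != e; ++i)
      AT.AddName(I->getKeyData(), I->second[i].first,  // the type DIE
                 I->second[i].second);                 // flags -> HashDataContents::Flags
}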
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1b7e370fca09..cb7887890cda 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -14,10 +14,12 @@
#define DEBUG_TYPE "dwarfdebug"
#include "DwarfDebug.h"
#include "DIE.h"
+#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -52,6 +54,10 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
cl::desc("Make an absence of debug location information explicit."),
cl::init(false));
+static cl::opt<bool> DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
+ cl::desc("Output prototype dwarf accelerator tables."),
+ cl::init(false));
+
namespace {
const char *DWARFGroupName = "DWARF Emission";
const char *DbgTimerName = "DWARF Debug Writer";
@@ -128,6 +134,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfStrSectionSym = TextSectionSym = 0;
DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
+
+ // Turn on accelerator tables for Darwin.
+ if (Triple(M->getTargetTriple()).isOSDarwin())
+ DwarfAccelTables = true;
+
{
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
beginModule(M);
@@ -136,6 +147,22 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfDebug::~DwarfDebug() {
}
+/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
+/// temporary label to it if SymbolStem is specified.
+static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+ const char *SymbolStem = 0) {
+ Asm->OutStreamer.SwitchSection(Section);
+ if (!SymbolStem) return 0;
+
+ MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
+ Asm->OutStreamer.EmitLabel(TmpSym);
+ return TmpSym;
+}
+
+MCSymbol *DwarfDebug::getStringPool() {
+ return Asm->GetTempSymbol("section_str");
+}
+
MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
std::pair<MCSymbol*, unsigned> &Entry = StringPool[Str];
if (Entry.first) return Entry.first;
@@ -144,7 +171,6 @@ MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
return Entry.first = Asm->GetTempSymbol("string", Entry.second);
}
-
/// assignAbbrevNumber - Define a unique number for the abbreviation.
///
void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
@@ -178,6 +204,63 @@ static StringRef getRealLinkageName(StringRef LinkageName) {
return LinkageName;
}
+static bool isObjCClass(StringRef Name) {
+ return Name.startswith("+") || Name.startswith("-");
+}
+
+static bool hasObjCCategory(StringRef Name) {
+ if (!isObjCClass(Name)) return false;
+
+ size_t pos = Name.find(')');
+ if (pos != std::string::npos) {
+ if (Name[pos+1] != ' ') return false;
+ return true;
+ }
+ return false;
+}
+
+static void getObjCClassCategory(StringRef In, StringRef &Class,
+ StringRef &Category) {
+ if (!hasObjCCategory(In)) {
+ Class = In.slice(In.find('[') + 1, In.find(' '));
+ Category = "";
+ return;
+ }
+
+ Class = In.slice(In.find('[') + 1, In.find('('));
+ Category = In.slice(In.find('[') + 1, In.find(' '));
+ return;
+}
+
+static StringRef getObjCMethodName(StringRef In) {
+ return In.slice(In.find(' ') + 1, In.find(']'));
+}
+
+// Add the various names to the DWARF accelerator tables.
+static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
+ DIE* Die) {
+ if (!SP.isDefinition()) return;
+
+ TheCU->addAccelName(SP.getName(), Die);
+
+ // If the linkage name is different than the name, go ahead and output
+ // that as well into the name table.
+ if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName())
+ TheCU->addAccelName(SP.getLinkageName(), Die);
+
+ // If this is an Objective-C selector name add it to the ObjC accelerator
+ // too.
+ if (isObjCClass(SP.getName())) {
+ StringRef Class, Category;
+ getObjCClassCategory(SP.getName(), Class, Category);
+ TheCU->addAccelObjC(Class, Die);
+ if (Category != "")
+ TheCU->addAccelObjC(Category, Die);
+ // Also add the base method name to the name table.
+ TheCU->addAccelName(getObjCMethodName(SP.getName()), Die);
+ }
+}
+
/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
/// If there are global variables in this scope then create and insert
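As a worked example of the string slicing in the Objective-C helpers added above (the selector is hypothetical):

// For SP.getName() == "-[NSString(MyCategory) myMethod:]":
//   isObjCClass()          -> true                     (leading '-')
//   hasObjCCategory()      -> true                     (')' followed by ' ')
//   getObjCClassCategory() -> Class    = "NSString"
//                             Category = "NSString(MyCategory)"
//   getObjCMethodName()    -> "myMethod:"
// addSubprogramNames() then files the DIE under the full selector name (and
// the linkage name, if different) in the name table, under Class and
// Category in the ObjC table, and under the bare method name in the name
// table again.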
@@ -190,11 +273,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
DISubprogram SP(SPNode);
DISubprogram SPDecl = SP.getFunctionDeclaration();
- if (SPDecl.isSubprogram())
- // Refer function declaration directly.
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
- SPCU->getOrCreateSubprogramDIE(SPDecl));
- else {
+ if (!SPDecl.isSubprogram()) {
// There is not any need to generate specification DIE for a function
// defined at compile unit level. If a function is defined inside another
// function then gdb prefers the definition at top level and but does not
@@ -203,7 +282,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
!SP.getContext().isFile() &&
!isSubprogramContext(SP.getContext())) {
- SPCU-> addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
// Add arguments.
DICompositeType SPTy = SP.getType();
@@ -241,6 +320,10 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
MachineLocation Location(RI->getFrameRegister(*Asm->MF));
SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+ // Add name to the name table, we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_subprogram nodes.
+ addSubprogramNames(SPCU, SP, SPDie);
+
return SPDie;
}
@@ -248,7 +331,6 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
LexicalScope *Scope) {
-
DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
if (Scope->isAbstractScope())
return ScopeDIE;
@@ -294,10 +376,9 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
/// of the function.
DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
LexicalScope *Scope) {
-
const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
- assert (Ranges.empty() == false
- && "LexicalScope does not have instruction markers!");
+ assert(Ranges.empty() == false &&
+ "LexicalScope does not have instruction markers!");
if (!Scope->getScopeNode())
return NULL;
@@ -314,8 +395,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
if (StartLabel == 0 || EndLabel == 0) {
- assert (0 && "Unexpected Start and End labels for a inlined scope!");
- return 0;
+    llvm_unreachable("Unexpected Start and End labels for an inlined scope!");
}
assert(StartLabel->isDefined() &&
"Invalid starting label for an inlined scope!");
@@ -358,16 +438,20 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
I = InlineInfo.find(InlinedSP);
if (I == InlineInfo.end()) {
- InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel,
- ScopeDIE));
+ InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE));
InlinedSPNodes.push_back(InlinedSP);
} else
I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
DILocation DL(Scope->getInlinedAt());
- TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID());
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
+ GetOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+ // Add name to the name table, we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_inlined_subprogram nodes.
+ addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
+
return ScopeDIE;
}
@@ -376,7 +460,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
if (!Scope || !Scope->getScopeNode())
return NULL;
- SmallVector <DIE *, 8> Children;
+ SmallVector<DIE *, 8> Children;
// Collect arguments for current function.
if (LScopes.isCurrentFunctionScope(Scope))
@@ -426,39 +510,39 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
ScopeDIE->addChild(*I);
if (DS.isSubprogram())
- TheCU->addPubTypes(DISubprogram(DS));
+ TheCU->addPubTypes(DISubprogram(DS));
- return ScopeDIE;
+ return ScopeDIE;
}
/// GetOrCreateSourceID - Look up the source id with the given directory and
/// source file names. If none currently exists, create a new id and insert it
/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
/// maps as well.
-
unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
StringRef DirName) {
// If FE did not provide a file name, then assume stdin.
if (FileName.empty())
return GetOrCreateSourceID("<stdin>", StringRef());
- // MCStream expects full path name as filename.
- if (!DirName.empty() && !sys::path::is_absolute(FileName)) {
- SmallString<128> FullPathName = DirName;
- sys::path::append(FullPathName, FileName);
- // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
- return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
- }
+ // TODO: this might not belong here. See if we can factor this better.
+ if (DirName == CompilationDir)
+ DirName = "";
- StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
- if (Entry.getValue())
- return Entry.getValue();
+ unsigned SrcId = SourceIdMap.size()+1;
- unsigned SrcId = SourceIdMap.size();
- Entry.setValue(SrcId);
+ // We look up the file/dir pair by concatenating them with a zero byte.
+ SmallString<128> NamePair;
+ NamePair += DirName;
+ NamePair += '\0'; // Zero bytes are not allowed in paths.
+ NamePair += FileName;
+
+ StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId);
+ if (Ent.getValue() != SrcId)
+ return Ent.getValue();
// Print out a .file directive to specify files for .loc directives.
- Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey());
+ Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName);
return SrcId;
}
@@ -468,39 +552,36 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
DICompileUnit DIUnit(N);
StringRef FN = DIUnit.getFilename();
- StringRef Dir = DIUnit.getDirectory();
- unsigned ID = GetOrCreateSourceID(FN, Dir);
+ CompilationDir = DIUnit.getDirectory();
+ unsigned ID = GetOrCreateSourceID(FN, CompilationDir);
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
- CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this);
- NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
- DIUnit.getProducer());
+ CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, Asm, this);
+ NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit.getLanguage());
- NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
- // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This
- // simplifies debug range entries.
- NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
+ NewCU->addString(Die, dwarf::DW_AT_name, FN);
+ // DWARF spec section 2.17.1 requires that we use DW_AT_low_pc for a single
+ // entry point into an entity.
+ NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
// DW_AT_stmt_list is an offset of line number information for this
// compile unit in debug_line section.
- if(Asm->MAI->doesDwarfRequireRelocationForSectionOffset())
+ if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset())
NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
Asm->GetTempSymbol("section_line"));
else
NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
- if (!Dir.empty())
- NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+ if (!CompilationDir.empty())
+ NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
if (DIUnit.isOptimized())
NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
StringRef Flags = DIUnit.getFlags();
if (!Flags.empty())
- NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string,
- Flags);
+ NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
- unsigned RVer = DIUnit.getRunTimeVersion();
- if (RVer)
+ if (unsigned RVer = DIUnit.getRunTimeVersion())
NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
@@ -513,6 +594,11 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
/// construct SubprogramDIE - Construct subprogram DIE.
void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
const MDNode *N) {
+ CompileUnit *&CURef = SPMap[N];
+ if (CURef)
+ return;
+ CURef = TheCU;
+
DISubprogram SP(N);
if (!SP.isDefinition())
// This is a method declaration which will be handled while constructing
@@ -527,10 +613,6 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
// Add to context owner.
TheCU->addToContextOwner(SubprogramDie, SP.getContext());
- // Expose as global.
- TheCU->addGlobal(SP.getName(), SubprogramDie);
-
- SPMap[N] = TheCU;
return;
}
@@ -676,7 +758,7 @@ void DwarfDebug::endModule() {
// Construct subprogram DIE and add variables DIEs.
CompileUnit *SPCU = CUMap.lookup(TheCU);
- assert (SPCU && "Unable to find Compile Unit!");
+ assert(SPCU && "Unable to find Compile Unit!");
constructSubprogramDIE(SPCU, SP);
DIE *ScopeDIE = SPCU->getDIE(SP);
for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
@@ -697,6 +779,13 @@ void DwarfDebug::endModule() {
DIE *ISP = *AI;
FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
}
+ for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(),
+ AE = AbstractSPDies.end(); AI != AE; ++AI) {
+ DIE *ISP = AI->second;
+ if (InlinedSubprogramDIEs.count(ISP))
+ continue;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
@@ -727,9 +816,14 @@ void DwarfDebug::endModule() {
// Corresponding abbreviations into a abbrev section.
emitAbbreviations();
- // Emit info into a debug pubnames section.
- emitDebugPubNames();
-
+ // Emit info into the dwarf accelerator table sections.
+ if (DwarfAccelTables) {
+ emitAccelNames();
+ emitAccelObjC();
+ emitAccelNamespaces();
+ emitAccelTypes();
+ }
+
// Emit info into a debug pubtypes section.
emitDebugPubTypes();
@@ -837,7 +931,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
/// isDbgValueInDefinedReg - Return true if debug value, encoded by
/// DBG_VALUE instruction, is in a defined reg.
static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
- assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+ assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
return MI->getNumOperands() == 3 &&
MI->getOperand(0).isReg() && MI->getOperand(0).getReg() &&
MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
@@ -867,8 +961,7 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
if (MI->getOperand(0).isCImm())
return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm());
- assert (0 && "Unexpected 3 operand DBG_VALUE instruction!");
- return DotDebugLocEntry();
+ llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
}
/// collectVariableInfo - Find variables for each lexical scope.
@@ -964,7 +1057,8 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
}
// The value is valid until the next DBG_VALUE or clobber.
- DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, Begin));
+ DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel,
+ Begin));
}
DotDebugLocEntries.push_back(DotDebugLocEntry());
}
@@ -999,12 +1093,15 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
if (!MI->isDebugValue()) {
DebugLoc DL = MI->getDebugLoc();
if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) {
- unsigned Flags = DWARF2_FLAG_IS_STMT;
+ unsigned Flags = 0;
PrevInstLoc = DL;
if (DL == PrologEndLoc) {
Flags |= DWARF2_FLAG_PROLOGUE_END;
PrologEndLoc = DebugLoc();
}
+ if (PrologEndLoc.isUnknown())
+ Flags |= DWARF2_FLAG_IS_STMT;
+
if (!DL.isUnknown()) {
const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
@@ -1099,12 +1196,19 @@ static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) {
}
/// getFnDebugLoc - Walk up the scope chain of given debug loc and find
-/// line number info for the function.
+/// line number info for the function.
static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
const MDNode *Scope = getScopeNode(DL, Ctx);
DISubprogram SP = getDISubprogram(Scope);
- if (SP.Verify())
- return DebugLoc::get(SP.getLineNumber(), 0, SP);
+ if (SP.Verify()) {
+ // Check the number of operands, since that compatibility check is
+ // cheap here.
+ if (SP->getNumOperands() > 19)
+ return DebugLoc::get(SP.getScopeLineNumber(), 0, SP);
+ else
+ return DebugLoc::get(SP.getLineNumber(), 0, SP);
+ }
+
return DebugLoc();
}
@@ -1135,7 +1239,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
const MachineInstr *MI = II;
if (MI->isDebugValue()) {
- assert (MI->getNumOperands() > 1 && "Invalid machine instruction!");
+ assert(MI->getNumOperands() > 1 && "Invalid machine instruction!");
// Keep track of user variables.
const MDNode *Var =
@@ -1206,7 +1310,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
continue;
- for (const unsigned *AI = TRI->getOverlaps(MOI->getReg());
+ for (const uint16_t *AI = TRI->getOverlaps(MOI->getReg());
unsigned Reg = *AI; ++AI) {
const MDNode *Var = LiveUserVar[Reg];
if (!Var)
@@ -1277,7 +1381,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
MF->getFunction()->getContext());
recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
FnStartDL.getScope(MF->getFunction()->getContext()),
- DWARF2_FLAG_IS_STMT);
+ 0);
}
}
@@ -1303,7 +1407,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
- assert (TheCU && "Unable to find compile unit!");
+ assert(TheCU && "Unable to find compile unit!");
// Construct abstract scopes.
ArrayRef<LexicalScope *> AList = LScopes.getAbstractScopesList();
@@ -1327,7 +1431,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
- if (!DisableFramePointerElim(*MF))
+ if (!MF->getTarget().Options.DisableFramePointerElim(*MF))
TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
dwarf::DW_FORM_flag, 1);
@@ -1380,7 +1484,7 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
Fn = DB.getFilename();
Dir = DB.getDirectory();
} else
- assert(0 && "Unexpected scope info");
+ llvm_unreachable("Unexpected scope info");
Src = GetOrCreateSourceID(Fn, Dir);
}
@@ -1398,10 +1502,6 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
// Get the children.
const std::vector<DIE *> &Children = Die->getChildren();
- // If not last sibling and has children then add sibling offset attribute.
- if (!Last && !Children.empty())
- Die->addSiblingOffset(DIEValueAllocator);
-
// Record the abbreviation.
assignAbbrevNumber(Die->getAbbrev());
@@ -1454,18 +1554,6 @@ void DwarfDebug::computeSizeAndOffsets() {
}
}
-/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
-/// temporary label to it if SymbolStem is specified.
-static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
- const char *SymbolStem = 0) {
- Asm->OutStreamer.SwitchSection(Section);
- if (!SymbolStem) return 0;
-
- MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
- Asm->OutStreamer.EmitLabel(TmpSym);
- return TmpSym;
-}
-
/// EmitSectionLabels - Emit initial Dwarf sections with a label at
/// the start of each one.
void DwarfDebug::EmitSectionLabels() {
@@ -1483,7 +1571,6 @@ void DwarfDebug::EmitSectionLabels() {
EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
EmitSectionSym(Asm, TLOF.getDwarfLocSection());
- EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
DwarfStrSectionSym =
EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
@@ -1525,9 +1612,6 @@ void DwarfDebug::emitDIE(DIE *Die) {
Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
switch (Attr) {
- case dwarf::DW_AT_sibling:
- Asm->EmitInt32(Die->getSiblingOffset());
- break;
case dwarf::DW_AT_abstract_origin: {
DIEEntry *E = cast<DIEEntry>(Values[i]);
DIE *Origin = E->getEntry();
@@ -1539,7 +1623,7 @@ void DwarfDebug::emitDIE(DIE *Die) {
// DW_AT_range Value encodes offset in debug_range section.
DIEInteger *V = cast<DIEInteger>(Values[i]);
- if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) {
+ if (Asm->MAI->doesDwarfUseLabelOffsetForRanges()) {
Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
V->getValue(),
4);
@@ -1678,62 +1762,133 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
Asm->EmitInt8(1);
}
-/// emitDebugPubNames - Emit visible names into a debug pubnames section.
-///
-void DwarfDebug::emitDebugPubNames() {
+/// emitAccelNames - Emit visible names into a hashed accelerator table
+/// section.
+void DwarfDebug::emitAccelNames() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I) {
CompileUnit *TheCU = I->second;
- // Start the dwarf pubnames section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfPubNamesSection());
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNames();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
- Asm->OutStreamer.AddComment("Length of Public Names Info");
- Asm->EmitLabelDifference(
- Asm->GetTempSymbol("pubnames_end", TheCU->getID()),
- Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4);
+ AT.FinalizeTable(Asm, "Names");
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfAccelNamesSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
- TheCU->getID()));
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, this);
+}
- Asm->OutStreamer.AddComment("DWARF Version");
- Asm->EmitInt16(dwarf::DWARF_VERSION);
+/// emitAccelObjC - Emit Objective-C classes and categories into a hashed
+/// accelerator table section.
+void DwarfDebug::emitAccelObjC() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelObjC();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
- Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
- DwarfInfoSectionSym);
+ AT.FinalizeTable(Asm, "ObjC");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelObjCSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
- Asm->OutStreamer.AddComment("Compilation Unit Length");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
- Asm->GetTempSymbol("info_begin", TheCU->getID()),
- 4);
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, this);
+}
- const StringMap<DIE*> &Globals = TheCU->getGlobals();
- for (StringMap<DIE*>::const_iterator
- GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+/// emitAccelNamespaces - Emit namespace DIEs into a hashed accelerator
+/// table.
+void DwarfDebug::emitAccelNamespaces() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNamespace();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
const char *Name = GI->getKeyData();
- DIE *Entity = GI->second;
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
- Asm->OutStreamer.AddComment("DIE offset");
- Asm->EmitInt32(Entity->getOffset());
+ AT.FinalizeTable(Asm, "namespac");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelNamespaceSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
- if (Asm->isVerbose())
- Asm->OutStreamer.AddComment("External Name");
- Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
- }
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, this);
+}
- Asm->OutStreamer.AddComment("End Mark");
- Asm->EmitInt32(0);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
- TheCU->getID()));
+/// emitAccelTypes() - Emit type DIEs into a hashed accelerator table.
+void DwarfDebug::emitAccelTypes() {
+ std::vector<DwarfAccelTable::Atom> Atoms;
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTag,
+ dwarf::DW_FORM_data2));
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTypeFlags,
+ dwarf::DW_FORM_data1));
+ DwarfAccelTable AT(Atoms);
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<std::pair<DIE*, unsigned > > > &Names
+ = TheCU->getAccelTypes();
+ for (StringMap<std::vector<std::pair<DIE*, unsigned> > >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<std::pair<DIE *, unsigned> > &Entities = GI->second;
+ for (std::vector<std::pair<DIE *, unsigned> >::const_iterator DI
+ = Entities.begin(), DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI).first, (*DI).second);
+ }
}
+
+ AT.FinalizeTable(Asm, "types");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelTypesSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
+
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, this);
}
void DwarfDebug::emitDebugPubTypes() {
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I) {
CompileUnit *TheCU = I->second;
- // Start the dwarf pubnames section.
+ // Start the dwarf pubtypes section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfPubTypesSection());
Asm->OutStreamer.AddComment("Length of Public Types Info");
@@ -1766,6 +1921,7 @@ void DwarfDebug::emitDebugPubTypes() {
Asm->EmitInt32(Entity->getOffset());
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+ // Emit the name with a terminating null byte.
Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
}
@@ -1801,8 +1957,10 @@ void DwarfDebug::emitDebugStr() {
// Emit a label for reference from debug information entries.
Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
- // Emit the string itself.
- Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/);
+ // Emit the string itself with a terminating null byte.
+ Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(),
+ Entries[i].second->getKeyLength()+1),
+ 0/*addrspace*/);
}
}
@@ -1958,7 +2116,7 @@ void DwarfDebug::emitDebugMacInfo() {
/// __debug_info section, and the low_pc is the starting address for the
/// inlining instance.
void DwarfDebug::emitDebugInlineInfo() {
- if (!Asm->MAI->doesDwarfUsesInlineInfoSection())
+ if (!Asm->MAI->doesDwarfUseInlineInfoSection())
return;
if (!FirstCU)
@@ -1990,10 +2148,9 @@ void DwarfDebug::emitDebugInlineInfo() {
StringRef Name = SP.getName();
Asm->OutStreamer.AddComment("MIPS linkage name");
- if (LName.empty()) {
- Asm->OutStreamer.EmitBytes(Name, 0);
- Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator.
- } else
+ if (LName.empty())
+ Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym);
+ else
Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)),
DwarfStrSectionSym);
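
The GetOrCreateSourceID change above keys SourceIdMap with the directory and file name joined by a zero byte, since a NUL can never appear inside either path component. Below is a minimal, self-contained sketch of that keying idea only; it uses std::map instead of LLVM's StringMap, and the helper name and 1-based id scheme are illustrative, not the patch's code.

#include <iostream>
#include <map>
#include <string>

// Sketch: one map keyed by (directory, file) pairs joined with '\0', which is
// never a valid byte inside either component, so the pairing is unambiguous.
static unsigned getOrCreateSourceID(std::map<std::string, unsigned> &Ids,
                                    const std::string &Dir,
                                    const std::string &File) {
  std::string Key = Dir;
  Key.push_back('\0');                                      // separator
  Key += File;
  unsigned NextId = static_cast<unsigned>(Ids.size()) + 1;  // ids start at 1
  return Ids.insert({Key, NextId}).first->second;           // existing id wins
}

int main() {
  std::map<std::string, unsigned> Ids;
  std::cout << getOrCreateSourceID(Ids, "/src", "a.c") << "\n"; // 1
  std::cout << getOrCreateSourceID(Ids, "/src", "a.c") << "\n"; // 1 (reused)
  std::cout << getOrCreateSourceID(Ids, "", "a.c") << "\n";     // 2 (new pair)
  return 0;
}
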
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 35653be5c897..83f30f5b446f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -30,7 +30,8 @@
namespace llvm {
class CompileUnit;
-class DbgConcreteScope;
+class ConstantInt;
+class ConstantFP;
class DbgVariable;
class MachineFrameInfo;
class MachineModuleInfo;
@@ -207,8 +208,8 @@ class DwarfDebug {
///
std::vector<DIEAbbrev *> Abbreviations;
- /// SourceIdMap - Source id map, i.e. pair of directory id and source file
- /// id mapped to a unique id.
+ /// SourceIdMap - Source id map, i.e. pair of source filename and directory,
+ /// separated by a zero byte, mapped to a unique id.
StringMap<unsigned> SourceIdMap;
/// StringPool - A String->Symbol mapping of strings used by indirect
@@ -216,8 +217,6 @@ class DwarfDebug {
StringMap<std::pair<MCSymbol*, unsigned> > StringPool;
unsigned NextStringPoolNumber;
- MCSymbol *getStringPoolEntry(StringRef Str);
-
/// SectionMap - Provides a unique id per text section.
///
UniqueVector<const MCSection*> SectionMap;
@@ -239,12 +238,12 @@ class DwarfDebug {
/// DotDebugLocEntries - Collection of DotDebugLocEntry.
SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
- /// InliendSubprogramDIEs - Collection of subprgram DIEs that are marked
+ /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
/// (at the end of the module) as DW_AT_inline.
SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
/// InlineInfo - Keep track of inlined functions and their location. This
- /// information is used to populate debug_inlined section.
+ /// information is used to populate the debug_inlined section.
typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
SmallVector<const MDNode *, 4> InlinedSPNodes;
@@ -304,6 +303,10 @@ class DwarfDebug {
MCSymbol *DwarfDebugLocSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
+ // As an optimization, there is no need to emit an entry in the directory
+ // table for the same directory as DW_AT_comp_dir.
+ StringRef CompilationDir;
+
private:
/// assignAbbrevNumber - Define a unique number for the abbreviation.
@@ -340,7 +343,7 @@ private:
/// the start of each one.
void EmitSectionLabels();
- /// emitDIE - Recusively Emits a debug information entry.
+ /// emitDIE - Recursively Emits a debug information entry.
///
void emitDIE(DIE *Die);
@@ -365,10 +368,22 @@ private:
///
void emitEndOfLineMatrix(unsigned SectionEnd);
- /// emitDebugPubNames - Emit visible names into a debug pubnames section.
- ///
- void emitDebugPubNames();
+ /// emitAccelNames - Emit visible names into a hashed accelerator table
+ /// section.
+ void emitAccelNames();
+
+ /// emitAccelObjC - Emit Objective-C classes and categories into a hashed
+ /// accelerator table section.
+ void emitAccelObjC();
+
+ /// emitAccelNamespaces - Emit namespace DIEs into a hashed accelerator
+ /// table.
+ void emitAccelNamespaces();
+ /// emitAccelTypes() - Emit type DIEs into a hashed accelerator table.
+ ///
+ void emitAccelTypes();
+
/// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
///
void emitDebugPubTypes();
@@ -407,10 +422,10 @@ private:
/// 3. an unsigned LEB128 number indicating the number of distinct inlining
/// instances for the function.
///
- /// The rest of the entry consists of a {die_offset, low_pc} pair for each
+ /// The rest of the entry consists of a {die_offset, low_pc} pair for each
/// inlined instance; the die_offset points to the inlined_subroutine die in
- /// the __debug_info section, and the low_pc is the starting address for the
- /// inlining instance.
+ /// the __debug_info section, and the low_pc is the starting address for the
+ /// inlining instance.
void emitDebugInlineInfo();
/// constructCompileUnit - Create new CompileUnit for the given
@@ -426,8 +441,8 @@ private:
void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
unsigned Flags);
- /// identifyScopeMarkers() - Indentify instructions that are marking
- /// beginning of or end of a scope.
+ /// identifyScopeMarkers() - Identify instructions that mark the
+ /// beginning or end of a scope.
void identifyScopeMarkers();
/// addCurrentFnArgument - If Var is a current function argument then add
@@ -472,7 +487,7 @@ public:
void collectInfoFromNamedMDNodes(Module *M);
/// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
- /// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder.
+ /// FIXME - Remove this when DragonEgg switches to DIBuilder.
bool collectLegacyDebugInfo(Module *M);
/// beginModule - Emit all Dwarf sections that should come prior to the
@@ -504,6 +519,13 @@ public:
/// createSubprogramDIE - Create new DIE using SP.
DIE *createSubprogramDIE(DISubprogram SP);
+
+ /// getStringPool - returns the symbol for the start of the string pool.
+ MCSymbol *getStringPool();
+
+ /// getStringPoolEntry - returns an entry into the string pool with the given
+ /// string text.
+ MCSymbol *getStringPoolEntry(StringRef Str);
};
} // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 18b726b173dc..70cc2e56b3e1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -31,6 +31,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -184,7 +185,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
/// CallToNoUnwindFunction - Return `true' if this is a call to a function
/// marked `nounwind'. Return `false' otherwise.
bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
- assert(MI->getDesc().isCall() && "This should be a call instruction!");
+ assert(MI->isCall() && "This should be a call instruction!");
bool MarkedNoUnwind = false;
bool SawFunc = false;
@@ -243,7 +244,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
MI != E; ++MI) {
if (!MI->isLabel()) {
- if (MI->getDesc().isCall())
+ if (MI->isCall())
SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
continue;
}
@@ -529,10 +530,8 @@ void DwarfException::EmitExceptionTable() {
// Offset of the landing pad, counted in 16-byte bundles relative to the
// @LPStart address.
if (VerboseAsm) {
- Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
- llvm::utostr(idx) + " <<");
- Asm->OutStreamer.AddComment(Twine(" On exception at call site ") +
- llvm::utostr(idx));
+ Asm->OutStreamer.AddComment(">> Call Site " + Twine(idx) + " <<");
+ Asm->OutStreamer.AddComment(" On exception at call site "+Twine(idx));
}
Asm->EmitULEB128(idx);
@@ -543,8 +542,8 @@ void DwarfException::EmitExceptionTable() {
if (S.Action == 0)
Asm->OutStreamer.AddComment(" Action: cleanup");
else
- Asm->OutStreamer.AddComment(Twine(" Action: ") +
- llvm::utostr((S.Action - 1) / 2 + 1));
+ Asm->OutStreamer.AddComment(" Action: " +
+ Twine((S.Action - 1) / 2 + 1));
}
Asm->EmitULEB128(S.Action);
}
@@ -596,8 +595,7 @@ void DwarfException::EmitExceptionTable() {
// number of 16-byte bundles. The first call site is counted relative to
// the start of the procedure fragment.
if (VerboseAsm)
- Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
- llvm::utostr(++Entry) + " <<");
+ Asm->OutStreamer.AddComment(">> Call Site " + Twine(++Entry) + " <<");
Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
if (VerboseAsm)
Asm->OutStreamer.AddComment(Twine(" Call between ") +
@@ -625,8 +623,8 @@ void DwarfException::EmitExceptionTable() {
if (S.Action == 0)
Asm->OutStreamer.AddComment(" On action: cleanup");
else
- Asm->OutStreamer.AddComment(Twine(" On action: ") +
- llvm::utostr((S.Action - 1) / 2 + 1));
+ Asm->OutStreamer.AddComment(" On action: " +
+ Twine((S.Action - 1) / 2 + 1));
}
Asm->EmitULEB128(S.Action);
}
@@ -640,8 +638,7 @@ void DwarfException::EmitExceptionTable() {
if (VerboseAsm) {
// Emit comments that decode the action table.
- Asm->OutStreamer.AddComment(Twine(">> Action Record ") +
- llvm::utostr(++Entry) + " <<");
+ Asm->OutStreamer.AddComment(">> Action Record " + Twine(++Entry) + " <<");
}
// Type Filter
@@ -650,11 +647,11 @@ void DwarfException::EmitExceptionTable() {
// type of the catch clauses or the types in the exception specification.
if (VerboseAsm) {
if (Action.ValueForTypeID > 0)
- Asm->OutStreamer.AddComment(Twine(" Catch TypeInfo ") +
- llvm::itostr(Action.ValueForTypeID));
+ Asm->OutStreamer.AddComment(" Catch TypeInfo " +
+ Twine(Action.ValueForTypeID));
else if (Action.ValueForTypeID < 0)
- Asm->OutStreamer.AddComment(Twine(" Filter TypeInfo ") +
- llvm::itostr(Action.ValueForTypeID));
+ Asm->OutStreamer.AddComment(" Filter TypeInfo " +
+ Twine(Action.ValueForTypeID));
else
Asm->OutStreamer.AddComment(" Cleanup");
}
@@ -669,8 +666,7 @@ void DwarfException::EmitExceptionTable() {
Asm->OutStreamer.AddComment(" No further actions");
} else {
unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
- Asm->OutStreamer.AddComment(Twine(" Continue to action ") +
- llvm::utostr(NextAction));
+ Asm->OutStreamer.AddComment(" Continue to action "+Twine(NextAction));
}
}
Asm->EmitSLEB128(Action.NextAction);
@@ -687,7 +683,7 @@ void DwarfException::EmitExceptionTable() {
I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
const GlobalVariable *GV = *I;
if (VerboseAsm)
- Asm->OutStreamer.AddComment(Twine("TypeInfo ") + llvm::utostr(Entry--));
+ Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
if (GV)
Asm->EmitReference(GV, TTypeEncoding);
else
@@ -707,7 +703,7 @@ void DwarfException::EmitExceptionTable() {
if (VerboseAsm) {
--Entry;
if (TypeID != 0)
- Asm->OutStreamer.AddComment(Twine("FilterInfo ") + llvm::itostr(Entry));
+ Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
}
Asm->EmitULEB128(TypeID);
@@ -719,17 +715,17 @@ void DwarfException::EmitExceptionTable() {
/// EndModule - Emit all exception information that should come after the
/// content.
void DwarfException::EndModule() {
- assert(0 && "Should be implemented");
+ llvm_unreachable("Should be implemented");
}
/// BeginFunction - Gather pre-function exception information. Assumes it's
/// being emitted immediately after the function entry point.
void DwarfException::BeginFunction(const MachineFunction *MF) {
- assert(0 && "Should be implemented");
+ llvm_unreachable("Should be implemented");
}
/// EndFunction - Gather and emit post-function exception information.
///
void DwarfException::EndFunction() {
- assert(0 && "Should be implemented");
+ llvm_unreachable("Should be implemented");
}
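
The comment-string edits above drop llvm::utostr() in favor of concatenating a Twine directly. A small sketch of the difference follows, assuming LLVM headers and libSupport are available to build against; the index value and message text are made up for illustration.

#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include <iostream>
#include <string>
using namespace llvm;

int main() {
  unsigned Idx = 3;
  // Old style: utostr() materializes a temporary std::string up front.
  std::string Old = (Twine(">> Call Site ") + utostr(Idx) + " <<").str();
  // New style: Twine stores the integer by value and formats it only when the
  // final string is rendered, so no intermediate std::string is built.
  std::string New = (">> Call Site " + Twine(Idx) + " <<").str();
  std::cout << Old << "\n" << New << "\n"; // both print ">> Call Site 3 <<"
  return 0;
}
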
diff --git a/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..20b1f7b45b31
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/CodeGen/AsmPrinter/LLVMBuild.txt -------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AsmPrinter
+parent = Libraries
+required_libraries = Analysis CodeGen Core MC MCParser Support Target
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 75288b0934cb..ef1d2baed9ce 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -61,29 +62,33 @@ TailMergeSize("tail-merge-size",
namespace {
/// BranchFolderPass - Wrap branch folder in a machine function pass.
- class BranchFolderPass : public MachineFunctionPass,
- public BranchFolder {
+ class BranchFolderPass : public MachineFunctionPass {
public:
static char ID;
- explicit BranchFolderPass(bool defaultEnableTailMerge)
- : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge, true) {}
+ explicit BranchFolderPass(): MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
};
}
char BranchFolderPass::ID = 0;
+char &llvm::BranchFolderPassID = BranchFolderPass::ID;
-FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
- return new BranchFolderPass(DefaultEnableTailMerge);
-}
+INITIALIZE_PASS(BranchFolderPass, "branch-folder",
+ "Control Flow Optimizer", false, false)
bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
- return OptimizeFunction(MF,
- MF.getTarget().getInstrInfo(),
- MF.getTarget().getRegisterInfo(),
- getAnalysisIfAvailable<MachineModuleInfo>());
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true);
+ return Folder.OptimizeFunction(MF,
+ MF.getTarget().getInstrInfo(),
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
}
@@ -132,7 +137,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
break;
unsigned Reg = I->getOperand(0).getReg();
ImpDefRegs.insert(Reg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
ImpDefRegs.insert(SubReg);
++I;
@@ -179,8 +184,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
TII = tii;
TRI = tri;
MMI = mmi;
+ RS = NULL;
- RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+ // Use a RegScavenger to help update liveness when required.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (MRI.tracksLiveness() && TRI->requiresRegisterScavenging(MF))
+ RS = new RegScavenger();
+ else
+ MRI.invalidateLiveness();
// Fix CFG. The later algorithms expect it to be right.
bool MadeChange = false;
@@ -208,7 +219,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
delete RS;
return MadeChange;
}
-
+
// Walk the function to find jump tables that are live.
BitVector JTIsLive(JTI->getJumpTables().size());
for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
@@ -432,10 +443,9 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
for (; I != E; ++I) {
if (I->isDebugValue())
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isCall())
+ if (I->isCall())
Time += 10;
- else if (MCID.mayLoad() || MCID.mayStore())
+ else if (I->mayLoad() || I->mayStore())
Time += 2;
else
++Time;
@@ -484,8 +494,9 @@ BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
// an object with itself.
#ifndef _GLIBCXX_DEBUG
llvm_unreachable("Predecessor appears twice");
-#endif
+#else
return false;
+#endif
}
}
@@ -502,7 +513,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB,
break;
}
--I;
- if (!I->getDesc().isTerminator()) break;
+ if (!I->isTerminator()) break;
++NumTerms;
}
return NumTerms;
@@ -550,8 +561,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
// heuristics.
unsigned EffectiveTailLen = CommonTailLen;
if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
- !MBB1->back().getDesc().isBarrier() &&
- !MBB2->back().getDesc().isBarrier())
+ !MBB1->back().isBarrier() &&
+ !MBB2->back().isBarrier())
++EffectiveTailLen;
// Check if the common tail is long enough to be worthwhile.
@@ -870,6 +881,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// Visit each predecessor only once.
if (!UniquePreds.insert(PBB))
continue;
+ // Skip blocks which may jump to a landing pad. Can't tail merge these.
+ if (PBB->getLandingPadSuccessor())
+ continue;
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
@@ -924,8 +938,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(IBB, PredBB);
// Reinsert an unconditional branch if needed.
- // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks.
- PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ // The 1 below can occur as a result of removing blocks in
+ // TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
if (MergePotentials.size() == 1 &&
MergePotentials.begin()->getBlock() != PredBB)
FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
@@ -980,7 +995,7 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
if (!MBBI->isDebugValue())
break;
}
- return (MBBI->getDesc().isBranch());
+ return (MBBI->isBranch());
}
/// IsBetterFallthrough - Return true if it would be clearly better to
@@ -1008,7 +1023,23 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
MachineBasicBlock::iterator MBB2I = --MBB2->end();
while (MBB2I->isDebugValue())
--MBB2I;
- return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
+ return MBB2I->isCall() && !MBB1I->isCall();
+}
+
+/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch
+/// instructions in the block. Always use the DebugLoc of the first
+/// branching instruction found unless it's absent, in which case use the
+/// DebugLoc of the second if present.
+static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return DebugLoc();
+ --I;
+ while (I->isDebugValue() && I != MBB.begin())
+ --I;
+ if (I->isBranch())
+ return I->getDebugLoc();
+ return DebugLoc();
}
/// OptimizeBlock - Analyze and optimize control flow related to the specified
@@ -1016,7 +1047,6 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool MadeChange = false;
MachineFunction &MF = *MBB->getParent();
- DebugLoc dl; // FIXME: this is nowhere
ReoptimizeBlock:
MachineFunction::iterator FallThrough = MBB;
@@ -1065,6 +1095,7 @@ ReoptimizeBlock:
// destination, remove the branch, replacing it with an unconditional one or
// a fall-through.
if (PriorTBB && PriorTBB == PriorFBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
PriorCond.clear();
if (PriorTBB != MBB)
@@ -1091,7 +1122,7 @@ ReoptimizeBlock:
MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
--PrevBBIter;
MachineBasicBlock::iterator MBBIter = MBB->begin();
- // Check if DBG_VALUE at the end of PrevBB is identical to the
+ // Check if DBG_VALUE at the end of PrevBB is identical to the
// DBG_VALUE at the beginning of MBB.
while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
&& PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
@@ -1103,7 +1134,7 @@ ReoptimizeBlock:
}
}
PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
- PrevBB.removeSuccessor(PrevBB.succ_begin());;
+ PrevBB.removeSuccessor(PrevBB.succ_begin());
assert(PrevBB.succ_empty());
PrevBB.transferSuccessors(MBB);
MadeChange = true;
@@ -1122,6 +1153,7 @@ ReoptimizeBlock:
// If the prior block branches somewhere else on the condition and here if
// the condition is false, remove the uncond second branch.
if (PriorFBB == MBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
MadeChange = true;
@@ -1135,6 +1167,7 @@ ReoptimizeBlock:
if (PriorTBB == MBB) {
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl);
MadeChange = true;
@@ -1172,6 +1205,7 @@ ReoptimizeBlock:
DEBUG(dbgs() << "\nMoving MBB: " << *MBB
<< "To make fallthrough to: " << *PriorTBB << "\n");
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl);
@@ -1201,6 +1235,7 @@ ReoptimizeBlock:
if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
SmallVector<MachineOperand, 4> NewCond(CurCond);
if (!TII->ReverseBranchCondition(NewCond)) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
TII->RemoveBranch(*MBB);
TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
MadeChange = true;
@@ -1214,6 +1249,7 @@ ReoptimizeBlock:
if (CurTBB && CurCond.empty() && CurFBB == 0 &&
IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
!MBB->hasAddressTaken()) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
// This block may contain just an unconditional branch. Because there can
// be 'non-branch terminators' in the block, try removing the branch and
// then seeing if the block is empty.
@@ -1256,8 +1292,9 @@ ReoptimizeBlock:
assert(PriorFBB == 0 && "Machine CFG out of date!");
PriorFBB = MBB;
}
+ DebugLoc pdl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
}
// Iterate through all the predecessors, revectoring each in-turn.
@@ -1281,9 +1318,10 @@ ReoptimizeBlock:
bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
NewCurFBB, NewCurCond, true);
if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+ DebugLoc pdl = getBranchDebugLoc(*PMBB);
TII->RemoveBranch(*PMBB);
NewCurCond.clear();
- TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl);
+ TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl);
MadeChange = true;
++NumBranchOpts;
PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
@@ -1343,7 +1381,7 @@ ReoptimizeBlock:
if (CurFallsThru) {
MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
CurCond.clear();
- TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl);
+ TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc());
}
MBB->moveAfter(PredBB);
MadeChange = true;
@@ -1446,7 +1484,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
continue;
if (MO.isUse()) {
Uses.insert(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
Uses.insert(*AS);
} else if (!MO.isDead())
// Don't try to hoist code in the rare case the terminator defines a
@@ -1469,6 +1507,9 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
bool IsDef = false;
for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
const MachineOperand &MO = PI->getOperand(i);
+ // If PI has a regmask operand, it is probably a call. Separate away.
+ if (MO.isRegMask())
+ return Loc;
if (!MO.isReg() || MO.isUse())
continue;
unsigned Reg = MO.getReg();
@@ -1505,16 +1546,16 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
continue;
if (MO.isUse()) {
Uses.insert(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
Uses.insert(*AS);
} else {
if (Uses.count(Reg)) {
Uses.erase(Reg);
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
Uses.erase(*SR); // Use getSubRegisters to be conservative
}
Defs.insert(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
Defs.insert(*AS);
}
}
@@ -1581,6 +1622,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
bool IsSafe = true;
for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
MachineOperand &MO = TIB->getOperand(i);
+ // Don't attempt to hoist instructions with register masks.
+ if (MO.isRegMask()) {
+ IsSafe = false;
+ break;
+ }
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -1615,6 +1661,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
IsSafe = false;
break;
}
+
+ if (MO.isKill() && Uses.count(Reg))
+ // Kills a register that's read by the instruction at the point of
+ // insertion. Remove the kill marker.
+ MO.setIsKill(false);
}
}
if (!IsSafe)
@@ -1632,7 +1683,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
unsigned Reg = MO.getReg();
if (!Reg || !LocalDefsSet.count(Reg))
continue;
- for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR)
+ for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR)
LocalDefsSet.erase(*OR);
}
@@ -1645,11 +1696,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (!Reg)
continue;
LocalDefs.push_back(Reg);
- for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR)
+ for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR)
LocalDefsSet.insert(*OR);
}
- HasDups = true;;
+ HasDups = true;
++TIB;
++FIB;
}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 9a5e55160114..21729cd6c380 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -9,10 +9,9 @@ add_llvm_library(LLVMCodeGen
CodePlacementOpt.cpp
CriticalAntiDepBreaker.cpp
DeadMachineInstructionElim.cpp
+ DFAPacketizer.cpp
DwarfEHPrepare.cpp
EdgeBundles.cpp
- ELFCodeEmitter.cpp
- ELFWriter.cpp
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
@@ -23,6 +22,7 @@ add_llvm_library(LLVMCodeGen
InlineSpiller.cpp
InterferenceCache.cpp
IntrinsicLowering.cpp
+ JITCodeEmitter.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
LexicalScopes.cpp
@@ -37,7 +37,10 @@ add_llvm_library(LLVMCodeGen
LocalStackSlotAllocation.cpp
MachineBasicBlock.cpp
MachineBlockFrequencyInfo.cpp
+ MachineBlockPlacement.cpp
MachineBranchProbabilityInfo.cpp
+ MachineCodeEmitter.cpp
+ MachineCopyPropagation.cpp
MachineCSE.cpp
MachineDominators.cpp
MachineFunction.cpp
@@ -45,6 +48,7 @@ add_llvm_library(LLVMCodeGen
MachineFunctionPass.cpp
MachineFunctionPrinterPass.cpp
MachineInstr.cpp
+ MachineInstrBundle.cpp
MachineLICM.cpp
MachineLoopInfo.cpp
MachineLoopRanges.cpp
@@ -53,9 +57,9 @@ add_llvm_library(LLVMCodeGen
MachinePassRegistry.cpp
MachineRegisterInfo.cpp
MachineSSAUpdater.cpp
+ MachineScheduler.cpp
MachineSink.cpp
MachineVerifier.cpp
- ObjectCodeEmitter.cpp
OcamlGC.cpp
OptimizePHIs.cpp
PHIElimination.cpp
@@ -66,17 +70,16 @@ add_llvm_library(LLVMCodeGen
ProcessImplicitDefs.cpp
PrologEpilogInserter.cpp
PseudoSourceValue.cpp
+ RegAllocBase.cpp
RegAllocBasic.cpp
RegAllocFast.cpp
RegAllocGreedy.cpp
- RegAllocLinearScan.cpp
RegAllocPBQP.cpp
RegisterClassInfo.cpp
RegisterCoalescer.cpp
RegisterScavenging.cpp
RenderMachineFunction.cpp
ScheduleDAG.cpp
- ScheduleDAGEmit.cpp
ScheduleDAGInstrs.cpp
ScheduleDAGPrinter.cpp
ScoreboardHazardRecognizer.cpp
@@ -87,27 +90,17 @@ add_llvm_library(LLVMCodeGen
Spiller.cpp
SpillPlacement.cpp
SplitKit.cpp
- Splitter.cpp
StackProtector.cpp
StackSlotColoring.cpp
StrongPHIElimination.cpp
TailDuplication.cpp
+ TargetFrameLoweringImpl.cpp
TargetInstrInfoImpl.cpp
TargetLoweringObjectFileImpl.cpp
+ TargetOptionsImpl.cpp
TwoAddressInstructionPass.cpp
UnreachableBlockElim.cpp
VirtRegMap.cpp
- VirtRegRewriter.cpp
- )
-
-add_llvm_library_dependencies(LLVMCodeGen
- LLVMAnalysis
- LLVMCore
- LLVMMC
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
)
add_subdirectory(SelectionDAG)
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 14eb0541dc8d..2b7dfdbe41a0 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -58,7 +58,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void CCState::MarkAllocated(unsigned Reg) {
- for (const unsigned *Alias = TRI.getOverlaps(Reg);
+ for (const uint16_t *Alias = TRI.getOverlaps(Reg);
unsigned Reg = *Alias; ++Alias)
UsedRegs[Reg/32] |= 1 << (Reg&31);
}
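
MarkAllocated packs one "used" bit per physical register into 32-bit words: register N lands in word N/32, bit N&31. Here is a tiny standalone check of that indexing; the register number and table size are arbitrary.

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint32_t> UsedRegs((256 + 31) / 32, 0); // room for 256 registers
  unsigned Reg = 37;
  UsedRegs[Reg / 32] |= 1u << (Reg & 31);             // word 1, bit 5
  assert(UsedRegs[1] == (1u << 5));
  assert((UsedRegs[Reg / 32] >> (Reg & 31)) & 1);     // the bit reads back set
  return 0;
}
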
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 424535ba2a1c..a81bb5cc5566 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -19,36 +19,49 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeBranchFolderPassPass(Registry);
initializeCalculateSpillWeightsPass(Registry);
+ initializeCodePlacementOptPass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
+ initializeExpandPostRAPass(Registry);
+ initializeExpandISelPseudosPass(Registry);
+ initializeFinalizeMachineBundlesPass(Registry);
+ initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
initializeIfConverterPass(Registry);
initializeLiveDebugVariablesPass(Registry);
initializeLiveIntervalsPass(Registry);
initializeLiveStacksPass(Registry);
initializeLiveVariablesPass(Registry);
+ initializeLocalStackSlotPassPass(Registry);
initializeMachineBlockFrequencyInfoPass(Registry);
+ initializeMachineBlockPlacementPass(Registry);
+ initializeMachineBlockPlacementStatsPass(Registry);
+ initializeMachineCopyPropagationPass(Registry);
initializeMachineCSEPass(Registry);
initializeMachineDominatorTreePass(Registry);
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoPass(Registry);
+ initializeMachineSchedulerPass(Registry);
initializeMachineSinkingPass(Registry);
initializeMachineVerifierPassPass(Registry);
initializeOptimizePHIsPass(Registry);
initializePHIEliminationPass(Registry);
initializePeepholeOptimizerPass(Registry);
+ initializePostRASchedulerPass(Registry);
initializeProcessImplicitDefsPass(Registry);
initializePEIPass(Registry);
- initializeRALinScanPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeRenderMachineFunctionPass(Registry);
initializeSlotIndexesPass(Registry);
- initializeLoopSplitterPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeStrongPHIEliminationPass(Registry);
+ initializeTailDuplicatePassPass(Registry);
+ initializeTargetPassConfigPass(Registry);
initializeTwoAddressInstructionPassPass(Registry);
+ initializeUnpackMachineBundlesPass(Registry);
initializeUnreachableBlockElimPass(Registry);
initializeUnreachableMachineBlockElimPass(Registry);
initializeVirtRegMapPass(Registry);
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 270c337ef67e..c13c05e26a20 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -39,9 +39,6 @@ namespace {
CodePlacementOpt() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const {
- return "Code Placement Optimizer";
- }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineLoopInfo>();
@@ -69,9 +66,9 @@ namespace {
char CodePlacementOpt::ID = 0;
} // end anonymous namespace
-FunctionPass *llvm::createCodePlacementOptPass() {
- return new CodePlacementOpt();
-}
+char &llvm::CodePlacementOptID = CodePlacementOpt::ID;
+INITIALIZE_PASS(CodePlacementOpt, "code-placement",
+ "Code Placement Optimizer", false, false)
/// HasFallthrough - Test whether the given branch has a fallthrough, either as
/// a plain fallthrough or as a fallthrough case of a conditional branch.
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 84c4d59c0e41..bad50103b9c3 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -35,7 +35,8 @@ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
RegClassInfo(RCI),
Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
KillIndices(TRI->getNumRegs(), 0),
- DefIndices(TRI->getNumRegs(), 0) {}
+ DefIndices(TRI->getNumRegs(), 0),
+ KeepRegs(TRI->getNumRegs(), false) {}
CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
}
@@ -52,9 +53,9 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
}
// Clear "do not change" set.
- KeepRegs.clear();
+ KeepRegs.reset();
- bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+ bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn());
// Determine the live-out physregs for this block.
if (IsReturnBlock) {
@@ -63,14 +64,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
E = MRI.liveout_end(); I != E; ++I) {
unsigned Reg = *I;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
+ KillIndices[Reg] = BBSize;
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
+ KillIndices[AliasReg] = BBSize;
DefIndices[AliasReg] = ~0u;
}
}
@@ -85,14 +86,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
+ KillIndices[Reg] = BBSize;
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
+ KillIndices[AliasReg] = BBSize;
DefIndices[AliasReg] = ~0u;
}
}
@@ -102,18 +103,18 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo *MFI = MF.getFrameInfo();
BitVector Pristine = MFI->getPristineRegs(BB);
- for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
+ KillIndices[Reg] = BBSize;
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
+ KillIndices[AliasReg] = BBSize;
DefIndices[AliasReg] = ~0u;
}
}
@@ -121,7 +122,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
void CriticalAntiDepBreaker::FinishBlock() {
RegRefs.clear();
- KeepRegs.clear();
+ KeepRegs.reset();
}
void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
@@ -193,8 +194,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
// instruction which may not be executed. The second R6 def may or may not
// re-define R6 so it's not safe to change it since the last R6 use cannot be
// changed.
- bool Special = MI->getDesc().isCall() ||
- MI->getDesc().hasExtraSrcRegAllocReq() ||
+ bool Special = MI->isCall() ||
+ MI->hasExtraSrcRegAllocReq() ||
TII->isPredicated(MI);
// Scan the register operands for this instruction and update
@@ -217,7 +218,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
// Now check for aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
// If an alias of the reg is used during the live range, give up.
// Note that this allows us to skip checking if AntiDepReg
// overlaps with any of the aliases, among other things.
@@ -233,10 +234,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
RegRefs.insert(std::make_pair(Reg, &MO));
if (MO.isUse() && Special) {
- if (KeepRegs.insert(Reg)) {
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ if (!KeepRegs.test(Reg)) {
+ KeepRegs.set(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
- KeepRegs.insert(*Subreg);
+ KeepRegs.set(*Subreg);
}
}
}
@@ -253,6 +255,17 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// address updates.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
+
+ if (MO.isRegMask())
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
+ if (MO.clobbersPhysReg(i)) {
+ DefIndices[i] = Count;
+ KillIndices[i] = ~0u;
+ KeepRegs.reset(i);
+ Classes[i] = 0;
+ RegRefs.erase(i);
+ }
+
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
@@ -265,21 +278,21 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
assert(((KillIndices[Reg] == ~0u) !=
(DefIndices[Reg] == ~0u)) &&
"Kill and Def maps aren't consistent for Reg!");
- KeepRegs.erase(Reg);
+ KeepRegs.reset(Reg);
Classes[Reg] = 0;
RegRefs.erase(Reg);
// Repeat, for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
unsigned SubregReg = *Subreg;
DefIndices[SubregReg] = Count;
KillIndices[SubregReg] = ~0u;
- KeepRegs.erase(SubregReg);
+ KeepRegs.reset(SubregReg);
Classes[SubregReg] = 0;
RegRefs.erase(SubregReg);
}
// Conservatively mark super-registers as unusable.
- for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+ for (const uint16_t *Super = TRI->getSuperRegisters(Reg);
*Super; ++Super) {
unsigned SuperReg = *Super;
Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
@@ -315,7 +328,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
"Kill and Def maps aren't consistent for Reg!");
}
// Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
if (KillIndices[AliasReg] == ~0u) {
KillIndices[AliasReg] = Count;
@@ -355,6 +368,9 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &CheckOper = MI->getOperand(i);
+ if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg))
+ return true;
+
if (!CheckOper.isReg() || !CheckOper.isDef() ||
CheckOper.getReg() != NewReg)
continue;
@@ -427,6 +443,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// Keep a map of the MachineInstr*'s back to the SUnit representing them.
// This is used for updating debug information.
+ //
+ // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap
DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
// Find the node at the bottom of the critical path.
@@ -535,7 +553,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
if (!RegClassInfo.isAllocatable(AntiDepReg))
// Don't break anti-dependencies on non-allocatable registers.
AntiDepReg = 0;
- else if (KeepRegs.count(AntiDepReg))
+ else if (KeepRegs.test(AntiDepReg))
// Don't break anti-dependencies if a use down below requires
// this exact register.
AntiDepReg = 0;
@@ -572,7 +590,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
TII->isPredicated(MI))
// If this instruction's defs have special allocation requirement, don't
// break this anti-dependency.
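
A minimal standalone sketch of the regmask-scan pattern this change adds to ScanInstruction; the free function and its parameters are hypothetical stand-ins for the breaker's own members, and only the clobber handling mirrors the lines above:

#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <vector>
using namespace llvm;

// Hypothetical helper; CriticalAntiDepBreaker keeps this state as members.
static void applyRegMaskClobbers(const MachineInstr *MI,
                                 const TargetRegisterInfo *TRI,
                                 unsigned Count,
                                 BitVector &KeepRegs,
                                 std::vector<unsigned> &KillIndices,
                                 std::vector<unsigned> &DefIndices) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isRegMask())
      continue;
    // A regmask lists the registers preserved across MI (typically a call);
    // every physreg it clobbers is treated as freshly defined here and may
    // no longer be protected from renaming.
    for (unsigned Reg = 0, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg)
      if (MO.clobbersPhysReg(Reg)) {
        DefIndices[Reg] = Count;
        KillIndices[Reg] = ~0u;
        KeepRegs.reset(Reg);
      }
  }
}
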
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 07107802972d..77462593896e 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -24,7 +24,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
#include <map>
namespace llvm {
@@ -66,7 +65,7 @@ class TargetRegisterInfo;
/// KeepRegs - A set of registers which are live and cannot be changed to
/// break anti-dependencies.
- SmallSet<unsigned, 4> KeepRegs;
+ BitVector KeepRegs;
public:
CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
new file mode 100644
index 000000000000..bfbe7790998f
--- /dev/null
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -0,0 +1,223 @@
+//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This class implements a deterministic finite automaton (DFA) based
+// packetizing mechanism for VLIW architectures. It provides APIs to
+// determine whether there exists a legal mapping of instructions to
+// functional unit assignments in a packet. The DFA is auto-generated from
+// the target's Schedule.td file.
+//
+// A DFA consists of 3 major elements: states, inputs, and transitions. For
+// the packetizing mechanism, the input is the set of instruction classes for
+// a target. The state models all possible combinations of functional unit
+// consumption for a given set of instructions in a packet. A transition
+// models the addition of an instruction to a packet. In the DFA constructed
+// by this class, if an instruction can be added to a packet, then a valid
+// transition exists from the corresponding state. Invalid transitions
+// indicate that the instruction cannot be added to the current packet.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+using namespace llvm;
+
+DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
+ const unsigned *SET):
+ InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
+ DFAStateEntryTable(SET) {}
+
+
+//
+// ReadTable - Read the DFA transition table and update CachedTable.
+//
+// Format of the transition tables:
+// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
+// transitions
+// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
+// for the ith state
+//
+void DFAPacketizer::ReadTable(unsigned int state) {
+ unsigned ThisState = DFAStateEntryTable[state];
+ unsigned NextStateInTable = DFAStateEntryTable[state+1];
+ // Early exit in case CachedTable already contains this
+ // state's transitions.
+ if (CachedTable.count(UnsignPair(state,
+ DFAStateInputTable[ThisState][0])))
+ return;
+
+ for (unsigned i = ThisState; i < NextStateInTable; i++)
+ CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] =
+ DFAStateInputTable[i][1];
+}
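
For illustration, a hypothetical two-state automaton (not taken from any real target's generated tables) could be encoded like this; ReadTable(0) would then cache the pairs (0,1)->1 and (0,2)->0:

// Hypothetical tables, made up purely to show the layout described above.
static const int DFAStateInputTable[][2] = {
  {1, 1},   // state 0, input class 1 -> state 1
  {2, 0},   // state 0, input class 2 -> state 0
  {2, 0},   // state 1, input class 2 -> state 0
};
// DFAStateEntryTable[i] is the first row belonging to state i, so state 0
// owns rows [0,2) and state 1 owns rows [2,3).
static const unsigned DFAStateEntryTable[] = {0, 2, 3};
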
+
+
+// canReserveResources - Check if the resources occupied by a MCInstrDesc
+// are available in the current state.
+bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
+ unsigned FuncUnits = IS->getUnits();
+ UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ ReadTable(CurrentState);
+ return (CachedTable.count(StateTrans) != 0);
+}
+
+
+// reserveResources - Reserve the resources occupied by a MCInstrDesc and
+// change the current state to reflect that change.
+void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
+ unsigned FuncUnits = IS->getUnits();
+ UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ ReadTable(CurrentState);
+ assert(CachedTable.count(StateTrans) != 0);
+ CurrentState = CachedTable[StateTrans];
+}
+
+
+// canReserveResources - Check if the resources occupied by a machine
+// instruction are available in the current state.
+bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) {
+ const llvm::MCInstrDesc &MID = MI->getDesc();
+ return canReserveResources(&MID);
+}
+
+// reserveResources - Reserve the resources occupied by a machine
+// instruction and change the current state to reflect that change.
+void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
+ const llvm::MCInstrDesc &MID = MI->getDesc();
+ reserveResources(&MID);
+}
+
+namespace llvm {
+// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
+// the schedule() method to build the dependence graph.
+class DefaultVLIWScheduler : public ScheduleDAGInstrs {
+public:
+ DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
+ MachineDominatorTree &MDT, bool IsPostRA);
+ // Schedule - Actual scheduling work.
+ void schedule();
+};
+}
+
+DefaultVLIWScheduler::DefaultVLIWScheduler(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ bool IsPostRA) :
+ ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) {
+}
+
+void DefaultVLIWScheduler::schedule() {
+ // Build the scheduling graph.
+ buildSchedGraph(0);
+}
+
+// VLIWPacketizerList Ctor
+VLIWPacketizerList::VLIWPacketizerList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ bool IsPostRA) : TM(MF.getTarget()), MF(MF) {
+ TII = TM.getInstrInfo();
+ ResourceTracker = TII->CreateTargetScheduleState(&TM, 0);
+ VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
+}
+
+// VLIWPacketizerList Dtor
+VLIWPacketizerList::~VLIWPacketizerList() {
+ if (VLIWScheduler)
+ delete VLIWScheduler;
+
+ if (ResourceTracker)
+ delete ResourceTracker;
+}
+
+// endPacket - End the current packet, bundle packet instructions and reset
+// DFA state.
+void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ if (CurrentPacketMIs.size() > 1) {
+ MachineInstr *MIFirst = CurrentPacketMIs.front();
+ finalizeBundle(*MBB, MIFirst, MI);
+ }
+ CurrentPacketMIs.clear();
+ ResourceTracker->clearResources();
+}
+
+// PacketizeMIs - Bundle machine instructions into packets.
+void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator BeginItr,
+ MachineBasicBlock::iterator EndItr) {
+ assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
+ VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size());
+ VLIWScheduler->schedule();
+ VLIWScheduler->exitRegion();
+
+ // Generate MI -> SU map.
+ //std::map <MachineInstr*, SUnit*> MIToSUnit;
+ MIToSUnit.clear();
+ for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) {
+ SUnit *SU = &VLIWScheduler->SUnits[i];
+ MIToSUnit[SU->getInstr()] = SU;
+ }
+
+ // The main packetizer loop.
+ for (; BeginItr != EndItr; ++BeginItr) {
+ MachineInstr *MI = BeginItr;
+
+ this->initPacketizerState();
+
+ // End the current packet if needed.
+ if (this->isSoloInstruction(MI)) {
+ endPacket(MBB, MI);
+ continue;
+ }
+
+ // Ignore pseudo instructions.
+ if (this->ignorePseudoInstruction(MI, MBB))
+ continue;
+
+ SUnit *SUI = MIToSUnit[MI];
+ assert(SUI && "Missing SUnit Info!");
+
+ // Ask the DFA if machine resources are available for MI.
+ bool ResourceAvail = ResourceTracker->canReserveResources(MI);
+ if (ResourceAvail) {
+ // Dependency check for MI with instructions in CurrentPacketMIs.
+ for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
+ MachineInstr *MJ = *VI;
+ SUnit *SUJ = MIToSUnit[MJ];
+ assert(SUJ && "Missing SUnit Info!");
+
+ // Is it legal to packetize SUI and SUJ together?
+ if (!this->isLegalToPacketizeTogether(SUI, SUJ)) {
+ // Allow packetization if dependency can be pruned.
+ if (!this->isLegalToPruneDependencies(SUI, SUJ)) {
+ // End the packet if dependency cannot be pruned.
+ endPacket(MBB, MI);
+ break;
+ } // !isLegalToPruneDependencies.
+ } // !isLegalToPacketizeTogether.
+ } // For all instructions in CurrentPacketMIs.
+ } else {
+ // End the packet if resource is not available.
+ endPacket(MBB, MI);
+ }
+
+ // Add MI to the current packet.
+ BeginItr = this->addToPacket(MI);
+ } // For all instructions in BB.
+
+ // End any packet left behind.
+ endPacket(MBB, EndItr);
+}
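
A sketch of how a target might drive this packetizer; the hook names and signatures are taken from the this-> calls inside PacketizeMIs above, while the MyTargetPacketizer class and its policy choices are hypothetical:

#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
using namespace llvm;

class MyTargetPacketizer : public VLIWPacketizerList {
public:
  MyTargetPacketizer(MachineFunction &MF, MachineLoopInfo &MLI,
                     MachineDominatorTree &MDT)
    : VLIWPacketizerList(MF, MLI, MDT, /*IsPostRA=*/true) {}

  // Reset per-packet bookkeeping before each candidate instruction.
  void initPacketizerState() {}

  // Keep calls in a packet of their own.
  bool isSoloInstruction(MachineInstr *MI) { return MI->isCall(); }

  // Debug values take no functional units and can be skipped.
  bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
    return MI->isDebugValue();
  }

  // Conservative policy: refuse to bundle if SUJ has a dependence edge to SUI.
  bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
    return !SUJ->isSucc(SUI);
  }
  bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { return false; }
};

A target pass would construct one of these and call PacketizeMIs(MBB, MBB->begin(), MBB->end()) for each block or scheduling region it wants bundled.
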
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 6de6c0cb81bd..aa10d1d41f2b 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -28,11 +28,12 @@ STATISTIC(NumDeletes, "Number of dead instructions deleted");
namespace {
class DeadMachineInstructionElim : public MachineFunctionPass {
virtual bool runOnMachineFunction(MachineFunction &MF);
-
+
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
BitVector LivePhysRegs;
+ BitVector ReservedRegs;
public:
static char ID; // Pass identification, replacement for typeid
@@ -45,14 +46,11 @@ namespace {
};
}
char DeadMachineInstructionElim::ID = 0;
+char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID;
INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
"Remove dead machine instructions", false, false)
-FunctionPass *llvm::createDeadMachineInstructionElimPass() {
- return new DeadMachineInstructionElim();
-}
-
bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
// Technically speaking inline asm without side effects and no defs can still
// be deleted. But there is so much bad inline asm code out there, we should
@@ -70,10 +68,14 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef()) {
unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ?
- LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) {
- // This def has a non-debug use. Don't delete the instruction!
- return false;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Don't delete live physreg defs, or any reserved register defs.
+ if (LivePhysRegs.test(Reg) || ReservedRegs.test(Reg))
+ return false;
+ } else {
+ if (!MRI->use_nodbg_empty(Reg))
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
}
}
}
@@ -89,7 +91,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
// Treat reserved registers as always live.
- BitVector ReservedRegs = TRI->getReservedRegs(MF);
+ ReservedRegs = TRI->getReservedRegs(MF);
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
@@ -102,7 +104,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
LivePhysRegs = ReservedRegs;
// Also add any explicit live-out physregs for this block.
- if (!MBB->empty() && MBB->back().getDesc().isReturn())
+ if (!MBB->empty() && MBB->back().isReturn())
for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
unsigned Reg = *LOI;
@@ -169,10 +171,13 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// Check the subreg set, not the alias set, because a def
// of a super-register may still be partially live after
// this def.
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
*SubRegs; ++SubRegs)
LivePhysRegs.reset(*SubRegs);
}
+ } else if (MO.isRegMask()) {
+ // Register mask of preserved registers. All clobbers are dead.
+ LivePhysRegs.clearBitsNotInMask(MO.getRegMask());
}
}
// Record the physreg uses, after the defs, in case a physreg is
@@ -183,7 +188,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
LivePhysRegs.set(Reg);
- for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
+ for (const uint16_t *AliasSet = TRI->getAliasSet(Reg);
*AliasSet; ++AliasSet)
LivePhysRegs.set(*AliasSet);
}
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index ed9e409d3e5a..944dd4fb41c8 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -28,98 +28,34 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
-STATISTIC(NumLandingPadsSplit, "Number of landing pads split");
-STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered");
-STATISTIC(NumResumesLowered, "Number of eh.resume calls lowered");
-STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved");
+STATISTIC(NumResumesLowered, "Number of resume calls lowered");
namespace {
class DwarfEHPrepare : public FunctionPass {
const TargetMachine *TM;
const TargetLowering *TLI;
- // The eh.exception intrinsic.
- Function *ExceptionValueIntrinsic;
-
- // The eh.selector intrinsic.
- Function *SelectorIntrinsic;
-
- // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call.
- Constant *URoR;
-
- // The EH language-specific catch-all type.
- GlobalVariable *EHCatchAllValue;
-
- // _Unwind_Resume or the target equivalent.
+ // RewindFunction - _Unwind_Resume or the target equivalent.
Constant *RewindFunction;
- // We both use and preserve dominator info.
- DominatorTree *DT;
-
- // The function we are running on.
- Function *F;
-
- // The landing pads for this function.
- typedef SmallPtrSet<BasicBlock*, 8> BBSet;
- BBSet LandingPads;
-
- bool InsertUnwindResumeCalls();
-
- bool NormalizeLandingPads();
- bool LowerUnwindsAndResumes();
- bool MoveExceptionValueCalls();
-
- Instruction *CreateExceptionValueCall(BasicBlock *BB);
-
- /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
- /// use the "llvm.eh.catch.all.value" call need to convert to using its
- /// initializer instead.
- bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
-
- bool HasCatchAllInSelector(IntrinsicInst *);
+ bool InsertUnwindResumeCalls(Function &Fn);
+ Instruction *GetExceptionObject(ResumeInst *RI);
- /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
- void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
- SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels);
-
- /// FindAllURoRInvokes - Find all URoR invokes in the function.
- void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
-
- /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
- /// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to
- /// a landing pad within the current function. This is a candidate to merge
- /// the selector associated with the URoR invoke with the one from the
- /// URoR's landing pad.
- bool HandleURoRInvokes();
-
- /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated
- /// with the eh.exception call. This recursively looks past instructions
- /// which don't change the EH pointer value, like casts or PHI nodes.
- bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
- SmallPtrSet<IntrinsicInst*, 8> &SelCalls,
- SmallPtrSet<PHINode*, 32> &SeenPHIs);
-
public:
static char ID; // Pass identification, replacement for typeid.
DwarfEHPrepare(const TargetMachine *tm) :
FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
- ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
- URoR(0), EHCatchAllValue(0), RewindFunction(0) {
+ RewindFunction(0) {
initializeDominatorTreePass(*PassRegistry::getPassRegistry());
}
virtual bool runOnFunction(Function &Fn);
- // getAnalysisUsage - We need the dominator tree for handling URoR.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
- }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
const char *getPassName() const {
return "Exception handling preparation";
}
-
};
} // end anonymous namespace
@@ -129,543 +65,52 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) {
return new DwarfEHPrepare(tm);
}
-/// HasCatchAllInSelector - Return true if the intrinsic instruction has a
-/// catch-all.
-bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) {
- if (!EHCatchAllValue) return false;
-
- unsigned ArgIdx = II->getNumArgOperands() - 1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(II->getArgOperand(ArgIdx));
- return GV == EHCatchAllValue;
-}
-
-/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
-void DwarfEHPrepare::
-FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
- SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels) {
- for (Value::use_iterator
- I = SelectorIntrinsic->use_begin(),
- E = SelectorIntrinsic->use_end(); I != E; ++I) {
- IntrinsicInst *II = cast<IntrinsicInst>(*I);
-
- if (II->getParent()->getParent() != F)
- continue;
-
- if (!HasCatchAllInSelector(II))
- Sels.insert(II);
- else
- CatchAllSels.insert(II);
- }
-}
-
-/// FindAllURoRInvokes - Find all URoR invokes in the function.
-void DwarfEHPrepare::
-FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
- for (Value::use_iterator
- I = URoR->use_begin(),
- E = URoR->use_end(); I != E; ++I) {
- if (InvokeInst *II = dyn_cast<InvokeInst>(*I))
- URoRInvokes.insert(II);
- }
-}
-
-/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use
-/// the "llvm.eh.catch.all.value" call need to convert to using its
-/// initializer instead.
-bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
- if (!EHCatchAllValue) return false;
-
- if (!SelectorIntrinsic) {
- SelectorIntrinsic =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
- if (!SelectorIntrinsic) return false;
- }
-
- bool Changed = false;
- for (SmallPtrSet<IntrinsicInst*, 32>::iterator
- I = Sels.begin(), E = Sels.end(); I != E; ++I) {
- IntrinsicInst *Sel = *I;
-
- // Index of the "llvm.eh.catch.all.value" variable.
- unsigned OpIdx = Sel->getNumArgOperands() - 1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx));
- if (GV != EHCatchAllValue) continue;
- Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer());
- Changed = true;
- }
-
- return Changed;
-}
-
-/// FindSelectorAndURoR - Find the eh.selector call associated with the
-/// eh.exception call. And indicate if there is a URoR "invoke" associated with
-/// the eh.exception call. This recursively looks past instructions which don't
-/// change the EH pointer value, like casts or PHI nodes.
-bool
-DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
- SmallPtrSet<IntrinsicInst*, 8> &SelCalls,
- SmallPtrSet<PHINode*, 32> &SeenPHIs) {
- bool Changed = false;
-
- for (Value::use_iterator
- I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) {
- Instruction *II = dyn_cast<Instruction>(*I);
- if (!II || II->getParent()->getParent() != F) continue;
-
- if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) {
- if (Sel->getIntrinsicID() == Intrinsic::eh_selector)
- SelCalls.insert(Sel);
- } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) {
- if (Invoke->getCalledFunction() == URoR)
- URoRInvoke = true;
- } else if (CastInst *CI = dyn_cast<CastInst>(II)) {
- Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls, SeenPHIs);
- } else if (PHINode *PN = dyn_cast<PHINode>(II)) {
- if (SeenPHIs.insert(PN))
- // Don't process a PHI node more than once.
- Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls, SeenPHIs);
- }
- }
-
- return Changed;
-}
-
-/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
-/// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to a
-/// landing pad within the current function. This is a candidate to merge the
-/// selector associated with the URoR invoke with the one from the URoR's
-/// landing pad.
-bool DwarfEHPrepare::HandleURoRInvokes() {
- if (!EHCatchAllValue) {
- EHCatchAllValue =
- F->getParent()->getNamedGlobal("llvm.eh.catch.all.value");
- if (!EHCatchAllValue) return false;
- }
-
- if (!SelectorIntrinsic) {
- SelectorIntrinsic =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
- if (!SelectorIntrinsic) return false;
- }
-
- SmallPtrSet<IntrinsicInst*, 32> Sels;
- SmallPtrSet<IntrinsicInst*, 32> CatchAllSels;
- FindAllCleanupSelectors(Sels, CatchAllSels);
-
- if (!URoR) {
- URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
- if (!URoR) return CleanupSelectors(CatchAllSels);
- }
-
- SmallPtrSet<InvokeInst*, 32> URoRInvokes;
- FindAllURoRInvokes(URoRInvokes);
-
- SmallPtrSet<IntrinsicInst*, 32> SelsToConvert;
-
- for (SmallPtrSet<IntrinsicInst*, 32>::iterator
- SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) {
- const BasicBlock *SelBB = (*SI)->getParent();
- for (SmallPtrSet<InvokeInst*, 32>::iterator
- UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) {
- const BasicBlock *URoRBB = (*UI)->getParent();
- if (DT->dominates(SelBB, URoRBB)) {
- SelsToConvert.insert(*SI);
- break;
+/// GetExceptionObject - Return the exception object from the value passed into
+/// the 'resume' instruction (typically an aggregate). Clean up any dead
+/// instructions, including the 'resume' instruction.
+Instruction *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
+ Value *V = RI->getOperand(0);
+ Instruction *ExnObj = 0;
+ InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V);
+ LoadInst *SelLoad = 0;
+ InsertValueInst *ExcIVI = 0;
+ bool EraseIVIs = false;
+
+ if (SelIVI) {
+ if (SelIVI->getNumIndices() == 1 && *SelIVI->idx_begin() == 1) {
+ ExcIVI = dyn_cast<InsertValueInst>(SelIVI->getOperand(0));
+ if (ExcIVI && isa<UndefValue>(ExcIVI->getOperand(0)) &&
+ ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) {
+ ExnObj = cast<Instruction>(ExcIVI->getOperand(1));
+ SelLoad = dyn_cast<LoadInst>(SelIVI->getOperand(1));
+ EraseIVIs = true;
}
}
}
- bool Changed = false;
-
- if (Sels.size() != SelsToConvert.size()) {
- // If we haven't been able to convert all of the clean-up selectors, then
- // loop through the slow way to see if they still need to be converted.
- if (!ExceptionValueIntrinsic) {
- ExceptionValueIntrinsic =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception);
- if (!ExceptionValueIntrinsic)
- return CleanupSelectors(CatchAllSels);
- }
-
- for (Value::use_iterator
- I = ExceptionValueIntrinsic->use_begin(),
- E = ExceptionValueIntrinsic->use_end(); I != E; ++I) {
- IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I);
- if (!EHPtr || EHPtr->getParent()->getParent() != F) continue;
-
- bool URoRInvoke = false;
- SmallPtrSet<IntrinsicInst*, 8> SelCalls;
- SmallPtrSet<PHINode*, 32> SeenPHIs;
- Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls, SeenPHIs);
-
- if (URoRInvoke) {
- // This EH pointer is being used by an invoke of an URoR instruction and
- // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we
- // need to convert it to a 'catch-all'.
- for (SmallPtrSet<IntrinsicInst*, 8>::iterator
- SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI)
- if (!HasCatchAllInSelector(*SI))
- SelsToConvert.insert(*SI);
- }
- }
- }
-
- if (!SelsToConvert.empty()) {
- // Convert all clean-up eh.selectors, which are associated with "invokes" of
- // URoR calls, into catch-all eh.selectors.
- Changed = true;
-
- for (SmallPtrSet<IntrinsicInst*, 8>::iterator
- SI = SelsToConvert.begin(), SE = SelsToConvert.end();
- SI != SE; ++SI) {
- IntrinsicInst *II = *SI;
-
- // Use the exception object pointer and the personality function
- // from the original selector.
- CallSite CS(II);
- IntrinsicInst::op_iterator I = CS.arg_begin();
- IntrinsicInst::op_iterator E = CS.arg_end();
- IntrinsicInst::op_iterator B = prior(E);
-
- // Exclude last argument if it is an integer.
- if (isa<ConstantInt>(B)) E = B;
+ if (!ExnObj)
+ ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI);
- // Add exception object pointer (front).
- // Add personality function (next).
- // Add in any filter IDs (rest).
- SmallVector<Value*, 8> Args(I, E);
+ RI->eraseFromParent();
- Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
-
- CallInst *NewSelector =
- CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II);
-
- NewSelector->setTailCall(II->isTailCall());
- NewSelector->setAttributes(II->getAttributes());
- NewSelector->setCallingConv(II->getCallingConv());
-
- II->replaceAllUsesWith(NewSelector);
- II->eraseFromParent();
- }
+ if (EraseIVIs) {
+ if (SelIVI->getNumUses() == 0)
+ SelIVI->eraseFromParent();
+ if (ExcIVI->getNumUses() == 0)
+ ExcIVI->eraseFromParent();
+ if (SelLoad && SelLoad->getNumUses() == 0)
+ SelLoad->eraseFromParent();
}
- Changed |= CleanupSelectors(CatchAllSels);
- return Changed;
-}
-
-/// NormalizeLandingPads - Normalize and discover landing pads, noting them
-/// in the LandingPads set. A landing pad is normal if the only CFG edges
-/// that end at it are unwind edges from invoke instructions. If we inlined
-/// through an invoke we could have a normal branch from the previous
-/// unwind block through to the landing pad for the original invoke.
-/// Abnormal landing pads are fixed up by redirecting all unwind edges to
-/// a new basic block which falls through to the original.
-bool DwarfEHPrepare::NormalizeLandingPads() {
- bool Changed = false;
-
- const MCAsmInfo *MAI = TM->getMCAsmInfo();
- bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
-
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
- TerminatorInst *TI = I->getTerminator();
- if (!isa<InvokeInst>(TI))
- continue;
- BasicBlock *LPad = TI->getSuccessor(1);
- // Skip landing pads that have already been normalized.
- if (LandingPads.count(LPad))
- continue;
-
- // Check that only invoke unwind edges end at the landing pad.
- bool OnlyUnwoundTo = true;
- bool SwitchOK = usingSjLjEH;
- for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad);
- PI != PE; ++PI) {
- TerminatorInst *PT = (*PI)->getTerminator();
- // The SjLj dispatch block uses a switch instruction. This is effectively
- // an unwind edge, so we can disregard it here. There will only ever
- // be one dispatch, however, so if there are multiple switches, one
- // of them truly is a normal edge, not an unwind edge.
- if (SwitchOK && isa<SwitchInst>(PT)) {
- SwitchOK = false;
- continue;
- }
- if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) {
- OnlyUnwoundTo = false;
- break;
- }
- }
-
- if (OnlyUnwoundTo) {
- // Only unwind edges lead to the landing pad. Remember the landing pad.
- LandingPads.insert(LPad);
- continue;
- }
-
- // At least one normal edge ends at the landing pad. Redirect the unwind
- // edges to a new basic block which falls through into this one.
-
- // Create the new basic block.
- BasicBlock *NewBB = BasicBlock::Create(F->getContext(),
- LPad->getName() + "_unwind_edge");
-
- // Insert it into the function right before the original landing pad.
- LPad->getParent()->getBasicBlockList().insert(LPad, NewBB);
-
- // Redirect unwind edges from the original landing pad to NewBB.
- for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) {
- TerminatorInst *PT = (*PI++)->getTerminator();
- if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad)
- // Unwind to the new block.
- PT->setSuccessor(1, NewBB);
- }
-
- // If there are any PHI nodes in LPad, we need to update them so that they
- // merge incoming values from NewBB instead.
- for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) {
- PHINode *PN = cast<PHINode>(II);
- pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB);
-
- // Check to see if all of the values coming in via unwind edges are the
- // same. If so, we don't need to create a new PHI node.
- Value *InVal = PN->getIncomingValueForBlock(*PB);
- for (pred_iterator PI = PB; PI != PE; ++PI) {
- if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) {
- InVal = 0;
- break;
- }
- }
-
- if (InVal == 0) {
- // Different unwind edges have different values. Create a new PHI node
- // in NewBB.
- PHINode *NewPN = PHINode::Create(PN->getType(),
- PN->getNumIncomingValues(),
- PN->getName()+".unwind", NewBB);
- // Add an entry for each unwind edge, using the value from the old PHI.
- for (pred_iterator PI = PB; PI != PE; ++PI)
- NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI);
-
- // Now use this new PHI as the common incoming value for NewBB in PN.
- InVal = NewPN;
- }
-
- // Revector exactly one entry in the PHI node to come from NewBB
- // and delete all other entries that come from unwind edges. If
- // there are both normal and unwind edges from the same predecessor,
- // this leaves an entry for the normal edge.
- for (pred_iterator PI = PB; PI != PE; ++PI)
- PN->removeIncomingValue(*PI);
- PN->addIncoming(InVal, NewBB);
- }
-
- // Add a fallthrough from NewBB to the original landing pad.
- BranchInst::Create(LPad, NewBB);
-
- // Now update DominatorTree analysis information.
- DT->splitBlock(NewBB);
-
- // Remember the newly constructed landing pad. The original landing pad
- // LPad is no longer a landing pad now that all unwind edges have been
- // revectored to NewBB.
- LandingPads.insert(NewBB);
- ++NumLandingPadsSplit;
- Changed = true;
- }
-
- return Changed;
-}
-
-/// LowerUnwinds - Turn unwind instructions into calls to _Unwind_Resume,
-/// rethrowing any previously caught exception. This will crash horribly
-/// at runtime if there is no such exception: using unwind to throw a new
-/// exception is currently not supported.
-bool DwarfEHPrepare::LowerUnwindsAndResumes() {
- SmallVector<Instruction*, 16> ResumeInsts;
-
- for (Function::iterator fi = F->begin(), fe = F->end(); fi != fe; ++fi) {
- for (BasicBlock::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi){
- if (isa<UnwindInst>(bi))
- ResumeInsts.push_back(bi);
- else if (CallInst *call = dyn_cast<CallInst>(bi))
- if (Function *fn = dyn_cast<Function>(call->getCalledValue()))
- if (fn->getName() == "llvm.eh.resume")
- ResumeInsts.push_back(bi);
- }
- }
-
- if (ResumeInsts.empty()) return false;
-
- // Find the rewind function if we didn't already.
- if (!RewindFunction) {
- LLVMContext &Ctx = ResumeInsts[0]->getContext();
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
- Type::getInt8PtrTy(Ctx), false);
- const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
- RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
- }
-
- bool Changed = false;
-
- for (SmallVectorImpl<Instruction*>::iterator
- I = ResumeInsts.begin(), E = ResumeInsts.end(); I != E; ++I) {
- Instruction *RI = *I;
-
- // Replace the resuming instruction with a call to _Unwind_Resume (or the
- // appropriate target equivalent).
-
- llvm::Value *ExnValue;
- if (isa<UnwindInst>(RI))
- ExnValue = CreateExceptionValueCall(RI->getParent());
- else
- ExnValue = cast<CallInst>(RI)->getArgOperand(0);
-
- // Create the call...
- CallInst *CI = CallInst::Create(RewindFunction, ExnValue, "", RI);
- CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
-
- // ...followed by an UnreachableInst, if it was an unwind.
- // Calls to llvm.eh.resume are typically already followed by this.
- if (isa<UnwindInst>(RI))
- new UnreachableInst(RI->getContext(), RI);
-
- if (isa<UnwindInst>(RI))
- ++NumUnwindsLowered;
- else
- ++NumResumesLowered;
-
- // Nuke the resume instruction.
- RI->eraseFromParent();
-
- Changed = true;
- }
-
- return Changed;
-}
-
-/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from
-/// landing pads by replacing calls outside of landing pads with direct use of
-/// a register holding the appropriate value; this requires adding calls inside
-/// all landing pads to initialize the register. Also, move eh.exception calls
-/// inside landing pads to the start of the landing pad (optional, but may make
-/// things simpler for later passes).
-bool DwarfEHPrepare::MoveExceptionValueCalls() {
- // If the eh.exception intrinsic is not declared in the module then there is
- // nothing to do. Speed up compilation by checking for this common case.
- if (!ExceptionValueIntrinsic &&
- !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception)))
- return false;
-
- bool Changed = false;
-
- // Move calls to eh.exception that are inside a landing pad to the start of
- // the landing pad.
- for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end();
- LI != LE; ++LI) {
- BasicBlock *LP = *LI;
- for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end();
- II != IE;)
- if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
- // Found a call to eh.exception.
- if (!EI->use_empty()) {
- // If there is already a call to eh.exception at the start of the
- // landing pad, then get hold of it; otherwise create such a call.
- Value *CallAtStart = CreateExceptionValueCall(LP);
-
- // If the call was at the start of a landing pad then leave it alone.
- if (EI == CallAtStart)
- continue;
- EI->replaceAllUsesWith(CallAtStart);
- }
- EI->eraseFromParent();
- ++NumExceptionValuesMoved;
- Changed = true;
- }
- }
-
- // Look for calls to eh.exception that are not in a landing pad. If one is
- // found, then a register that holds the exception value will be created in
- // each landing pad, and the SSAUpdater will be used to compute the values
- // returned by eh.exception calls outside of landing pads.
- SSAUpdater SSA;
-
- // Remember where we found the eh.exception call, to avoid rescanning earlier
- // basic blocks which we already know contain no eh.exception calls.
- bool FoundCallOutsideLandingPad = false;
- Function::iterator BB = F->begin();
- for (Function::iterator BE = F->end(); BB != BE; ++BB) {
- // Skip over landing pads.
- if (LandingPads.count(BB))
- continue;
-
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
- II != IE; ++II)
- if (isa<EHExceptionInst>(II)) {
- SSA.Initialize(II->getType(), II->getName());
- FoundCallOutsideLandingPad = true;
- break;
- }
-
- if (FoundCallOutsideLandingPad)
- break;
- }
-
- // If all calls to eh.exception are in landing pads then we are done.
- if (!FoundCallOutsideLandingPad)
- return Changed;
-
- // Add a call to eh.exception at the start of each landing pad, and tell the
- // SSAUpdater that this is the value produced by the landing pad.
- for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end();
- LI != LE; ++LI)
- SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI));
-
- // Now turn all calls to eh.exception that are not in a landing pad into a use
- // of the appropriate register.
- for (Function::iterator BE = F->end(); BB != BE; ++BB) {
- // Skip over landing pads.
- if (LandingPads.count(BB))
- continue;
-
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
- II != IE;)
- if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
- // Found a call to eh.exception, replace it with the value from any
- // upstream landing pad(s).
- EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB));
- EI->eraseFromParent();
- ++NumExceptionValuesMoved;
- }
- }
-
- return true;
-}
-
-/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at
-/// the start of the basic block (unless there already is one, in which case
-/// the existing call is returned).
-Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {
- Instruction *Start = BB->getFirstNonPHIOrDbg();
- // Is this a call to eh.exception?
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start))
- if (CI->getIntrinsicID() == Intrinsic::eh_exception)
- // Reuse the existing call.
- return Start;
-
- // Find the eh.exception intrinsic if we didn't already.
- if (!ExceptionValueIntrinsic)
- ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::eh_exception);
-
- // Create the call.
- return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start);
+ return ExnObj;
}
/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present
/// into calls to the appropriate _Unwind_Resume function.
-bool DwarfEHPrepare::InsertUnwindResumeCalls() {
+bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
bool UsesNewEH = false;
SmallVector<ResumeInst*, 16> Resumes;
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
TerminatorInst *TI = I->getTerminator();
if (ResumeInst *RI = dyn_cast<ResumeInst>(TI))
Resumes.push_back(RI);
@@ -682,27 +127,45 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
Type::getInt8PtrTy(Ctx), false);
const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
- RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
+ RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy);
}
// Create the basic block where the _Unwind_Resume call will live.
- LLVMContext &Ctx = F->getContext();
- BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", F);
- PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), Resumes.size(),
+ LLVMContext &Ctx = Fn.getContext();
+ unsigned ResumesSize = Resumes.size();
+
+ if (ResumesSize == 1) {
+ // Instead of creating a new BB and PHI node, just append the call to
+ // _Unwind_Resume to the end of the single resume block.
+ ResumeInst *RI = Resumes.front();
+ BasicBlock *UnwindBB = RI->getParent();
+ Instruction *ExnObj = GetExceptionObject(RI);
+
+ // Call the _Unwind_Resume function.
+ CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+ // We never expect _Unwind_Resume to return.
+ new UnreachableInst(Ctx, UnwindBB);
+ return true;
+ }
+
+ BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn);
+ PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize,
"exn.obj", UnwindBB);
// Extract the exception object from the ResumeInst and add it to the PHI node
// that feeds the _Unwind_Resume call.
- BasicBlock *UnwindBBDom = Resumes[0]->getParent();
for (SmallVectorImpl<ResumeInst*>::iterator
I = Resumes.begin(), E = Resumes.end(); I != E; ++I) {
ResumeInst *RI = *I;
- BranchInst::Create(UnwindBB, RI->getParent());
- ExtractValueInst *ExnObj = ExtractValueInst::Create(RI->getOperand(0),
- 0, "exn.obj", RI);
- PN->addIncoming(ExnObj, RI->getParent());
- UnwindBBDom = DT->findNearestCommonDominator(RI->getParent(), UnwindBBDom);
- RI->eraseFromParent();
+ BasicBlock *Parent = RI->getParent();
+ BranchInst::Create(UnwindBB, Parent);
+
+ Instruction *ExnObj = GetExceptionObject(RI);
+ PN->addIncoming(ExnObj, Parent);
+
+ ++NumResumesLowered;
}
// Call the function.
@@ -711,40 +174,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
// We never expect _Unwind_Resume to return.
new UnreachableInst(Ctx, UnwindBB);
-
- // Now update DominatorTree analysis information.
- DT->addNewBlock(UnwindBB, UnwindBBDom);
return true;
}
bool DwarfEHPrepare::runOnFunction(Function &Fn) {
- bool Changed = false;
-
- // Initialize internal state.
- DT = &getAnalysis<DominatorTree>(); // FIXME: We won't need this with the new EH.
- F = &Fn;
-
- if (InsertUnwindResumeCalls()) {
- // FIXME: The reset of this function can go once the new EH is done.
- LandingPads.clear();
- return true;
- }
-
- // Ensure that only unwind edges end at landing pads (a landing pad is a
- // basic block where an invoke unwind edge ends).
- Changed |= NormalizeLandingPads();
-
- // Turn unwind instructions and eh.resume calls into libcalls.
- Changed |= LowerUnwindsAndResumes();
-
- // TODO: Move eh.selector calls to landing pads and combine them.
-
- // Move eh.exception calls to landing pads.
- Changed |= MoveExceptionValueCalls();
-
- Changed |= HandleURoRInvokes();
-
- LandingPads.clear();
-
+ bool Changed = InsertUnwindResumeCalls(Fn);
return Changed;
}
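
A standalone sketch of the single-resume fast path added above, using the same IR construction calls as the patch; the header paths and the pre-resolved RewindFunction and calling-convention parameters are assumptions for this snapshot of the tree:

#include "llvm/BasicBlock.h"
#include "llvm/Constant.h"
#include "llvm/Instructions.h"
using namespace llvm;

static void lowerSingleResume(ResumeInst *RI, Constant *RewindFunction,
                              CallingConv::ID RewindCC) {
  BasicBlock *UnwindBB = RI->getParent();

  // Field 0 of the {i8*, i32} landing-pad aggregate is the exception pointer.
  Instruction *ExnObj =
      ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI);
  RI->eraseFromParent();

  // Append the _Unwind_Resume call and terminate the block; the runtime
  // call never returns.
  CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
  CI->setCallingConv(RewindCC);
  new UnreachableInst(UnwindBB->getContext(), UnwindBB);
}
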
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
deleted file mode 100644
index 5b634682cc87..000000000000
--- a/lib/CodeGen/ELF.h
+++ /dev/null
@@ -1,227 +0,0 @@
-//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This header contains common, non-processor-specific data structures and
-// constants for the ELF file format.
-//
-// The details of the ELF32 bits in this file are largely based on the Tool
-// Interface Standard (TIS) Executable and Linking Format (ELF) Specification
-// Version 1.2, May 1995. The ELF64 is based on HP/Intel definition of the
-// ELF-64 object file format document, Version 1.5 Draft 2 May 27, 1998
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef CODEGEN_ELF_H
-#define CODEGEN_ELF_H
-
-#include "llvm/CodeGen/BinaryObject.h"
-#include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
- class GlobalValue;
-
- /// ELFSym - This struct contains information about each symbol that is
- /// added to logical symbol table for the module. This is eventually
- /// turned into a real symbol table in the file.
- struct ELFSym {
-
- // ELF symbols are related to llvm ones by being one of the two llvm
- // types, for the other ones (section, file, func) a null pointer is
- // assumed by default.
- union {
- const GlobalValue *GV; // If this is a pointer to a GV
- const char *Ext; // If this is a pointer to a named symbol
- } Source;
-
- // Describes from which source type this ELF symbol comes from,
- // they can be GlobalValue, ExternalSymbol or neither.
- enum {
- isGV, // The Source.GV field is valid.
- isExtSym, // The Source.ExtSym field is valid.
- isOther // Not a GlobalValue or External Symbol
- };
- unsigned SourceType;
-
- bool isGlobalValue() const { return SourceType == isGV; }
- bool isExternalSym() const { return SourceType == isExtSym; }
-
- // getGlobalValue - If this is a global value which originated the
- // elf symbol, return a reference to it.
- const GlobalValue *getGlobalValue() const {
- assert(SourceType == isGV && "This is not a global value");
- return Source.GV;
- }
-
- // getExternalSym - If this is an external symbol which originated the
- // elf symbol, return a reference to it.
- const char *getExternalSymbol() const {
- assert(SourceType == isExtSym && "This is not an external symbol");
- return Source.Ext;
- }
-
- // getGV - From a global value return a elf symbol to represent it
- static ELFSym *getGV(const GlobalValue *GV, unsigned Bind,
- unsigned Type, unsigned Visibility) {
- ELFSym *Sym = new ELFSym();
- Sym->Source.GV = GV;
- Sym->setBind(Bind);
- Sym->setType(Type);
- Sym->setVisibility(Visibility);
- Sym->SourceType = isGV;
- return Sym;
- }
-
- // getExtSym - Create and return an elf symbol to represent an
- // external symbol
- static ELFSym *getExtSym(const char *Ext) {
- ELFSym *Sym = new ELFSym();
- Sym->Source.Ext = Ext;
- Sym->setBind(ELF::STB_GLOBAL);
- Sym->setType(ELF::STT_NOTYPE);
- Sym->setVisibility(ELF::STV_DEFAULT);
- Sym->SourceType = isExtSym;
- return Sym;
- }
-
- // getSectionSym - Returns a elf symbol to represent an elf section
- static ELFSym *getSectionSym() {
- ELFSym *Sym = new ELFSym();
- Sym->setBind(ELF::STB_LOCAL);
- Sym->setType(ELF::STT_SECTION);
- Sym->setVisibility(ELF::STV_DEFAULT);
- Sym->SourceType = isOther;
- return Sym;
- }
-
- // getFileSym - Returns a elf symbol to represent the module identifier
- static ELFSym *getFileSym() {
- ELFSym *Sym = new ELFSym();
- Sym->setBind(ELF::STB_LOCAL);
- Sym->setType(ELF::STT_FILE);
- Sym->setVisibility(ELF::STV_DEFAULT);
- Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS;
- Sym->SourceType = isOther;
- return Sym;
- }
-
- // getUndefGV - Returns a STT_NOTYPE symbol
- static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) {
- ELFSym *Sym = new ELFSym();
- Sym->Source.GV = GV;
- Sym->setBind(Bind);
- Sym->setType(ELF::STT_NOTYPE);
- Sym->setVisibility(ELF::STV_DEFAULT);
- Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF;
- Sym->SourceType = isGV;
- return Sym;
- }
-
- // ELF specific fields
- unsigned NameIdx; // Index in .strtab of name, once emitted.
- uint64_t Value;
- unsigned Size;
- uint8_t Info;
- uint8_t Other;
- unsigned short SectionIdx;
-
- // Symbol index into the Symbol table
- unsigned SymTabIdx;
-
- ELFSym() : SourceType(isOther), NameIdx(0), Value(0),
- Size(0), Info(0), Other(ELF::STV_DEFAULT), SectionIdx(0),
- SymTabIdx(0) {}
-
- unsigned getBind() const { return (Info >> 4) & 0xf; }
- unsigned getType() const { return Info & 0xf; }
- bool isLocalBind() const { return getBind() == ELF::STB_LOCAL; }
- bool isFileType() const { return getType() == ELF::STT_FILE; }
-
- void setBind(unsigned X) {
- assert(X == (X & 0xF) && "Bind value out of range!");
- Info = (Info & 0x0F) | (X << 4);
- }
-
- void setType(unsigned X) {
- assert(X == (X & 0xF) && "Type value out of range!");
- Info = (Info & 0xF0) | X;
- }
-
- void setVisibility(unsigned V) {
- assert(V == (V & 0x3) && "Visibility value out of range!");
- Other = V;
- }
- };
-
- /// ELFSection - This struct contains information about each section that is
- /// emitted to the file. This is eventually turned into the section header
- /// table at the end of the file.
- class ELFSection : public BinaryObject {
- public:
- // ELF specific fields
- unsigned NameIdx; // sh_name - .shstrtab idx of name, once emitted.
- unsigned Type; // sh_type - Section contents & semantics
- unsigned Flags; // sh_flags - Section flags.
- uint64_t Addr; // sh_addr - The mem addr this section is in.
- unsigned Offset; // sh_offset - Offset from the file start
- unsigned Size; // sh_size - The section size.
- unsigned Link; // sh_link - Section header table index link.
- unsigned Info; // sh_info - Auxiliary information.
- unsigned Align; // sh_addralign - Alignment of section.
- unsigned EntSize; // sh_entsize - Size of entries in the section e
-
- /// SectionIdx - The number of the section in the Section Table.
- unsigned short SectionIdx;
-
- /// Sym - The symbol to represent this section if it has one.
- ELFSym *Sym;
-
- /// getSymIndex - Returns the symbol table index of the symbol
- /// representing this section.
- unsigned getSymbolTableIndex() const {
- assert(Sym && "section not present in the symbol table");
- return Sym->SymTabIdx;
- }
-
- ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit)
- : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0),
- Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {}
- };
-
- /// ELFRelocation - This class contains all the information necessary to
- /// to generate any 32-bit or 64-bit ELF relocation entry.
- class ELFRelocation {
- uint64_t r_offset; // offset in the section of the object this applies to
- uint32_t r_symidx; // symbol table index of the symbol to use
- uint32_t r_type; // machine specific relocation type
- int64_t r_add; // explicit relocation addend
- bool r_rela; // if true then the addend is part of the entry
- // otherwise the addend is at the location specified
- // by r_offset
- public:
- uint64_t getInfo(bool is64Bit) const {
- if (is64Bit)
- return ((uint64_t)r_symidx << 32) + ((uint64_t)r_type & 0xFFFFFFFFL);
- else
- return (r_symidx << 8) + (r_type & 0xFFL);
- }
-
- uint64_t getOffset() const { return r_offset; }
- int64_t getAddend() const { return r_add; }
-
- ELFRelocation(uint64_t off, uint32_t sym, uint32_t type,
- bool rela = true, int64_t addend = 0) :
- r_offset(off), r_symidx(sym), r_type(type),
- r_add(addend), r_rela(rela) {}
- };
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
deleted file mode 100644
index 660424c3c141..000000000000
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ /dev/null
@@ -1,205 +0,0 @@
-//===-- lib/CodeGen/ELFCodeEmitter.cpp ------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "elfce"
-
-#include "ELF.h"
-#include "ELFWriter.h"
-#include "ELFCodeEmitter.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/BinaryObject.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetELFWriterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-
-//===----------------------------------------------------------------------===//
-// ELFCodeEmitter Implementation
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-
-/// startFunction - This callback is invoked when a new machine function is
-/// about to be emitted.
-void ELFCodeEmitter::startFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "processing function: "
- << MF.getFunction()->getName() << "\n");
-
- // Get the ELF Section that this function belongs in.
- ES = &EW.getTextSection(MF.getFunction());
-
- // Set the desired binary object to be used by the code emitters
- setBinaryObject(ES);
-
- // Get the function alignment in bytes
- unsigned Align = (1 << MF.getAlignment());
-
- // The function must start on its required alignment
- ES->emitAlignment(Align);
-
- // Update the section alignment if needed.
- ES->Align = std::max(ES->Align, Align);
-
- // Record the function start offset
- FnStartOff = ES->getCurrentPCOffset();
-
- // Emit constant pool and jump tables to their appropriate sections.
- // They need to be emitted before the function because in some targets
- // the later may reference JT or CP entry address.
- emitConstantPool(MF.getConstantPool());
- if (MF.getJumpTableInfo())
- emitJumpTables(MF.getJumpTableInfo());
-}
-
-/// finishFunction - This callback is invoked after the function is completely
-/// finished.
-bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
- // Add a symbol to represent the function.
- const Function *F = MF.getFunction();
- ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC,
- EW.getGlobalELFVisibility(F));
- FnSym->SectionIdx = ES->SectionIdx;
- FnSym->Size = ES->getCurrentPCOffset()-FnStartOff;
- EW.AddPendingGlobalSymbol(F, true);
-
- // Offset from start of Section
- FnSym->Value = FnStartOff;
-
- if (!F->hasPrivateLinkage())
- EW.SymbolList.push_back(FnSym);
-
- // Patch up Jump Table Section relocations to use the real MBBs offsets
- // now that the MBB label offsets inside the function are known.
- if (MF.getJumpTableInfo()) {
- ELFSection &JTSection = EW.getJumpTableSection();
- for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(),
- MRE = JTRelocations.end(); MRI != MRE; ++MRI) {
- MachineRelocation &MR = *MRI;
- uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
- MR.setResultPointer((void*)MBBOffset);
- MR.setConstantVal(ES->SectionIdx);
- JTSection.addRelocation(MR);
- }
- }
-
- // If we have emitted any relocations to function-specific objects such as
- // basic blocks, constant pools entries, or jump tables, record their
- // addresses now so that we can rewrite them with the correct addresses later
- for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
- MachineRelocation &MR = Relocations[i];
- intptr_t Addr;
- if (MR.isGlobalValue()) {
- EW.AddPendingGlobalSymbol(MR.getGlobalValue());
- } else if (MR.isExternalSymbol()) {
- EW.AddPendingExternalSymbol(MR.getExternalSymbol());
- } else if (MR.isBasicBlock()) {
- Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
- MR.setConstantVal(ES->SectionIdx);
- MR.setResultPointer((void*)Addr);
- } else if (MR.isConstantPoolIndex()) {
- Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
- MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
- MR.setResultPointer((void*)Addr);
- } else if (MR.isJumpTableIndex()) {
- ELFSection &JTSection = EW.getJumpTableSection();
- Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
- MR.setConstantVal(JTSection.SectionIdx);
- MR.setResultPointer((void*)Addr);
- } else {
- llvm_unreachable("Unhandled relocation type");
- }
- ES->addRelocation(MR);
- }
-
- // Clear per-function data structures.
- JTRelocations.clear();
- Relocations.clear();
- CPLocations.clear();
- CPSections.clear();
- JTLocations.clear();
- MBBLocations.clear();
- return false;
-}
-
-/// emitConstantPool - For each constant pool entry, figure out which section
-/// the constant should live in and emit the constant
-void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
- const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
- if (CP.empty()) return;
-
- // TODO: handle PIC codegen
- assert(TM.getRelocationModel() != Reloc::PIC_ &&
- "PIC codegen not yet handled for elf constant pools!");
-
- for (unsigned i = 0, e = CP.size(); i != e; ++i) {
- MachineConstantPoolEntry CPE = CP[i];
-
- // Record the constant pool location and the section index
- ELFSection &CstPool = EW.getConstantPoolSection(CPE);
- CPLocations.push_back(CstPool.size());
- CPSections.push_back(CstPool.SectionIdx);
-
- if (CPE.isMachineConstantPoolEntry())
- assert(0 && "CPE.isMachineConstantPoolEntry not supported yet");
-
- // Emit the constant to constant pool section
- EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool);
- }
-}
-
-/// emitJumpTables - Emit all the jump tables for a given jump table info
-/// record to the appropriate section.
-void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
- const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- if (JT.empty()) return;
-
- // FIXME: handle PIC codegen
- assert(TM.getRelocationModel() != Reloc::PIC_ &&
- "PIC codegen not yet handled for elf jump tables!");
-
- const TargetELFWriterInfo *TEW = TM.getELFWriterInfo();
- unsigned EntrySize = 4; //MJTI->getEntrySize();
-
- // Get the ELF Section to emit the jump table
- ELFSection &JTSection = EW.getJumpTableSection();
-
- // For each JT, record its offset from the start of the section
- for (unsigned i = 0, e = JT.size(); i != e; ++i) {
- const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
-
- // Record JT 'i' offset in the JT section
- JTLocations.push_back(JTSection.size());
-
- // Each MBB entry in the Jump table section has a relocation entry
- // against the current text section.
- for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
- unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy();
- MachineRelocation MR =
- MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]);
-
- // Add the relocation to the Jump Table section
- JTRelocations.push_back(MR);
-
- // Output placeholder for MBB in the JT section
- for (unsigned s=0; s < EntrySize; ++s)
- JTSection.emitByte(0);
- }
- }
-}
-
-} // end namespace llvm
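A minimal standalone sketch, not part of this patch: the deleted emitJumpTables above records a relocation at the current section offset for every jump-table entry and then emits a zeroed placeholder of EntrySize bytes, which finishFunction later patches once the MBB offsets are known. The Reloc and Section types below are illustrative stand-ins, not the deleted MachineRelocation/BinaryObject classes.

#include <cstddef>
#include <cstdint>
#include <vector>

struct Reloc { std::size_t Offset; int TargetBlock; };  // illustrative stand-in

struct Section {
  std::vector<uint8_t> Data;
  std::vector<Reloc> Relocs;

  // Pair each jump-table slot with a relocation at the slot's offset, then
  // fill the slot with zeros; the relocation is patched with the real MBB
  // address once it is known (as finishFunction does above).
  void emitJumpTableSlot(int TargetBlock, unsigned EntrySize = 4) {
    Relocs.push_back(Reloc{Data.size(), TargetBlock});
    for (unsigned i = 0; i != EntrySize; ++i)
      Data.push_back(0);
  }
};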
diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h
deleted file mode 100644
index 8671c674eecf..000000000000
--- a/lib/CodeGen/ELFCodeEmitter.h
+++ /dev/null
@@ -1,78 +0,0 @@
-//===-- lib/CodeGen/ELFCodeEmitter.h ----------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ELFCODEEMITTER_H
-#define ELFCODEEMITTER_H
-
-#include "llvm/CodeGen/ObjectCodeEmitter.h"
-#include <vector>
-
-namespace llvm {
- class ELFWriter;
- class ELFSection;
-
- /// ELFCodeEmitter - This class is used by the ELFWriter to
- /// emit the code for functions to the ELF file.
- class ELFCodeEmitter : public ObjectCodeEmitter {
- ELFWriter &EW;
-
- /// Target machine description
- TargetMachine &TM;
-
- /// Section containing code for functions
- ELFSection *ES;
-
- /// Relocations - Record relocations needed by the current function
- std::vector<MachineRelocation> Relocations;
-
- /// JTRelocations - Record relocations needed by the jump table
- /// section.
- std::vector<MachineRelocation> JTRelocations;
-
- /// FnStartOff - Function offset from the beginning of ELFSection 'ES'
- uintptr_t FnStartOff;
- public:
- explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {}
-
- /// addRelocation - Register new relocations for this function
- void addRelocation(const MachineRelocation &MR) {
- Relocations.push_back(MR);
- }
-
- /// emitConstantPool - For each constant pool entry, figure out which
- /// section the constant should live in and emit data to it
- void emitConstantPool(MachineConstantPool *MCP);
-
- /// emitJumpTables - Emit all the jump tables for a given jump table
- /// info and record them to the appropriate section.
- void emitJumpTables(MachineJumpTableInfo *MJTI);
-
- void startFunction(MachineFunction &F);
- bool finishFunction(MachineFunction &F);
-
- /// emitLabel - Emits a label
- virtual void emitLabel(MCSymbol *Label) {
- assert(0 && "emitLabel not implemented");
- }
-
- /// getLabelAddress - Return the address of the specified LabelID,
- /// only usable after the LabelID has been emitted.
- virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
- assert(0 && "getLabelAddress not implemented");
- return 0;
- }
-
- virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {}
-
-}; // end class ELFCodeEmitter
-
-} // end namespace llvm
-
-#endif
-
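A minimal sketch, not from the patch, of the per-function bookkeeping the FnStartOff field above supports: record the section offset at startFunction, then at finishFunction derive the function symbol's value (offset within the section) and size from the current offset. The names below are illustrative, not the deleted ELFSym/ObjectCodeEmitter API.

#include <cstddef>

// Illustrative stand-in for the ELFSym fields the emitter fills in.
struct FuncSym { std::size_t Value; std::size_t Size; };

struct FunctionEmitter {
  std::size_t SectionOffset = 0; // current write position in the text section
  std::size_t FnStartOff = 0;    // recorded by startFunction

  void startFunction() { FnStartOff = SectionOffset; }

  // The symbol's value is its offset from the start of the section; its size
  // is everything emitted since startFunction.
  FuncSym finishFunction() const {
    return {FnStartOff, SectionOffset - FnStartOff};
  }
};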
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
deleted file mode 100644
index f2c218565854..000000000000
--- a/lib/CodeGen/ELFWriter.cpp
+++ /dev/null
@@ -1,1105 +0,0 @@
-//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the target-independent ELF writer. This file writes out
-// the ELF file in the following order:
-//
-// #1. ELF Header
-// #2. '.text' section
-// #3. '.data' section
-// #4. '.bss' section (conceptual position in file)
-// ...
-// #X. '.shstrtab' section
-// #Y. Section Table
-//
-// The entries in the section table are laid out as:
-// #0. Null entry [required]
-// #1. ".text" entry - the program code
-// #2. ".data" entry - global variables with initializers. [ if needed ]
-// #3. ".bss" entry - global variables without initializers. [ if needed ]
-// ...
-// #N. ".shstrtab" entry - String table for the section names.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "elfwriter"
-#include "ELF.h"
-#include "ELFWriter.h"
-#include "ELFCodeEmitter.h"
-#include "llvm/Constants.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/CodeGen/BinaryObject.h"
-#include "llvm/CodeGen/MachineCodeEmitter.h"
-#include "llvm/CodeGen/ObjectCodeEmitter.h"
-#include "llvm/CodeGen/MachineCodeEmitter.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetELFWriterInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
-using namespace llvm;
-
-char ELFWriter::ID = 0;
-
-//===----------------------------------------------------------------------===//
-// ELFWriter Implementation
-//===----------------------------------------------------------------------===//
-
-ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
- : MachineFunctionPass(ID), O(o), TM(tm),
- OutContext(*new MCContext(*TM.getMCAsmInfo(), *TM.getRegisterInfo(),
- &TM.getTargetLowering()->getObjFileLowering())),
- TLOF(TM.getTargetLowering()->getObjFileLowering()),
- is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
- isLittleEndian(TM.getTargetData()->isLittleEndian()),
- ElfHdr(isLittleEndian, is64Bit) {
-
- MAI = TM.getMCAsmInfo();
- TEW = TM.getELFWriterInfo();
-
- // Create the object code emitter object for this target.
- ElfCE = new ELFCodeEmitter(*this);
-
- // Initial number of sections
- NumSections = 0;
-}
-
-ELFWriter::~ELFWriter() {
- delete ElfCE;
- delete &OutContext;
-
- while(!SymbolList.empty()) {
- delete SymbolList.back();
- SymbolList.pop_back();
- }
-
- while(!PrivateSyms.empty()) {
- delete PrivateSyms.back();
- PrivateSyms.pop_back();
- }
-
- while(!SectionList.empty()) {
- delete SectionList.back();
- SectionList.pop_back();
- }
-
- // Release the name mangler object.
- delete Mang; Mang = 0;
-}
-
-// doInitialization - Emit the file header and all of the global variables for
-// the module to the ELF file.
-bool ELFWriter::doInitialization(Module &M) {
- // Initialize TargetLoweringObjectFile.
- const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM);
-
- Mang = new Mangler(OutContext, *TM.getTargetData());
-
- // ELF Header
- // ----------
- // Fields e_shnum and e_shstrndx are only known after all sections have
- // been emitted. Their locations in the output buffer are recorded so
- // they can be patched up later.
- //
- // Note
- // ----
- // The emitWord method behaves differently for ELF32 and ELF64, writing
- // 4 bytes in the former and 8 in the latter for *_off and *_addr ELF types.
-
- ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0]
- ElfHdr.emitByte('E'); // e_ident[EI_MAG1]
- ElfHdr.emitByte('L'); // e_ident[EI_MAG2]
- ElfHdr.emitByte('F'); // e_ident[EI_MAG3]
-
- ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS]
- ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA]
- ElfHdr.emitByte(ELF::EV_CURRENT); // e_ident[EI_VERSION]
- ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD]
-
- ElfHdr.emitWord16(ELF::ET_REL); // e_type
- ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target
- ElfHdr.emitWord32(ELF::EV_CURRENT); // e_version
- ElfHdr.emitWord(0); // e_entry, no entry point in .o file
- ElfHdr.emitWord(0); // e_phoff, no program header for .o
- ELFHdr_e_shoff_Offset = ElfHdr.size();
- ElfHdr.emitWord(0); // e_shoff = sec hdr table off in bytes
- ElfHdr.emitWord32(TEW->getEFlags()); // e_flags = whatever the target wants
- ElfHdr.emitWord16(TEW->getHdrSize()); // e_ehsize = ELF header size
- ElfHdr.emitWord16(0); // e_phentsize = prog header entry size
- ElfHdr.emitWord16(0); // e_phnum = # prog header entries = 0
-
- // e_shentsize = Section header entry size
- ElfHdr.emitWord16(TEW->getSHdrSize());
-
- // e_shnum = # of section header ents
- ELFHdr_e_shnum_Offset = ElfHdr.size();
- ElfHdr.emitWord16(0); // Placeholder
-
- // e_shstrndx = Section # of '.shstrtab'
- ELFHdr_e_shstrndx_Offset = ElfHdr.size();
- ElfHdr.emitWord16(0); // Placeholder
-
- // Add the null section, which is required to be first in the file.
- getNullSection();
-
- // The first entry in the symtab is the null symbol and the second
- // is a local symbol containing the module/file name
- SymbolList.push_back(new ELFSym());
- SymbolList.push_back(ELFSym::getFileSym());
-
- return false;
-}
-
-// AddPendingGlobalSymbol - Add a global to be processed and to
-// the global symbol lookup, use a zero index because the table
-// index will be determined later.
-void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV,
- bool AddToLookup /* = false */) {
- PendingGlobals.insert(GV);
- if (AddToLookup)
- GblSymLookup[GV] = 0;
-}
-
-// AddPendingExternalSymbol - Add the external to be processed
-// and to the external symbol lookup, use a zero index because
-// the symbol table index will be determined later.
-void ELFWriter::AddPendingExternalSymbol(const char *External) {
- PendingExternals.insert(External);
- ExtSymLookup[External] = 0;
-}
-
-ELFSection &ELFWriter::getDataSection() {
- const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection();
- return getSection(Data->getSectionName(), Data->getType(),
- Data->getFlags(), 4);
-}
-
-ELFSection &ELFWriter::getBSSSection() {
- const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection();
- return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4);
-}
-
-// getCtorSection - Get the static constructor section
-ELFSection &ELFWriter::getCtorSection() {
- const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection();
- return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags());
-}
-
-// getDtorSection - Get the static destructor section
-ELFSection &ELFWriter::getDtorSection() {
- const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection();
- return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags());
-}
-
-// getTextSection - Get the text section for the specified function
-ELFSection &ELFWriter::getTextSection(const Function *F) {
- const MCSectionELF *Text =
- (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM);
- return getSection(Text->getSectionName(), Text->getType(), Text->getFlags());
-}
-
-// getJumpTableSection - Get a read only section for constants when
-// emitting jump tables. TODO: add PIC support
-ELFSection &ELFWriter::getJumpTableSection() {
- const MCSectionELF *JT =
- (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly());
- return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(),
- TM.getTargetData()->getPointerABIAlignment());
-}
-
-// getConstantPoolSection - Get a constant pool section based on the machine
-// constant pool entry type and relocation info.
-ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) {
- SectionKind Kind;
- switch (CPE.getRelocationInfo()) {
- default: llvm_unreachable("Unknown section kind");
- case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
- case 1:
- Kind = SectionKind::getReadOnlyWithRelLocal();
- break;
- case 0:
- switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
- case 4: Kind = SectionKind::getMergeableConst4(); break;
- case 8: Kind = SectionKind::getMergeableConst8(); break;
- case 16: Kind = SectionKind::getMergeableConst16(); break;
- default: Kind = SectionKind::getMergeableConst(); break;
- }
- }
-
- const MCSectionELF *CPSect =
- (const MCSectionELF *)TLOF.getSectionForConstant(Kind);
- return getSection(CPSect->getSectionName(), CPSect->getType(),
- CPSect->getFlags(), CPE.getAlignment());
-}
-
-// getRelocSection - Return the relocation section of section 'S'. 'RelA'
-// is true if the relocation section contains entries with addends.
-ELFSection &ELFWriter::getRelocSection(ELFSection &S) {
- unsigned SectionType = TEW->hasRelocationAddend() ?
- ELF::SHT_RELA : ELF::SHT_REL;
-
- std::string SectionName(".rel");
- if (TEW->hasRelocationAddend())
- SectionName.append("a");
- SectionName.append(S.getName());
-
- return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment());
-}
-
-// getGlobalELFVisibility - Returns the ELF specific visibility type
-unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
- switch (GV->getVisibility()) {
- default:
- llvm_unreachable("unknown visibility type");
- case GlobalValue::DefaultVisibility:
- return ELF::STV_DEFAULT;
- case GlobalValue::HiddenVisibility:
- return ELF::STV_HIDDEN;
- case GlobalValue::ProtectedVisibility:
- return ELF::STV_PROTECTED;
- }
- return 0;
-}
-
-// getGlobalELFBinding - Returns the ELF specific binding type
-unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) {
- if (GV->hasInternalLinkage())
- return ELF::STB_LOCAL;
-
- if (GV->isWeakForLinker() && !GV->hasCommonLinkage())
- return ELF::STB_WEAK;
-
- return ELF::STB_GLOBAL;
-}
-
-// getGlobalELFType - Returns the ELF specific type for a global
-unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) {
- if (GV->isDeclaration())
- return ELF::STT_NOTYPE;
-
- if (isa<Function>(GV))
- return ELF::STT_FUNC;
-
- return ELF::STT_OBJECT;
-}
-
-// IsELFUndefSym - True if the global value must be marked as a symbol
-// which points to a SHN_UNDEF section. This means that the symbol has
-// no definition in the module.
-static bool IsELFUndefSym(const GlobalValue *GV) {
- return GV->isDeclaration() || (isa<Function>(GV));
-}
-
- // AddToSymbolList - Update the symbol lookup and, if the symbol is
- // private, add it to the PrivateSyms list, otherwise to SymbolList.
-void ELFWriter::AddToSymbolList(ELFSym *GblSym) {
- assert(GblSym->isGlobalValue() && "Symbol must be a global value");
-
- const GlobalValue *GV = GblSym->getGlobalValue();
- if (GV->hasPrivateLinkage()) {
- // For private symbols, keep track of the index inside
- // the private list since it will never go to the symbol
- // table and won't be patched up later.
- PrivateSyms.push_back(GblSym);
- GblSymLookup[GV] = PrivateSyms.size()-1;
- } else {
- // Non-private symbols are left with zero indices until
- // they are patched up during the symbol table emission
- // (where the indices are created).
- SymbolList.push_back(GblSym);
- GblSymLookup[GV] = 0;
- }
-}
-
-/// HasCommonSymbols - True if this section holds common symbols; this is
-/// indicated in the ELF object file by a symbol with SHN_COMMON section
-/// header index.
-static bool HasCommonSymbols(const MCSectionELF &S) {
- // FIXME: this is wrong, a common symbol can be in .data for example.
- if (StringRef(S.getSectionName()).startswith(".gnu.linkonce."))
- return true;
-
- return false;
-}
-
-
-// EmitGlobal - Choose the right section for global and emit it
-void ELFWriter::EmitGlobal(const GlobalValue *GV) {
-
- // Check if the referenced symbol is already emitted
- if (GblSymLookup.find(GV) != GblSymLookup.end())
- return;
-
- // Handle ELF Bind, Visibility and Type for the current symbol
- unsigned SymBind = getGlobalELFBinding(GV);
- unsigned SymType = getGlobalELFType(GV);
- bool IsUndefSym = IsELFUndefSym(GV);
-
- ELFSym *GblSym = IsUndefSym ? ELFSym::getUndefGV(GV, SymBind)
- : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV));
-
- if (!IsUndefSym) {
- assert(isa<GlobalVariable>(GV) && "GV not a global variable!");
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
-
- // Handle special llvm globals
- if (EmitSpecialLLVMGlobal(GVar))
- return;
-
- // Get the ELF section where this global belongs from TLOF
- const MCSectionELF *S =
- (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM);
- ELFSection &ES =
- getSection(S->getSectionName(), S->getType(), S->getFlags());
- SectionKind Kind = S->getKind();
-
- // The symbol align should update the section alignment if needed
- const TargetData *TD = TM.getTargetData();
- unsigned Align = TD->getPreferredAlignment(GVar);
- unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType());
- GblSym->Size = Size;
-
- if (HasCommonSymbols(*S)) { // Symbol must go to a common section
- GblSym->SectionIdx = ELF::SHN_COMMON;
-
- // A new linkonce section is created for each global in the
- // common section, the default alignment is 1 and the symbol
- // value contains its alignment.
- ES.Align = 1;
- GblSym->Value = Align;
-
- } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS.
- GblSym->SectionIdx = ES.SectionIdx;
-
- // Update the size with the alignment so that the next object can
- // start at the right offset in the section.
- if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1);
- ES.Align = std::max(ES.Align, Align);
-
- // GblSym->Value should contain the virtual offset inside the section.
- // Virtual because the BSS space is not allocated on ELF objects
- GblSym->Value = ES.Size;
- ES.Size += Size;
-
- } else { // The symbol must go to some kind of data section
- GblSym->SectionIdx = ES.SectionIdx;
-
- // GblSym->Value should contain the symbol offset inside the section,
- // and all symbols should start on their required alignment boundary
- ES.Align = std::max(ES.Align, Align);
- ES.emitAlignment(Align);
- GblSym->Value = ES.size();
-
- // Emit the global to the data section 'ES'
- EmitGlobalConstant(GVar->getInitializer(), ES);
- }
- }
-
- AddToSymbolList(GblSym);
-}
-
-void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
- ELFSection &GblS) {
-
- // Print the fields in successive locations. Pad to align if needed!
- const TargetData *TD = TM.getTargetData();
- unsigned Size = TD->getTypeAllocSize(CVS->getType());
- const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
- uint64_t sizeSoFar = 0;
- for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
- const Constant* field = CVS->getOperand(i);
-
- // Check if padding is needed and insert one or more 0s.
- uint64_t fieldSize = TD->getTypeAllocSize(field->getType());
- uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1))
- - cvsLayout->getElementOffset(i)) - fieldSize;
- sizeSoFar += fieldSize + padSize;
-
- // Now print the actual field value.
- EmitGlobalConstant(field, GblS);
-
- // Insert padding - this may include padding to increase the size of the
- // current field up to the ABI size (if the struct is not packed) as well
- // as padding to ensure that the next field starts at the right offset.
- GblS.emitZeros(padSize);
- }
- assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
- "Layout of constant struct may be incorrect!");
-}
-
-void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) {
- const TargetData *TD = TM.getTargetData();
- unsigned Size = TD->getTypeAllocSize(CV->getType());
-
- if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
- for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
- EmitGlobalConstant(CVA->getOperand(i), GblS);
- return;
- } else if (isa<ConstantAggregateZero>(CV)) {
- GblS.emitZeros(Size);
- return;
- } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
- EmitGlobalConstantStruct(CVS, GblS);
- return;
- } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
- APInt Val = CFP->getValueAPF().bitcastToAPInt();
- if (CFP->getType()->isDoubleTy())
- GblS.emitWord64(Val.getZExtValue());
- else if (CFP->getType()->isFloatTy())
- GblS.emitWord32(Val.getZExtValue());
- else if (CFP->getType()->isX86_FP80Ty()) {
- unsigned PadSize = TD->getTypeAllocSize(CFP->getType())-
- TD->getTypeStoreSize(CFP->getType());
- GblS.emitWordFP80(Val.getRawData(), PadSize);
- } else if (CFP->getType()->isPPC_FP128Ty())
- llvm_unreachable("PPC_FP128Ty global emission not implemented");
- return;
- } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- if (Size == 1)
- GblS.emitByte(CI->getZExtValue());
- else if (Size == 2)
- GblS.emitWord16(CI->getZExtValue());
- else if (Size == 4)
- GblS.emitWord32(CI->getZExtValue());
- else
- EmitGlobalConstantLargeInt(CI, GblS);
- return;
- } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
- VectorType *PTy = CP->getType();
- for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
- EmitGlobalConstant(CP->getOperand(I), GblS);
- return;
- } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
- // Resolve a constant expression which returns a (Constant, Offset)
- // pair. If 'Res.first' is a GlobalValue, emit a relocation with
- // the offset 'Res.second'; otherwise emit a global constant as is
- // always done for non-constant-expression types.
- CstExprResTy Res = ResolveConstantExpr(CE);
- const Constant *Op = Res.first;
-
- if (isa<GlobalValue>(Op))
- EmitGlobalDataRelocation(cast<const GlobalValue>(Op),
- TD->getTypeAllocSize(Op->getType()),
- GblS, Res.second);
- else
- EmitGlobalConstant(Op, GblS);
-
- return;
- } else if (CV->getType()->getTypeID() == Type::PointerTyID) {
- // Fill the data entry with zeros or emit a relocation entry
- if (isa<ConstantPointerNull>(CV))
- GblS.emitZeros(Size);
- else
- EmitGlobalDataRelocation(cast<const GlobalValue>(CV),
- Size, GblS);
- return;
- } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
- // This is a constant address for a global variable or function and
- // therefore must be referenced using a relocation entry.
- EmitGlobalDataRelocation(GV, Size, GblS);
- return;
- }
-
- std::string msg;
- raw_string_ostream ErrorMsg(msg);
- ErrorMsg << "Constant unimp for type: " << *CV->getType();
- report_fatal_error(ErrorMsg.str());
-}
-
-// ResolveConstantExpr - Resolve the constant expression until it stops
-// yielding other constant expressions.
-CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) {
- const TargetData *TD = TM.getTargetData();
-
- // There isn't a constant expression nested inside anymore.
- if (!isa<ConstantExpr>(CV))
- return std::make_pair(CV, 0);
-
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
- switch (CE->getOpcode()) {
- case Instruction::BitCast:
- return ResolveConstantExpr(CE->getOperand(0));
-
- case Instruction::GetElementPtr: {
- const Constant *ptrVal = CE->getOperand(0);
- SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
- int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), idxVec);
- return std::make_pair(ptrVal, Offset);
- }
- case Instruction::IntToPtr: {
- Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
- false/*ZExt*/);
- return ResolveConstantExpr(Op);
- }
- case Instruction::PtrToInt: {
- Constant *Op = CE->getOperand(0);
- Type *Ty = CE->getType();
-
- // We can emit the pointer value into this slot if the slot is an
- // integer slot greater than or equal to the size of the pointer.
- if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
- return ResolveConstantExpr(Op);
-
- llvm_unreachable("Integer size less than pointer size");
- }
- case Instruction::Add:
- case Instruction::Sub: {
- // Only handle cases where there's a constant expression with GlobalValue
- // as first operand and ConstantInt as second, which are the cases we can
- // solve directly using a relocation entry. GlobalValue=Op0, CstInt=Op1
- // 1) Instruction::Add => (global) + CstInt
- // 2) Instruction::Sub => (global) + -CstInt
- const Constant *Op0 = CE->getOperand(0);
- const Constant *Op1 = CE->getOperand(1);
- assert(isa<ConstantInt>(Op1) && "Op1 must be a ConstantInt");
-
- CstExprResTy Res = ResolveConstantExpr(Op0);
- assert(isa<GlobalValue>(Res.first) && "Op0 must be a GlobalValue");
-
- const APInt &RHS = cast<ConstantInt>(Op1)->getValue();
- switch (CE->getOpcode()) {
- case Instruction::Add:
- return std::make_pair(Res.first, RHS.getSExtValue());
- case Instruction::Sub:
- return std::make_pair(Res.first, (-RHS).getSExtValue());
- }
- }
- }
-
- report_fatal_error(CE->getOpcodeName() +
- StringRef(": Unsupported ConstantExpr type"));
-
- return std::make_pair(CV, 0); // silence warning
-}
-
-void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
- ELFSection &GblS, int64_t Offset) {
- // Create the relocation entry for the global value
- MachineRelocation MR =
- MachineRelocation::getGV(GblS.getCurrentPCOffset(),
- TEW->getAbsoluteLabelMachineRelTy(),
- const_cast<GlobalValue*>(GV),
- Offset);
-
- // Fill the data entry with zeros
- GblS.emitZeros(Size);
-
- // Add the relocation entry for the current data section
- GblS.addRelocation(MR);
-}
-
-void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
- ELFSection &S) {
- const TargetData *TD = TM.getTargetData();
- unsigned BitWidth = CI->getBitWidth();
- assert(isPowerOf2_32(BitWidth) &&
- "Non-power-of-2-sized integers not handled!");
-
- const uint64_t *RawData = CI->getValue().getRawData();
- uint64_t Val = 0;
- for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
- Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i];
- S.emitWord64(Val);
- }
-}
-
-/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
-/// special global used by LLVM. If so, emit it and return true, otherwise
-/// do nothing and return false.
-bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
- if (GV->getName() == "llvm.used")
- llvm_unreachable("not implemented yet");
-
- // Ignore debug and non-emitted data. This handles llvm.compiler.used.
- if (GV->getSection() == "llvm.metadata" ||
- GV->hasAvailableExternallyLinkage())
- return true;
-
- if (!GV->hasAppendingLinkage()) return false;
-
- assert(GV->hasInitializer() && "Not a special LLVM global!");
-
- const TargetData *TD = TM.getTargetData();
- unsigned Align = TD->getPointerPrefAlignment();
- if (GV->getName() == "llvm.global_ctors") {
- ELFSection &Ctor = getCtorSection();
- Ctor.emitAlignment(Align);
- EmitXXStructorList(GV->getInitializer(), Ctor);
- return true;
- }
-
- if (GV->getName() == "llvm.global_dtors") {
- ELFSection &Dtor = getDtorSection();
- Dtor.emitAlignment(Align);
- EmitXXStructorList(GV->getInitializer(), Dtor);
- return true;
- }
-
- return false;
-}
-
-/// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the
-/// function pointers, ignoring the init priority.
-void ELFWriter::EmitXXStructorList(const Constant *List, ELFSection &Xtor) {
- // Should be an array of '{ i32, void ()* }' structs. The first value is the
- // init priority, which we ignore.
- if (List->isNullValue()) return;
- const ConstantArray *InitList = cast<ConstantArray>(List);
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
- if (InitList->getOperand(i)->isNullValue())
- continue;
- ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i));
-
- if (CS->getOperand(1)->isNullValue())
- continue;
-
- // Emit the function pointer.
- EmitGlobalConstant(CS->getOperand(1), Xtor);
- }
-}
-
-bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
- // Nothing to do here, this is all done through the ElfCE object above.
- return false;
-}
-
-/// doFinalization - Now that the module has been completely processed, emit
-/// the ELF file to 'O'.
-bool ELFWriter::doFinalization(Module &M) {
- // Emit .data section placeholder
- getDataSection();
-
- // Emit .bss section placeholder
- getBSSSection();
-
- // Build and emit data, bss and "common" sections.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- EmitGlobal(I);
-
- // Emit all pending globals
- for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end();
- I != E; ++I)
- EmitGlobal(*I);
-
- // Emit all pending externals
- for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end();
- I != E; ++I)
- SymbolList.push_back(ELFSym::getExtSym(*I));
-
- // Emit a symbol for each section created until now, skip null section
- for (unsigned i = 1, e = SectionList.size(); i < e; ++i) {
- ELFSection &ES = *SectionList[i];
- ELFSym *SectionSym = ELFSym::getSectionSym();
- SectionSym->SectionIdx = ES.SectionIdx;
- SymbolList.push_back(SectionSym);
- ES.Sym = SymbolList.back();
- }
-
- // Emit string table
- EmitStringTable(M.getModuleIdentifier());
-
- // Emit the symbol table now, if non-empty.
- EmitSymbolTable();
-
- // Emit the relocation sections.
- EmitRelocations();
-
- // Emit the sections string table.
- EmitSectionTableStringTable();
-
- // Dump the sections and section table to the .o file.
- OutputSectionsAndSectionTable();
-
- return false;
-}
-
-// RelocateField - Patch the relocatable field at 'Offset' in 'BO'
-// using a 'Value' of known 'Size'.
-void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset,
- int64_t Value, unsigned Size) {
- if (Size == 32)
- BO.fixWord32(Value, Offset);
- else if (Size == 64)
- BO.fixWord64(Value, Offset);
- else
- llvm_unreachable("don't know how to patch relocatable field");
-}
-
-/// EmitRelocations - Emit relocations
-void ELFWriter::EmitRelocations() {
-
- // True if the target uses the relocation entry to hold the addend,
- // otherwise the addend is written directly to the relocatable field.
- bool HasRelA = TEW->hasRelocationAddend();
-
- // Create Relocation sections for each section which needs it.
- for (unsigned i=0, e=SectionList.size(); i != e; ++i) {
- ELFSection &S = *SectionList[i];
-
- // This section does not have relocations
- if (!S.hasRelocations()) continue;
- ELFSection &RelSec = getRelocSection(S);
-
- // 'Link' - Section hdr idx of the associated symbol table
- // 'Info' - Section hdr idx of the section to which the relocation applies
- ELFSection &SymTab = getSymbolTableSection();
- RelSec.Link = SymTab.SectionIdx;
- RelSec.Info = S.SectionIdx;
- RelSec.EntSize = TEW->getRelocationEntrySize();
-
- // Get the relocations from Section
- std::vector<MachineRelocation> Relos = S.getRelocations();
- for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(),
- MRE = Relos.end(); MRI != MRE; ++MRI) {
- MachineRelocation &MR = *MRI;
-
- // Relocatable field offset from the section start
- unsigned RelOffset = MR.getMachineCodeOffset();
-
- // Symbol index in the symbol table
- unsigned SymIdx = 0;
-
- // Target specific relocation field type and size
- unsigned RelType = TEW->getRelocationType(MR.getRelocationType());
- unsigned RelTySize = TEW->getRelocationTySize(RelType);
- int64_t Addend = 0;
-
- // There are several machine relocation types, and each one of
- // them needs a different approach to retrieve the symbol table index.
- if (MR.isGlobalValue()) {
- const GlobalValue *G = MR.getGlobalValue();
- int64_t GlobalOffset = MR.getConstantVal();
- SymIdx = GblSymLookup[G];
- if (G->hasPrivateLinkage()) {
- // If the target uses a section offset in the relocation:
- // SymIdx + Addend = section sym for global + section offset
- unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx;
- Addend = PrivateSyms[SymIdx]->Value + GlobalOffset;
- SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
- } else {
- Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset);
- }
- } else if (MR.isExternalSymbol()) {
- const char *ExtSym = MR.getExternalSymbol();
- SymIdx = ExtSymLookup[ExtSym];
- Addend = TEW->getDefaultAddendForRelTy(RelType);
- } else {
- // Get the symbol index for the section symbol
- unsigned SectionIdx = MR.getConstantVal();
- SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
-
- // The symbol offset inside the section
- int64_t SymOffset = (int64_t)MR.getResultPointer();
-
- // For pc relative relocations where symbols are defined in the same
- // section they are referenced, ignore the relocation entry and patch
- // the relocatable field with the symbol offset directly.
- if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) {
- int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType);
- RelocateField(S, RelOffset, Value, RelTySize);
- continue;
- }
-
- Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset);
- }
-
- // For targets without an addend on the relocation entry, the addend
- // must be patched into the relocatable field itself; otherwise write
- // zeros to make sure there is no garbage there.
- RelocateField(S, RelOffset, HasRelA ? 0 : Addend, RelTySize);
-
- // Get the relocation entry and emit to the relocation section
- ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend);
- EmitRelocation(RelSec, Rel, HasRelA);
- }
- }
-}
-
-/// EmitRelocation - Write relocation 'Rel' to the relocation section 'RelSec'.
-void ELFWriter::EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel,
- bool HasRelA) {
- RelSec.emitWord(Rel.getOffset());
- RelSec.emitWord(Rel.getInfo(is64Bit));
- if (HasRelA)
- RelSec.emitWord(Rel.getAddend());
-}
-
-/// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable'
-void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) {
- if (is64Bit) {
- SymbolTable.emitWord32(Sym.NameIdx);
- SymbolTable.emitByte(Sym.Info);
- SymbolTable.emitByte(Sym.Other);
- SymbolTable.emitWord16(Sym.SectionIdx);
- SymbolTable.emitWord64(Sym.Value);
- SymbolTable.emitWord64(Sym.Size);
- } else {
- SymbolTable.emitWord32(Sym.NameIdx);
- SymbolTable.emitWord32(Sym.Value);
- SymbolTable.emitWord32(Sym.Size);
- SymbolTable.emitByte(Sym.Info);
- SymbolTable.emitByte(Sym.Other);
- SymbolTable.emitWord16(Sym.SectionIdx);
- }
-}
-
-/// EmitSectionHeader - Write section 'Section' header in 'SHdrTab'
-/// Section Header Table
-void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab,
- const ELFSection &SHdr) {
- SHdrTab.emitWord32(SHdr.NameIdx);
- SHdrTab.emitWord32(SHdr.Type);
- if (is64Bit) {
- SHdrTab.emitWord64(SHdr.Flags);
- SHdrTab.emitWord(SHdr.Addr);
- SHdrTab.emitWord(SHdr.Offset);
- SHdrTab.emitWord64(SHdr.Size);
- SHdrTab.emitWord32(SHdr.Link);
- SHdrTab.emitWord32(SHdr.Info);
- SHdrTab.emitWord64(SHdr.Align);
- SHdrTab.emitWord64(SHdr.EntSize);
- } else {
- SHdrTab.emitWord32(SHdr.Flags);
- SHdrTab.emitWord(SHdr.Addr);
- SHdrTab.emitWord(SHdr.Offset);
- SHdrTab.emitWord32(SHdr.Size);
- SHdrTab.emitWord32(SHdr.Link);
- SHdrTab.emitWord32(SHdr.Info);
- SHdrTab.emitWord32(SHdr.Align);
- SHdrTab.emitWord32(SHdr.EntSize);
- }
-}
-
-/// EmitStringTable - If the current symbol table is non-empty, emit the string
-/// table for it
-void ELFWriter::EmitStringTable(const std::string &ModuleName) {
- if (!SymbolList.size()) return; // Empty symbol table.
- ELFSection &StrTab = getStringTableSection();
-
- // Set the zero'th symbol to a null byte, as required.
- StrTab.emitByte(0);
-
- // Walk on the symbol list and write symbol names into the string table.
- unsigned Index = 1;
- for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
- ELFSym &Sym = *(*I);
-
- std::string Name;
- if (Sym.isGlobalValue()) {
- SmallString<40> NameStr;
- Mang->getNameWithPrefix(NameStr, Sym.getGlobalValue(), false);
- Name.append(NameStr.begin(), NameStr.end());
- } else if (Sym.isExternalSym())
- Name.append(Sym.getExternalSymbol());
- else if (Sym.isFileType())
- Name.append(ModuleName);
-
- if (Name.empty()) {
- Sym.NameIdx = 0;
- } else {
- Sym.NameIdx = Index;
- StrTab.emitString(Name);
-
- // Keep track of the number of bytes emitted to this section.
- Index += Name.size()+1;
- }
- }
- assert(Index == StrTab.size());
- StrTab.Size = Index;
-}
-
-// SortSymbols - In the symbol table, local symbols must come before
-// all other symbols with non-local bindings. The return value is
-// the position of the first non-local symbol.
-unsigned ELFWriter::SortSymbols() {
- unsigned FirstNonLocalSymbol;
- std::vector<ELFSym*> LocalSyms, OtherSyms;
-
- for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
- if ((*I)->isLocalBind())
- LocalSyms.push_back(*I);
- else
- OtherSyms.push_back(*I);
- }
- SymbolList.clear();
- FirstNonLocalSymbol = LocalSyms.size();
-
- for (unsigned i = 0; i < FirstNonLocalSymbol; ++i)
- SymbolList.push_back(LocalSyms[i]);
-
- for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I)
- SymbolList.push_back(*I);
-
- LocalSyms.clear();
- OtherSyms.clear();
-
- return FirstNonLocalSymbol;
-}
-
-/// EmitSymbolTable - Emit the symbol table itself.
-void ELFWriter::EmitSymbolTable() {
- if (!SymbolList.size()) return; // Empty symbol table.
-
- // Now that we have emitted the string table and know the offset into the
- // string table of each symbol, emit the symbol table itself.
- ELFSection &SymTab = getSymbolTableSection();
- SymTab.Align = TEW->getPrefELFAlignment();
-
- // Section Index of .strtab.
- SymTab.Link = getStringTableSection().SectionIdx;
-
- // Size of each symtab entry.
- SymTab.EntSize = TEW->getSymTabEntrySize();
-
- // Reorder the symbol table with local symbols first!
- unsigned FirstNonLocalSymbol = SortSymbols();
-
- // Emit all the symbols to the symbol table.
- for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) {
- ELFSym &Sym = *SymbolList[i];
-
- // Emit symbol to the symbol table
- EmitSymbol(SymTab, Sym);
-
- // Record the symbol table index for each symbol
- if (Sym.isGlobalValue())
- GblSymLookup[Sym.getGlobalValue()] = i;
- else if (Sym.isExternalSym())
- ExtSymLookup[Sym.getExternalSymbol()] = i;
-
- // Keep track of the symbol index into the symbol table.
- Sym.SymTabIdx = i;
- }
-
- // One greater than the symbol table index of the last local symbol
- SymTab.Info = FirstNonLocalSymbol;
- SymTab.Size = SymTab.size();
-}
-
-/// EmitSectionTableStringTable - This method adds and emits a section for the
-/// ELF Section Table string table: the string table that holds all of the
-/// section names.
-void ELFWriter::EmitSectionTableStringTable() {
- // First step: add the section for the string table to the list of sections:
- ELFSection &SHStrTab = getSectionHeaderStringTableSection();
-
- // Now that we know which section number is the .shstrtab section, update the
- // e_shstrndx entry in the ELF header.
- ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
-
- // Set the NameIdx of each section in the string table and emit the bytes for
- // the string table.
- unsigned Index = 0;
-
- for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
- ELFSection &S = *(*I);
- // Set the index into the table. Note if we have lots of entries with
- // common suffixes, we could memoize them here if we cared.
- S.NameIdx = Index;
- SHStrTab.emitString(S.getName());
-
- // Keep track of the number of bytes emitted to this section.
- Index += S.getName().size()+1;
- }
-
- // Set the size of .shstrtab now that we know what it is.
- assert(Index == SHStrTab.size());
- SHStrTab.Size = Index;
-}
-
-/// OutputSectionsAndSectionTable - Now that we have constructed the file header
-/// and all of the sections, emit these to the ostream destination and emit the
-/// SectionTable.
-void ELFWriter::OutputSectionsAndSectionTable() {
- // Pass #1: Compute the file offset for each section.
- size_t FileOff = ElfHdr.size(); // File header first.
-
- // Adjust the alignment of all sections if needed; skip the null section.
- for (unsigned i=1, e=SectionList.size(); i < e; ++i) {
- ELFSection &ES = *SectionList[i];
- if (!ES.size()) {
- ES.Offset = FileOff;
- continue;
- }
-
- // Update Section size
- if (!ES.Size)
- ES.Size = ES.size();
-
- // Align FileOff to whatever the alignment restrictions of the section are.
- if (ES.Align)
- FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1);
-
- ES.Offset = FileOff;
- FileOff += ES.Size;
- }
-
- // Align Section Header.
- unsigned TableAlign = TEW->getPrefELFAlignment();
- FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
-
- // Now that we know where all of the sections will be emitted, set the e_shnum
- // entry in the ELF header.
- ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset);
-
- // Now that we know the offset in the file of the section table, update the
- // e_shoff address in the ELF header.
- ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset);
-
- // Now that we know all of the data in the file header, emit it and all of the
- // sections!
- O.write((char *)&ElfHdr.getData()[0], ElfHdr.size());
- FileOff = ElfHdr.size();
-
- // Section Header Table blob
- BinaryObject SHdrTable(isLittleEndian, is64Bit);
-
- // Emit all of sections to the file and build the section header table.
- for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
- ELFSection &S = *(*I);
- DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
- << ", Size: " << S.Size << ", Offset: " << S.Offset
- << ", SectionData Size: " << S.size() << "\n");
-
- // Align FileOff to whatever the alignment restrictions of the section are.
- if (S.size()) {
- if (S.Align) {
- for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
- FileOff != NewFileOff; ++FileOff)
- O << (char)0xAB;
- }
- O.write((char *)&S.getData()[0], S.Size);
- FileOff += S.Size;
- }
-
- EmitSectionHeader(SHdrTable, S);
- }
-
- // Align output for the section table.
- for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
- FileOff != NewFileOff; ++FileOff)
- O << (char)0xAB;
-
- // Emit the section table itself.
- O.write((char *)&SHdrTable.getData()[0], SHdrTable.size());
-}
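A small self-contained sketch, not part of this patch, of the rounding idiom the deleted code uses for section sizes and file offsets (for example in EmitGlobal and OutputSectionsAndSectionTable): (Off + Align - 1) & ~(Align - 1) rounds Off up to the next multiple of a power-of-two Align. The helper name is illustrative.

#include <cassert>
#include <cstddef>

// Round Off up to the next multiple of Align; Align must be a power of two,
// which is what the deleted ELF writer assumes for section alignments.
static std::size_t alignTo(std::size_t Off, std::size_t Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "Align must be a power of two");
  return (Off + Align - 1) & ~(Align - 1);
}

// Example: alignTo(13, 8) == 16 and alignTo(16, 8) == 16, matching how each
// section's file offset is padded before its data is written out.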
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
deleted file mode 100644
index 6f7fbace8aba..000000000000
--- a/lib/CodeGen/ELFWriter.h
+++ /dev/null
@@ -1,251 +0,0 @@
-//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the ELFWriter class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ELFWRITER_H
-#define ELFWRITER_H
-
-#include "llvm/ADT/SetVector.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include <map>
-
-namespace llvm {
- class BinaryObject;
- class Constant;
- class ConstantInt;
- class ConstantStruct;
- class ELFCodeEmitter;
- class ELFRelocation;
- class ELFSection;
- struct ELFSym;
- class GlobalVariable;
- class JITDebugRegisterer;
- class Mangler;
- class MachineCodeEmitter;
- class MachineConstantPoolEntry;
- class ObjectCodeEmitter;
- class MCAsmInfo;
- class TargetELFWriterInfo;
- class TargetLoweringObjectFile;
- class raw_ostream;
- class SectionKind;
- class MCContext;
- class TargetMachine;
-
- typedef std::vector<ELFSym*>::iterator ELFSymIter;
- typedef std::vector<ELFSection*>::iterator ELFSectionIter;
- typedef SetVector<const GlobalValue*>::const_iterator PendingGblsIter;
- typedef SetVector<const char *>::const_iterator PendingExtsIter;
- typedef std::pair<const Constant *, int64_t> CstExprResTy;
-
- /// ELFWriter - This class implements the common target-independent code for
- /// writing ELF files. Targets should derive a class from this to
- /// parameterize the output format.
- ///
- class ELFWriter : public MachineFunctionPass {
- friend class ELFCodeEmitter;
- friend class JITDebugRegisterer;
- public:
- static char ID;
-
- /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter
- ObjectCodeEmitter *getObjectCodeEmitter() {
- return reinterpret_cast<ObjectCodeEmitter*>(ElfCE);
- }
-
- ELFWriter(raw_ostream &O, TargetMachine &TM);
- ~ELFWriter();
-
- protected:
- /// Output stream to send the resultant object file to.
- raw_ostream &O;
-
- /// Target machine description.
- TargetMachine &TM;
-
- /// Context object for machine code objects.
- MCContext &OutContext;
-
- /// Target Elf Writer description.
- const TargetELFWriterInfo *TEW;
-
- /// Mang - The object used to perform name mangling for this module.
- Mangler *Mang;
-
- /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
- /// code for functions to the .o file.
- ELFCodeEmitter *ElfCE;
-
- /// TLOF - Target Lowering Object File, provides section names for globals
- /// and other object file specific stuff
- const TargetLoweringObjectFile &TLOF;
-
- /// MAI - Target Asm Info, provides information about section names for
- /// globals and other target specific stuff.
- const MCAsmInfo *MAI;
-
- //===------------------------------------------------------------------===//
- // Properties inferred automatically from the target machine.
- //===------------------------------------------------------------------===//
-
- /// is64Bit/isLittleEndian - This information is inferred from the target
- /// machine directly, indicating whether to emit a 32- or 64-bit ELF file.
- bool is64Bit, isLittleEndian;
-
- /// doInitialization - Emit the file header and all of the global variables
- /// for the module to the ELF file.
- bool doInitialization(Module &M);
- bool runOnMachineFunction(MachineFunction &MF);
-
- /// doFinalization - Now that the module has been completely processed, emit
- /// the ELF file to 'O'.
- bool doFinalization(Module &M);
-
- private:
- /// Blob containing the Elf header
- BinaryObject ElfHdr;
-
- /// SectionList - This is the list of sections that we have emitted to the
- /// file. Once the file has been completely built, the section header table
- /// is constructed from this info.
- std::vector<ELFSection*> SectionList;
- unsigned NumSections; // Always = SectionList.size()
-
- /// SectionLookup - This is a mapping from section name to section number in
- /// the SectionList. Used to quickly gather the Section Index from MAI names
- std::map<std::string, ELFSection*> SectionLookup;
-
- /// PendingGlobals - Globals not processed as symbols yet.
- SetVector<const GlobalValue*> PendingGlobals;
-
- /// GblSymLookup - This is a mapping from global value to a symbol index
- /// in the symbol table or private symbols list. This is useful since reloc
- /// symbol references must be quickly mapped to their indices on the lists.
- std::map<const GlobalValue*, uint32_t> GblSymLookup;
-
- /// PendingExternals - Externals not processed as symbols yet.
- SetVector<const char *> PendingExternals;
-
- /// ExtSymLookup - This is a mapping from externals to a symbol index
- /// in the symbol table list. This is useful since reloc symbol references
- /// must be quickly mapped to their symbol table indices.
- std::map<const char *, uint32_t> ExtSymLookup;
-
- /// SymbolList - This is the list of symbols emitted to the symbol table.
- /// When the SymbolList is finally built, local symbols must be placed in
- /// the beginning while non-locals at the end.
- std::vector<ELFSym*> SymbolList;
-
- /// PrivateSyms - Record private symbols, every symbol here must never be
- /// present in the SymbolList.
- std::vector<ELFSym*> PrivateSyms;
-
- /// getSection - Return the section with the specified name, creating a new
- /// section if one does not already exist.
- ELFSection &getSection(const std::string &Name, unsigned Type,
- unsigned Flags = 0, unsigned Align = 0) {
- ELFSection *&SN = SectionLookup[Name];
- if (SN) return *SN;
-
- SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit));
- SN = SectionList.back();
- SN->SectionIdx = NumSections++;
- SN->Type = Type;
- SN->Flags = Flags;
- SN->Link = ELF::SHN_UNDEF;
- SN->Align = Align;
- return *SN;
- }
-
- ELFSection &getNonExecStackSection() {
- return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1);
- }
-
- ELFSection &getSymbolTableSection() {
- return getSection(".symtab", ELF::SHT_SYMTAB, 0);
- }
-
- ELFSection &getStringTableSection() {
- return getSection(".strtab", ELF::SHT_STRTAB, 0, 1);
- }
-
- ELFSection &getSectionHeaderStringTableSection() {
- return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1);
- }
-
- ELFSection &getNullSection() {
- return getSection("", ELF::SHT_NULL, 0);
- }
-
- ELFSection &getDataSection();
- ELFSection &getBSSSection();
- ELFSection &getCtorSection();
- ELFSection &getDtorSection();
- ELFSection &getJumpTableSection();
- ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE);
- ELFSection &getTextSection(const Function *F);
- ELFSection &getRelocSection(ELFSection &S);
-
- // Helpers for obtaining ELF specific info.
- unsigned getGlobalELFBinding(const GlobalValue *GV);
- unsigned getGlobalELFType(const GlobalValue *GV);
- unsigned getGlobalELFVisibility(const GlobalValue *GV);
-
- // AddPendingGlobalSymbol - Add a global to be processed and to
- // the global symbol lookup, use a zero index because the table
- // index will be determined later.
- void AddPendingGlobalSymbol(const GlobalValue *GV,
- bool AddToLookup = false);
-
- // AddPendingExternalSymbol - Add the external to be processed
- // and to the external symbol lookup, use a zero index because
- // the symbol table index will be determined later.
- void AddPendingExternalSymbol(const char *External);
-
- // AddToSymbolList - Update the symbol lookup and, if the symbol is
- // private, add it to the PrivateSyms list, otherwise to SymbolList.
- void AddToSymbolList(ELFSym *GblSym);
-
- // As we complete the ELF file, we need to update fields in the ELF header
- // (e.g. the location of the section table). These members keep track of
- // the offset in ELFHeader of these various pieces to update and other
- // locations in the file.
- unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header.
- unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header.
- unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header.
-
- private:
- void EmitGlobal(const GlobalValue *GV);
- void EmitGlobalConstant(const Constant *C, ELFSection &GblS);
- void EmitGlobalConstantStruct(const ConstantStruct *CVS,
- ELFSection &GblS);
- void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S);
- void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
- ELFSection &GblS, int64_t Offset = 0);
- bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
- void EmitXXStructorList(const Constant *List, ELFSection &Xtor);
- void EmitRelocations();
- void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA);
- void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
- void EmitSectionTableStringTable();
- void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym);
- void EmitSymbolTable();
- void EmitStringTable(const std::string &ModuleName);
- void OutputSectionsAndSectionTable();
- void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value,
- unsigned Size);
- unsigned SortSymbols();
- CstExprResTy ResolveConstantExpr(const Constant *CV);
- };
-}
-
-#endif
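A hedged sketch, not copied from the deleted sources: EmitRelocation in the deleted ELFWriter.cpp writes Rel.getInfo(is64Bit), the packed ELF r_info word declared in ELF.h (removed in the same commit but not shown here). The standard ELF encoding it corresponds to packs the symbol table index and relocation type as follows; the helper name below is illustrative.

#include <cstdint>

// Standard ELF r_info packing: ELF64 splits the word as (symbol << 32) | type,
// ELF32 as (symbol << 8) | (type & 0xff).
static uint64_t makeRelInfo(uint32_t SymIdx, uint32_t Type, bool Is64Bit) {
  if (Is64Bit)
    return (uint64_t(SymIdx) << 32) | Type;
  return (uint64_t(SymIdx) << 8) | (Type & 0xff);
}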
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
index a7aba89b87f3..3bb04657b58a 100644
--- a/lib/CodeGen/EdgeBundles.cpp
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -77,7 +77,7 @@ void EdgeBundles::view() const {
/// Specialize WriteGraph, the standard implementation won't work.
raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G,
bool ShortNames,
- const std::string &Title) {
+ const Twine &Title) {
const MachineFunction *MF = G.getMachineFunction();
O << "digraph {\n";
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 01dccdb71e4b..a48c5400abcb 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -26,7 +26,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -45,7 +45,7 @@ using namespace llvm;
/// DomainValue for each register, but it may contain multiple execution
/// domains. A register value is initially created in a single execution
/// domain, but if we were forced to pay the penalty of a domain crossing, we
-/// keep track of the fact the the register is now available in multiple
+/// keep track of the fact that the register is now available in multiple
/// domains.
namespace {
struct DomainValue {
@@ -57,8 +57,10 @@ struct DomainValue {
// domains where the register is available for free.
unsigned AvailableDomains;
- // Position of the last defining instruction.
- unsigned Dist;
+ // Pointer to the next DomainValue in a chain. When two DomainValues are
+ // merged, Victim.Next is set to point to Victor, so old DomainValue
+ // references can be updated by following the chain.
+ DomainValue *Next;
// Twiddleable instructions using or defining these registers.
SmallVector<MachineInstr*, 8> Instrs;
@@ -92,16 +94,33 @@ struct DomainValue {
return CountTrailingZeros_32(AvailableDomains);
}
- DomainValue() { clear(); }
+ DomainValue() : Refs(0) { clear(); }
+ // Clear this DomainValue and point to next which has all its data.
void clear() {
- Refs = AvailableDomains = Dist = 0;
+ AvailableDomains = 0;
+ Next = 0;
Instrs.clear();
}
};
}
namespace {
+/// LiveReg - Information about a live register.
+struct LiveReg {
+ /// Value currently in this register, or NULL when no value is being tracked.
+ /// This counts as a DomainValue reference.
+ DomainValue *Value;
+
+ /// Instruction that defined this register, relative to the beginning of the
+ /// current basic block. When a LiveReg is used to represent a live-out
+ /// register, this value is relative to the end of the basic block, so it
+ /// will be a negative number.
+ int Def;
+};
+} // anonymous namespace
+
+namespace {
class ExeDepsFix : public MachineFunctionPass {
static char ID;
SpecificBumpPtrAllocator<DomainValue> Allocator;
@@ -111,13 +130,19 @@ class ExeDepsFix : public MachineFunctionPass {
MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- MachineBasicBlock *MBB;
std::vector<int> AliasMap;
const unsigned NumRegs;
- DomainValue **LiveRegs;
- typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
+ LiveReg *LiveRegs;
+ typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap;
LiveOutMap LiveOuts;
- unsigned Distance;
+
+ /// Current instruction number.
+ /// The first instruction in each basic block is 0.
+ int CurInstr;
+
+ /// True when the current block has a predecessor that hasn't been visited
+ /// yet.
+ bool SeenUnknownBackEdge;
public:
ExeDepsFix(const TargetRegisterClass *rc)
@@ -131,26 +156,33 @@ public:
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {
- return "SSE execution domain fixup";
+ return "Execution dependency fix";
}
private:
// Register mapping.
- int RegIndex(unsigned Reg);
+ int regIndex(unsigned Reg);
// DomainValue allocation.
- DomainValue *Alloc(int domain = -1);
- void Recycle(DomainValue*);
+ DomainValue *alloc(int domain = -1);
+ DomainValue *retain(DomainValue *DV) {
+ if (DV) ++DV->Refs;
+ return DV;
+ }
+ void release(DomainValue*);
+ DomainValue *resolve(DomainValue*&);
// LiveRegs manipulations.
- void SetLiveReg(int rx, DomainValue *DV);
- void Kill(int rx);
- void Force(int rx, unsigned domain);
- void Collapse(DomainValue *dv, unsigned domain);
- bool Merge(DomainValue *A, DomainValue *B);
-
- void enterBasicBlock();
- void visitGenericInstr(MachineInstr*);
+ void setLiveReg(int rx, DomainValue *DV);
+ void kill(int rx);
+ void force(int rx, unsigned domain);
+ void collapse(DomainValue *dv, unsigned domain);
+ bool merge(DomainValue *A, DomainValue *B);
+
+ void enterBasicBlock(MachineBasicBlock*);
+ void leaveBasicBlock(MachineBasicBlock*);
+ void visitInstr(MachineInstr*);
+ void processDefs(MachineInstr*, bool Kill);
void visitSoftInstr(MachineInstr*, unsigned mask);
void visitHardInstr(MachineInstr*, unsigned domain);
};
@@ -160,83 +192,108 @@ char ExeDepsFix::ID = 0;
/// Translate TRI register number to an index into our smaller tables of
/// interesting registers. Return -1 for boring registers.
-int ExeDepsFix::RegIndex(unsigned Reg) {
+int ExeDepsFix::regIndex(unsigned Reg) {
assert(Reg < AliasMap.size() && "Invalid register");
return AliasMap[Reg];
}
-DomainValue *ExeDepsFix::Alloc(int domain) {
+DomainValue *ExeDepsFix::alloc(int domain) {
DomainValue *dv = Avail.empty() ?
new(Allocator.Allocate()) DomainValue :
Avail.pop_back_val();
- dv->Dist = Distance;
if (domain >= 0)
dv->addDomain(domain);
+ assert(dv->Refs == 0 && "Reference count wasn't cleared");
+ assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
return dv;
}
-void ExeDepsFix::Recycle(DomainValue *dv) {
- assert(dv && "Cannot recycle NULL");
- dv->clear();
- Avail.push_back(dv);
+/// release - Release a reference to DV. When the last reference is released,
+/// collapse if needed.
+void ExeDepsFix::release(DomainValue *DV) {
+ while (DV) {
+ assert(DV->Refs && "Bad DomainValue");
+ if (--DV->Refs)
+ return;
+
+ // There are no more DV references. Collapse any contained instructions.
+ if (DV->AvailableDomains && !DV->isCollapsed())
+ collapse(DV, DV->getFirstDomain());
+
+ DomainValue *Next = DV->Next;
+ DV->clear();
+ Avail.push_back(DV);
+ // Also release the next DomainValue in the chain.
+ DV = Next;
+ }
+}
+
+/// resolve - Follow the chain of dead DomainValues until a live DomainValue is
+/// reached. Update the referenced pointer when necessary.
+DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
+ DomainValue *DV = DVRef;
+ if (!DV || !DV->Next)
+ return DV;
+
+ // DV has a chain. Find the end.
+ do DV = DV->Next;
+ while (DV->Next);
+
+ // Update DVRef to point to DV.
+ retain(DV);
+ release(DVRef);
+ DVRef = DV;
+ return DV;
}
/// Set LiveRegs[rx] = dv, updating reference counts.
-void ExeDepsFix::SetLiveReg(int rx, DomainValue *dv) {
+void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) {
assert(unsigned(rx) < NumRegs && "Invalid index");
- if (!LiveRegs) {
- LiveRegs = new DomainValue*[NumRegs];
- std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0);
- }
+ assert(LiveRegs && "Must enter basic block first.");
- if (LiveRegs[rx] == dv)
+ if (LiveRegs[rx].Value == dv)
return;
- if (LiveRegs[rx]) {
- assert(LiveRegs[rx]->Refs && "Bad refcount");
- if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]);
- }
- LiveRegs[rx] = dv;
- if (dv) ++dv->Refs;
+ if (LiveRegs[rx].Value)
+ release(LiveRegs[rx].Value);
+ LiveRegs[rx].Value = retain(dv);
}
// Kill register rx, recycle or collapse any DomainValue.
-void ExeDepsFix::Kill(int rx) {
+void ExeDepsFix::kill(int rx) {
assert(unsigned(rx) < NumRegs && "Invalid index");
- if (!LiveRegs || !LiveRegs[rx]) return;
-
- // Before killing the last reference to an open DomainValue, collapse it to
- // the first available domain.
- if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed())
- Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain());
- else
- SetLiveReg(rx, 0);
+ assert(LiveRegs && "Must enter basic block first.");
+ if (!LiveRegs[rx].Value)
+ return;
+
+ release(LiveRegs[rx].Value);
+ LiveRegs[rx].Value = 0;
}
/// Force register rx into domain.
-void ExeDepsFix::Force(int rx, unsigned domain) {
+void ExeDepsFix::force(int rx, unsigned domain) {
assert(unsigned(rx) < NumRegs && "Invalid index");
- DomainValue *dv;
- if (LiveRegs && (dv = LiveRegs[rx])) {
+ assert(LiveRegs && "Must enter basic block first.");
+ if (DomainValue *dv = LiveRegs[rx].Value) {
if (dv->isCollapsed())
dv->addDomain(domain);
else if (dv->hasDomain(domain))
- Collapse(dv, domain);
+ collapse(dv, domain);
else {
// This is an incompatible open DomainValue. Collapse it to whatever and
// force the new value into domain. This costs a domain crossing.
- Collapse(dv, dv->getFirstDomain());
- assert(LiveRegs[rx] && "Not live after collapse?");
- LiveRegs[rx]->addDomain(domain);
+ collapse(dv, dv->getFirstDomain());
+ assert(LiveRegs[rx].Value && "Not live after collapse?");
+ LiveRegs[rx].Value->addDomain(domain);
}
} else {
// Set up basic collapsed DomainValue.
- SetLiveReg(rx, Alloc(domain));
+ setLiveReg(rx, alloc(domain));
}
}
/// Collapse open DomainValue into given domain. If there are multiple
/// registers using dv, they each get a unique collapsed DomainValue.
-void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) {
+void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
assert(dv->hasDomain(domain) && "Cannot collapse");
// Collapse all the instructions.
@@ -247,13 +304,13 @@ void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) {
// If there are multiple users, give them new, unique DomainValues.
if (LiveRegs && dv->Refs > 1)
for (unsigned rx = 0; rx != NumRegs; ++rx)
- if (LiveRegs[rx] == dv)
- SetLiveReg(rx, Alloc(domain));
+ if (LiveRegs[rx].Value == dv)
+ setLiveReg(rx, alloc(domain));
}
/// Merge - All instructions and registers in B are moved to A, and B is
/// released.
-bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) {
+bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
assert(!A->isCollapsed() && "Cannot merge into collapsed");
assert(!B->isCollapsed() && "Cannot merge from collapsed");
if (A == B)
@@ -263,47 +320,188 @@ bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) {
if (!common)
return false;
A->AvailableDomains = common;
- A->Dist = std::max(A->Dist, B->Dist);
A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+
+ // Clear the old DomainValue so we won't try to swizzle instructions twice.
+ B->clear();
+ // All uses of B are referred to A.
+ B->Next = retain(A);
+
for (unsigned rx = 0; rx != NumRegs; ++rx)
- if (LiveRegs[rx] == B)
- SetLiveReg(rx, A);
+ if (LiveRegs[rx].Value == B)
+ setLiveReg(rx, A);
return true;
}
-void ExeDepsFix::enterBasicBlock() {
- // Try to coalesce live-out registers from predecessors.
- for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
+// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values.
+void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
+ // Detect back-edges from predecessors we haven't processed yet.
+ SeenUnknownBackEdge = false;
+
+ // Reset instruction counter in each basic block.
+ CurInstr = 0;
+
+ // Set up LiveRegs to represent registers entering MBB.
+ if (!LiveRegs)
+ LiveRegs = new LiveReg[NumRegs];
+
+ // Default values are 'nothing happened a long time ago'.
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ LiveRegs[rx].Value = 0;
+ LiveRegs[rx].Def = -(1 << 20);
+ }
+
+ // This is the entry block.
+ if (MBB->pred_empty()) {
+ for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
e = MBB->livein_end(); i != e; ++i) {
- int rx = RegIndex(*i);
- if (rx < 0) continue;
- for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
- pe = MBB->pred_end(); pi != pe; ++pi) {
- LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
- if (fi == LiveOuts.end()) continue;
- DomainValue *pdv = fi->second[rx];
- if (!pdv) continue;
- if (!LiveRegs || !LiveRegs[rx]) {
- SetLiveReg(rx, pdv);
+ int rx = regIndex(*i);
+ if (rx < 0)
+ continue;
+ // Treat function live-ins as if they were defined just before the first
+ // instruction. Usually, function arguments are set up immediately
+ // before the call.
+ LiveRegs[rx].Def = -1;
+ }
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n");
+ return;
+ }
+
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
+ pe = MBB->pred_end(); pi != pe; ++pi) {
+ LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
+ if (fi == LiveOuts.end()) {
+ SeenUnknownBackEdge = true;
+ continue;
+ }
+ assert(fi->second && "Can't have NULL entries");
+
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ // Use the most recent predecessor def for each register.
+ LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def);
+
+ DomainValue *pdv = resolve(fi->second[rx].Value);
+ if (!pdv)
+ continue;
+ if (!LiveRegs[rx].Value) {
+ setLiveReg(rx, pdv);
continue;
}
// We have a live DomainValue from more than one predecessor.
- if (LiveRegs[rx]->isCollapsed()) {
+ if (LiveRegs[rx].Value->isCollapsed()) {
// We are already collapsed, but predecessor is not. Force him.
- unsigned domain = LiveRegs[rx]->getFirstDomain();
- if (!pdv->isCollapsed() && pdv->hasDomain(domain))
- Collapse(pdv, domain);
+ unsigned Domain = LiveRegs[rx].Value->getFirstDomain();
+ if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
+ collapse(pdv, Domain);
continue;
}
// Currently open, merge in predecessor.
if (!pdv->isCollapsed())
- Merge(LiveRegs[rx], pdv);
+ merge(LiveRegs[rx].Value, pdv);
else
- Force(rx, pdv->getFirstDomain());
+ force(rx, pdv->getFirstDomain());
+ }
+ }
+ DEBUG(dbgs() << "BB#" << MBB->getNumber()
+ << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n"));
+}
+
+void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
+ assert(LiveRegs && "Must enter basic block first.");
+ // Save live registers at end of MBB - used by enterBasicBlock().
+ // Also use LiveOuts as a visited set to detect back-edges.
+ bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second;
+
+ if (First) {
+ // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to
+ // the end of this block instead of the beginning.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ LiveRegs[i].Def -= CurInstr;
+ } else {
+ // Insertion failed, this must be the second pass.
+ // Release all the DomainValues instead of keeping them.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ release(LiveRegs[i].Value);
+ delete[] LiveRegs;
+ }
+ LiveRegs = 0;
+}
+
+void ExeDepsFix::visitInstr(MachineInstr *MI) {
+ if (MI->isDebugValue())
+ return;
+
+ // Update instructions with explicit execution domains.
+ std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI);
+ if (DomP.first) {
+ if (DomP.second)
+ visitSoftInstr(MI, DomP.second);
+ else
+ visitHardInstr(MI, DomP.first);
+ }
+
+ // Process defs to track register ages, and kill values clobbered by generic
+ // instructions.
+ processDefs(MI, !DomP.first);
+}
+
+// Update def-ages for registers defined by MI.
+// If Kill is set, also kill off DomainValues clobbered by the defs.
+void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
+ assert(!MI->isDebugValue() && "Won't process debug values");
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0,
+ e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
+ i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isImplicit())
+ break;
+ if (MO.isUse())
+ continue;
+ int rx = regIndex(MO.getReg());
+ if (rx < 0)
+ continue;
+
+ // This instruction explicitly defines rx.
+ DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
+ << '\t' << *MI);
+
+ // How many instructions since rx was last written?
+ unsigned Clearance = CurInstr - LiveRegs[rx].Def;
+ LiveRegs[rx].Def = CurInstr;
+
+ // Kill off domains redefined by generic instructions.
+ if (Kill)
+ kill(rx);
+
+ // Verify clearance before partial register updates.
+ unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI);
+ if (!Pref)
+ continue;
+ DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
+ if (Pref > Clearance) {
+ DEBUG(dbgs() << ": Break dependency.\n");
+ TII->breakPartialRegDependency(MI, i, TRI);
+ continue;
+ }
+
+ // The current clearance seems OK, but we may be ignoring a def from a
+ // back-edge.
+ if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
+ DEBUG(dbgs() << ": OK.\n");
+ continue;
}
+
+ // A def from an unprocessed back-edge may make us break this dependency.
+ DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
}
+
+ ++CurInstr;
}
// A hard instruction only works in one domain. All input registers will be
@@ -314,19 +512,19 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
+ int rx = regIndex(mo.getReg());
if (rx < 0) continue;
- Force(rx, domain);
+ force(rx, domain);
}
// Kill all defs and force them.
for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
+ int rx = regIndex(mo.getReg());
if (rx < 0) continue;
- Kill(rx);
- Force(rx, domain);
+ kill(rx);
+ force(rx, domain);
}
}
@@ -343,9 +541,9 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
+ int rx = regIndex(mo.getReg());
if (rx < 0) continue;
- if (DomainValue *dv = LiveRegs[rx]) {
+ if (DomainValue *dv = LiveRegs[rx].Value) {
// Bitmask of domains that dv and available have in common.
unsigned common = dv->getCommonDomains(available);
// Is it possible to use this collapsed register for free?
@@ -360,7 +558,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
else
// Open DomainValue is not compatible with instruction. It is useless
// now.
- Kill(rx);
+ kill(rx);
}
}
@@ -374,94 +572,89 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// Kill off any remaining uses that don't match available, and build a list of
// incoming DomainValues that we want to merge.
- SmallVector<DomainValue*,4> doms;
+ SmallVector<LiveReg, 4> Regs;
for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
int rx = *i;
- DomainValue *dv = LiveRegs[rx];
+ const LiveReg &LR = LiveRegs[rx];
// This useless DomainValue could have been missed above.
- if (!dv->getCommonDomains(available)) {
- Kill(*i);
+ if (!LR.Value->getCommonDomains(available)) {
+ kill(rx);
continue;
}
- // sorted, uniqued insert.
- bool inserted = false;
- for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
- i != e && !inserted; ++i) {
- if (dv == *i)
- inserted = true;
- else if (dv->Dist < (*i)->Dist) {
- inserted = true;
- doms.insert(i, dv);
+ // Sorted insertion.
+ bool Inserted = false;
+ for (SmallVector<LiveReg, 4>::iterator i = Regs.begin(), e = Regs.end();
+ i != e && !Inserted; ++i) {
+ if (LR.Def < i->Def) {
+ Inserted = true;
+ Regs.insert(i, LR);
}
}
- if (!inserted)
- doms.push_back(dv);
+ if (!Inserted)
+ Regs.push_back(LR);
}
// doms are now sorted in order of appearance. Try to merge them all, giving
// priority to the latest ones.
DomainValue *dv = 0;
- while (!doms.empty()) {
+ while (!Regs.empty()) {
if (!dv) {
- dv = doms.pop_back_val();
+ dv = Regs.pop_back_val().Value;
+ // Force the first dv to match the current instruction.
+ dv->AvailableDomains = dv->getCommonDomains(available);
+ assert(dv->AvailableDomains && "Domain should have been filtered");
continue;
}
- DomainValue *latest = doms.pop_back_val();
- if (Merge(dv, latest)) continue;
+ DomainValue *Latest = Regs.pop_back_val().Value;
+ // Skip already merged values.
+ if (Latest == dv || Latest->Next)
+ continue;
+ if (merge(dv, Latest))
+ continue;
// If latest didn't merge, it is useless now. Kill all registers using it.
for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i)
- if (LiveRegs[*i] == latest)
- Kill(*i);
+ if (LiveRegs[*i].Value == Latest)
+ kill(*i);
}
// dv is the DomainValue we are going to use for this instruction.
- if (!dv)
- dv = Alloc();
- dv->Dist = Distance;
- dv->AvailableDomains = available;
+ if (!dv) {
+ dv = alloc();
+ dv->AvailableDomains = available;
+ }
dv->Instrs.push_back(mi);
// Finally set all defs and non-collapsed uses to dv.
for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
+ int rx = regIndex(mo.getReg());
if (rx < 0) continue;
- if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
- Kill(rx);
- SetLiveReg(rx, dv);
+ if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
+ kill(rx);
+ setLiveReg(rx, dv);
}
}
}
-void ExeDepsFix::visitGenericInstr(MachineInstr *mi) {
- // Process explicit defs, kill any relevant registers redefined.
- for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
- MachineOperand &mo = mi->getOperand(i);
- if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
- if (rx < 0) continue;
- Kill(rx);
- }
-}
-
bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
TII = MF->getTarget().getInstrInfo();
TRI = MF->getTarget().getRegisterInfo();
- MBB = 0;
LiveRegs = 0;
- Distance = 0;
assert(NumRegs == RC->getNumRegs() && "Bad regclass");
+ DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
+ << RC->getName() << " **********\n");
+
// If no relevant registers are used in the function, we can skip it
// completely.
bool anyregs = false;
for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
I != E; ++I)
- if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ if (MF->getRegInfo().isPhysRegOrOverlapUsed(*I)) {
anyregs = true;
break;
}
@@ -473,43 +666,48 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
// or -1.
AliasMap.resize(TRI->getNumRegs(), -1);
for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
- for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI)
+ for (const uint16_t *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI)
AliasMap[*AI] = i;
}
MachineBasicBlock *Entry = MF->begin();
- SmallPtrSet<MachineBasicBlock*, 16> Visited;
- for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
- DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
- DFI != DFE; ++DFI) {
- MBB = *DFI;
- enterBasicBlock();
+ ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
+ SmallVector<MachineBasicBlock*, 16> Loops;
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+ MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock *MBB = *MBBI;
+ enterBasicBlock(MBB);
+ if (SeenUnknownBackEdge)
+ Loops.push_back(MBB);
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I) {
- MachineInstr *mi = I;
- if (mi->isDebugValue()) continue;
- ++Distance;
- std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(mi);
- if (domp.first)
- if (domp.second)
- visitSoftInstr(mi, domp.second);
- else
- visitHardInstr(mi, domp.first);
- else if (LiveRegs)
- visitGenericInstr(mi);
- }
+ ++I)
+ visitInstr(I);
+ leaveBasicBlock(MBB);
+ }
- // Save live registers at end of MBB - used by enterBasicBlock().
- if (LiveRegs)
- LiveOuts.insert(std::make_pair(MBB, LiveRegs));
- LiveRegs = 0;
+ // Visit all the loop blocks again in order to merge DomainValues from
+ // back-edges.
+ for (unsigned i = 0, e = Loops.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Loops[i];
+ enterBasicBlock(MBB);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I)
+ if (!I->isDebugValue())
+ processDefs(I, false);
+ leaveBasicBlock(MBB);
}
- // Clear the LiveOuts vectors. Should we also collapse any remaining
- // DomainValues?
- for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
- i != e; ++i)
- delete[] i->second;
+ // Clear the LiveOuts vectors and collapse any remaining DomainValues.
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+ MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+ LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
+ if (FI == LiveOuts.end() || !FI->second)
+ continue;
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ if (FI->second[i].Value)
+ release(FI->second[i].Value);
+ delete[] FI->second;
+ }
LiveOuts.clear();
Avail.clear();
Allocator.DestroyAll();
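
The ExecutionDepsFix changes above drop the Dist counter and instead reference-count each DomainValue, chaining a merged value to its survivor through Next so that stale pointers held in LiveOuts can be forwarded lazily. A minimal standalone sketch of that retain/release/resolve pattern follows; the names and the plain new/delete are illustrative only, since the real pass recycles values through a SpecificBumpPtrAllocator and collapses an open value when its last reference goes away.

// Standalone sketch (not LLVM code) of reference-counted values with a
// forwarding chain, mirroring retain()/release()/resolve() in the diff above.
#include <cassert>
#include <cstdio>

struct Value {
  unsigned Refs = 0;     // live references to this Value
  Value *Next = nullptr; // set when this Value is merged into another one
};

Value *retain(Value *V) {
  if (V) ++V->Refs;
  return V;
}

void release(Value *V) {
  while (V) {
    assert(V->Refs && "over-release");
    if (--V->Refs)
      return;
    Value *Next = V->Next; // releasing a value also releases its chain
    delete V;              // the real pass recycles into an allocator instead
    V = Next;
  }
}

// Follow the forwarding chain, then repoint the reference so later lookups
// are cheap (the path compression performed by ExeDepsFix::resolve above).
Value *resolve(Value *&Ref) {
  Value *V = Ref;
  if (!V || !V->Next)
    return V;
  while (V->Next)
    V = V->Next;
  retain(V);
  release(Ref);
  Ref = V;
  return V;
}

// Merge: every holder of a reference to B should eventually see A.
void merge(Value *A, Value *B) {
  if (A == B)
    return;
  B->Next = retain(A);
}

int main() {
  Value *A = retain(new Value);
  Value *B = retain(new Value);
  Value *StaleB = retain(B); // e.g. a live-out list still pointing at B
  merge(A, B);
  release(B);                // drop the primary reference to B
  resolve(StaleB);           // StaleB is forwarded to A
  assert(StaleB == A);
  std::printf("refs on A: %u\n", A->Refs); // prints 2
  release(StaleB);
  release(A);
  return 0;
}
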
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index a67140ece4a5..2c4a93543cc3 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -32,10 +32,6 @@ namespace {
private:
virtual bool runOnMachineFunction(MachineFunction &MF);
- const char *getPassName() const {
- return "Expand ISel Pseudo-instructions";
- }
-
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -43,12 +39,9 @@ namespace {
} // end anonymous namespace
char ExpandISelPseudos::ID = 0;
+char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID;
INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
- "Expand CodeGen Pseudo-instructions", false, false)
-
-FunctionPass *llvm::createExpandISelPseudosPass() {
- return new ExpandISelPseudos();
-}
+ "Expand ISel Pseudo-instructions", false, false)
bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
@@ -62,8 +55,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MI = MBBI++;
// If MI is a pseudo, expand it.
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.usesCustomInsertionHook()) {
+ if (MI->usesCustomInsertionHook()) {
Changed = true;
MachineBasicBlock *NewMBB =
TLI->EmitInstrWithCustomInserter(MI, MBB);
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index e2a14a8dfd97..b14afc286d49 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -36,10 +36,6 @@ public:
static char ID; // Pass identification, replacement for typeid
ExpandPostRA() : MachineFunctionPass(ID) {}
- const char *getPassName() const {
- return "Post-RA pseudo instruction expansion pass";
- }
-
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addPreservedID(MachineLoopInfoID);
@@ -61,10 +57,10 @@ private:
} // end anonymous namespace
char ExpandPostRA::ID = 0;
+char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
-FunctionPass *llvm::createExpandPostRAPseudosPass() {
- return new ExpandPostRA();
-}
+INITIALIZE_PASS(ExpandPostRA, "postrapseudos",
+ "Post-RA pseudo instruction expansion pass", false, false)
/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
/// and the lowered replacement instructions immediately precede it.
@@ -207,7 +203,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
++mi;
// Only expand pseudos.
- if (!MI->getDesc().isPseudo())
+ if (!MI->isPseudo())
continue;
// Give targets a chance to expand even standard pseudos.
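
Both ExpandISelPseudos and ExpandPostRAPseudos now register through INITIALIZE_PASS and export only the address of their static ID (char &llvm::ExpandISelPseudosID, char &llvm::ExpandPostRAPseudosID) in place of the removed createXPass() factories; the call sites that schedule them by that ID are outside this diff, so how the IDs are consumed is an assumption here. A standalone sketch, with made-up names, of why the address of a static char works as a pass identity even when the class sits in an anonymous namespace:

// Standalone sketch (made-up names, not the LLVM PassManager).
#include <cstdio>

namespace impl {               // stands in for the pass's anonymous namespace
struct ExpandPseudos {
  static char ID;              // only the address matters, never the value
};
char ExpandPseudos::ID = 0;
}

// Public handle: other code can name the pass without seeing its definition.
char &ExpandPseudosID = impl::ExpandPseudos::ID;

// A scheduler keyed by ID addresses rather than factory functions.
bool isSamePass(const void *A, const void *B) { return A == B; }

int main() {
  std::printf("%d\n", isSamePass(&ExpandPseudosID, &impl::ExpandPseudos::ID));
}
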
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index d757cf409d50..1caf8c233976 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -143,12 +143,12 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
static const char *DescKind(GC::PointKind Kind) {
switch (Kind) {
- default: llvm_unreachable("Unknown GC point kind");
case GC::Loop: return "loop";
case GC::Return: return "return";
case GC::PreCall: return "pre-call";
case GC::PostCall: return "post-call";
}
+ llvm_unreachable("Invalid point kind");
}
bool Printer::runOnFunction(Function &F) {
@@ -156,12 +156,12 @@ bool Printer::runOnFunction(Function &F) {
GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
- OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n";
+ OS << "GC roots for " << FD->getFunction().getName() << ":\n";
for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
RE = FD->roots_end(); RI != RE; ++RI)
OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
- OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n";
+ OS << "GC safe points for " << FD->getFunction().getName() << ":\n";
for (GCFunctionInfo::iterator PI = FD->begin(),
PE = FD->end(); PI != PE; ++PI) {
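
The DescKind change above follows the covered-switch idiom: with no default case, the compiler can warn when a new GC::PointKind enumerator is added, and llvm_unreachable moves after the switch to mark the impossible fall-off path. A small self-contained example of the same idiom, using __builtin_unreachable as a stand-in for llvm_unreachable:

enum class PointKind { Loop, Return, PreCall, PostCall };

const char *descKind(PointKind K) {
  switch (K) {
  case PointKind::Loop:     return "loop";
  case PointKind::Return:   return "return";
  case PointKind::PreCall:  return "pre-call";
  case PointKind::PostCall: return "post-call";
  }
  __builtin_unreachable(); // every enumerator is handled above
}

int main() { return descKind(PointKind::Loop)[0] == 'l' ? 0 : 1; }
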
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 766c6ee542a9..506b5cf09457 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -10,8 +10,8 @@
// This file implements target- and collector-independent garbage collection
// infrastructure.
//
-// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots
-// are identified in SelectionDAGISel.
+// GCMachineCodeAnalysis identifies the GC safe points in the machine code.
+// Roots are identified in SelectionDAGISel.
//
//===----------------------------------------------------------------------===//
@@ -35,9 +35,9 @@
using namespace llvm;
namespace {
-
+
/// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
- /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+ /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
/// directed by the GCStrategy. It also performs automatic root initialization
/// and custom intrinsic lowering.
class LowerIntrinsics : public FunctionPass {
@@ -47,47 +47,46 @@ namespace {
bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
static bool InsertRootInitializers(Function &F,
AllocaInst **Roots, unsigned Count);
-
+
public:
static char ID;
-
+
LowerIntrinsics();
const char *getPassName() const;
void getAnalysisUsage(AnalysisUsage &AU) const;
-
+
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
};
-
-
- /// MachineCodeAnalysis - This is a target-independent pass over the machine
+
+
+ /// GCMachineCodeAnalysis - This is a target-independent pass over the machine
/// function representation to identify safe points for the garbage collector
/// in the machine code. It inserts labels at safe points and populates a
/// GCMetadata record for each function.
- class MachineCodeAnalysis : public MachineFunctionPass {
+ class GCMachineCodeAnalysis : public MachineFunctionPass {
const TargetMachine *TM;
GCFunctionInfo *FI;
MachineModuleInfo *MMI;
const TargetInstrInfo *TII;
-
+
void FindSafePoints(MachineFunction &MF);
void VisitCallPoint(MachineBasicBlock::iterator MI);
- MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
DebugLoc DL) const;
-
+
void FindStackOffsets(MachineFunction &MF);
-
+
public:
static char ID;
-
- MachineCodeAnalysis();
- const char *getPassName() const;
+
+ GCMachineCodeAnalysis();
void getAnalysisUsage(AnalysisUsage &AU) const;
-
+
bool runOnMachineFunction(MachineFunction &MF);
};
-
+
}
// -----------------------------------------------------------------------------
@@ -97,6 +96,7 @@ GCStrategy::GCStrategy() :
CustomReadBarriers(false),
CustomWriteBarriers(false),
CustomRoots(false),
+ CustomSafePoints(false),
InitRoots(true),
UsesMetadata(false)
{}
@@ -104,18 +104,24 @@ GCStrategy::GCStrategy() :
GCStrategy::~GCStrategy() {
for (iterator I = begin(), E = end(); I != E; ++I)
delete *I;
-
+
Functions.clear();
}
-
+
bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
-
+
bool GCStrategy::performCustomLowering(Function &F) {
dbgs() << "gc " << getName() << " must override performCustomLowering.\n";
+ llvm_unreachable("must override performCustomLowering");
+}
+
+
+bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) {
+ dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n";
llvm_unreachable(0);
- return 0;
}
+
GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
GCFunctionInfo *FI = new GCFunctionInfo(F, *this);
Functions.push_back(FI);
@@ -132,7 +138,7 @@ INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
FunctionPass *llvm::createGCLoweringPass() {
return new LowerIntrinsics();
}
-
+
char LowerIntrinsics::ID = 0;
LowerIntrinsics::LowerIntrinsics()
@@ -143,7 +149,7 @@ LowerIntrinsics::LowerIntrinsics()
const char *LowerIntrinsics::getPassName() const {
return "Lower Garbage Collection Instructions";
}
-
+
void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<GCModuleInfo>();
@@ -161,22 +167,22 @@ bool LowerIntrinsics::doInitialization(Module &M) {
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
if (!I->isDeclaration() && I->hasGC())
MI->getFunctionInfo(*I); // Instantiate the GC strategy.
-
+
bool MadeChange = false;
for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
if (NeedsCustomLoweringPass(**I))
if ((*I)->initializeCustomLowering(M))
MadeChange = true;
-
+
return MadeChange;
}
-bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
+bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
unsigned Count) {
// Scroll past alloca instructions.
BasicBlock::iterator IP = F.getEntryBlock().begin();
while (isa<AllocaInst>(IP)) ++IP;
-
+
// Search for initializers in the initial BB.
SmallPtrSet<AllocaInst*,16> InitedRoots;
for (; !CouldBecomeSafePoint(IP); ++IP)
@@ -184,10 +190,10 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
if (AllocaInst *AI =
dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
InitedRoots.insert(AI);
-
+
// Add root initializers.
bool MadeChange = false;
-
+
for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
if (!InitedRoots.count(*I)) {
StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
@@ -196,7 +202,7 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
SI->insertAfter(*I);
MadeChange = true;
}
-
+
return MadeChange;
}
@@ -220,26 +226,26 @@ bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
// The natural definition of instructions which could introduce safe points
// are:
- //
+ //
// - call, invoke (AfterCall, BeforeCall)
// - phis (Loops)
// - invoke, ret, unwind (Exit)
- //
+ //
// However, instructions as seemingly innocuous as arithmetic can become
// libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
// it is necessary to take a conservative approach.
-
+
if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
isa<StoreInst>(I) || isa<LoadInst>(I))
return false;
-
+
// llvm.gcroot is safe because it doesn't do anything at runtime.
if (CallInst *CI = dyn_cast<CallInst>(I))
if (Function *F = CI->getCalledFunction())
if (unsigned IID = F->getIntrinsicID())
if (IID == Intrinsic::gcroot)
return false;
-
+
return true;
}
@@ -249,15 +255,15 @@ bool LowerIntrinsics::runOnFunction(Function &F) {
// Quick exit for functions that do not use GC.
if (!F.hasGC())
return false;
-
+
GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
GCStrategy &S = FI.getStrategy();
-
+
bool MadeChange = false;
-
+
if (NeedsDefaultLoweringPass(S))
MadeChange |= PerformDefaultLowering(F, S);
-
+
bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
if (UseCustomLoweringPass)
MadeChange |= S.performCustomLowering(F);
@@ -275,9 +281,9 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
bool LowerWr = !S.customWriteBarrier();
bool LowerRd = !S.customReadBarrier();
bool InitRoots = S.initializeRoots();
-
+
SmallVector<AllocaInst*, 32> Roots;
-
+
bool MadeChange = false;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
@@ -313,104 +319,104 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
default:
continue;
}
-
+
MadeChange = true;
}
}
}
-
+
if (Roots.size())
MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
-
+
return MadeChange;
}
// -----------------------------------------------------------------------------
-FunctionPass *llvm::createGCMachineCodeAnalysisPass() {
- return new MachineCodeAnalysis();
-}
+char GCMachineCodeAnalysis::ID = 0;
+char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID;
-char MachineCodeAnalysis::ID = 0;
+INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis",
+ "Analyze Machine Code For Garbage Collection", false, false)
-MachineCodeAnalysis::MachineCodeAnalysis()
+GCMachineCodeAnalysis::GCMachineCodeAnalysis()
: MachineFunctionPass(ID) {}
-const char *MachineCodeAnalysis::getPassName() const {
- return "Analyze Machine Code For Garbage Collection";
-}
-
-void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
AU.setPreservesAll();
AU.addRequired<MachineModuleInfo>();
AU.addRequired<GCModuleInfo>();
}
-MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- DebugLoc DL) const {
+MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
return Label;
}
-void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
// Find the return address (next instruction), too, so as to bracket the call
// instruction.
- MachineBasicBlock::iterator RAI = CI;
- ++RAI;
-
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
}
-
+
if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
}
}
-void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
for (MachineFunction::iterator BBI = MF.begin(),
BBE = MF.end(); BBI != BBE; ++BBI)
for (MachineBasicBlock::iterator MI = BBI->begin(),
ME = BBI->end(); MI != ME; ++MI)
- if (MI->getDesc().isCall())
+ if (MI->isCall())
VisitCallPoint(MI);
}
-void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
const TargetFrameLowering *TFI = TM->getFrameLowering();
assert(TFI && "TargetRegisterInfo not available!");
-
+
for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
RE = FI->roots_end(); RI != RE; ++RI)
RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
}
-bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
// Quick exit for functions that do not use GC.
if (!MF.getFunction()->hasGC())
return false;
-
+
FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
if (!FI->getStrategy().needsSafePoints())
return false;
-
+
TM = &MF.getTarget();
MMI = &getAnalysis<MachineModuleInfo>();
TII = TM->getInstrInfo();
-
+
// Find the size of the stack frame.
FI->setFrameSize(MF.getFrameInfo()->getStackSize());
-
+
// Find all safe points.
- FindSafePoints(MF);
-
+ if (FI->getStrategy().customSafePoints()) {
+ FI->getStrategy().findCustomSafePoints(*FI, MF);
+ } else {
+ FindSafePoints(MF);
+ }
+
// Find the stack offsets for all roots.
FindStackOffsets(MF);
-
+
return false;
}
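
The GCStrategy changes give a collector a CustomSafePoints flag and a findCustomSafePoints hook, and GCMachineCodeAnalysis::runOnMachineFunction dispatches on customSafePoints() before falling back to its own call-site scan. A standalone mock of that dispatch (not the LLVM classes; the concrete strategy and its chosen safe point are invented for illustration):

#include <cstdio>
#include <vector>

struct SafePoint { int Offset; };

struct Strategy {
  bool CustomSafePoints = false;
  bool customSafePoints() const { return CustomSafePoints; }
  // Strategies that set CustomSafePoints must override this hook.
  virtual bool findCustomSafePoints(std::vector<SafePoint> &Out) {
    std::fprintf(stderr, "strategy must override findCustomSafePoints\n");
    return false;
  }
  virtual ~Strategy() = default;
};

struct CustomGC : Strategy {
  CustomGC() { CustomSafePoints = true; }
  bool findCustomSafePoints(std::vector<SafePoint> &Out) override {
    Out.push_back({42}); // toy: the collector decides where to record points
    return true;
  }
};

void runAnalysis(Strategy &S, std::vector<SafePoint> &Points) {
  if (S.customSafePoints())
    S.findCustomSafePoints(Points); // new hook
  else
    Points.push_back({0});          // stand-in for the default FindSafePoints
}

int main() {
  CustomGC S;
  std::vector<SafePoint> Points;
  runAnalysis(S, Points);
  std::printf("safe points: %zu\n", Points.size());
}
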
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index ce7ed293daac..75ae5b9c2c27 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -62,6 +62,7 @@ STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
STATISTIC(NumDupBBs, "Number of duplicated blocks");
+STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
namespace {
class IfConverter : public MachineFunctionPass {
@@ -169,7 +170,6 @@ namespace {
}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "If Converter"; }
private:
bool ReverseBranchCondition(BBInfo &BBI);
@@ -195,7 +195,8 @@ namespace {
void PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> &Redefs);
+ SmallSet<unsigned, 4> &Redefs,
+ SmallSet<unsigned, 4> *LaterRedefs = 0);
void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
SmallSet<unsigned, 4> &Redefs,
@@ -251,12 +252,12 @@ namespace {
char IfConverter::ID = 0;
}
+char &llvm::IfConverterID = IfConverter::ID;
+
INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
-FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
-
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TLI = MF.getTarget().getTargetLowering();
TII = MF.getTarget().getInstrInfo();
@@ -313,8 +314,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
bool RetVal = false;
switch (Kind) {
- default: assert(false && "Unexpected!");
- break;
+ default: llvm_unreachable("Unexpected!");
case ICSimple:
case ICSimpleFalse: {
bool isFalse = Kind == ICSimpleFalse;
@@ -573,12 +573,12 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
// blocks, move the end iterators up past any branch instructions.
while (TIE != TIB) {
--TIE;
- if (!TIE->getDesc().isBranch())
+ if (!TIE->isBranch())
break;
}
while (FIE != FIB) {
--FIE;
- if (!FIE->getDesc().isBranch())
+ if (!FIE->isBranch())
break;
}
@@ -651,12 +651,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
if (I->isDebugValue())
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isNotDuplicable())
+ if (I->isNotDuplicable())
BBI.CannotBeCopied = true;
bool isPredicated = TII->isPredicated(I);
- bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch();
+ bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch();
if (!isCondBr) {
if (!isPredicated) {
@@ -963,7 +962,7 @@ static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
E = BB->livein_end(); I != E; ++I) {
unsigned Reg = *I;
Redefs.insert(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
Redefs.insert(*Subreg);
}
@@ -984,7 +983,7 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
Defs.push_back(Reg);
else if (MO.isKill()) {
Redefs.erase(Reg);
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
Redefs.erase(*SR);
}
}
@@ -997,7 +996,7 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
true/*IsImp*/,false/*IsKill*/));
} else {
Redefs.insert(Reg);
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
Redefs.insert(*SR);
}
}
@@ -1035,7 +1034,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
if (Kind == ICSimpleFalse)
if (TII->ReverseBranchCondition(Cond))
- assert(false && "Unable to reverse branch condition!");
+ llvm_unreachable("Unable to reverse branch condition!");
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
@@ -1108,7 +1107,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
if (TII->ReverseBranchCondition(Cond))
- assert(false && "Unable to reverse branch condition!");
+ llvm_unreachable("Unable to reverse branch condition!");
if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
if (ReverseBranchCondition(*CvtBBI)) {
@@ -1155,7 +1154,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
CvtBBI->BrCond.end());
if (TII->ReverseBranchCondition(RevCond))
- assert(false && "Unable to reverse branch condition!");
+ llvm_unreachable("Unable to reverse branch condition!");
TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl);
BBI.BB->addSuccessor(CvtBBI->FalseBB);
}
@@ -1227,7 +1226,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
BBInfo *BBI2 = &FalseBBI;
SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
if (TII->ReverseBranchCondition(RevCond))
- assert(false && "Unable to reverse branch condition!");
+ llvm_unreachable("Unable to reverse branch condition!");
SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
@@ -1281,7 +1280,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
BBI2->BB->erase(BBI2->BB->begin(), DI2);
- // Predicate the 'true' block after removing its branch.
+ // Remove branch from 'true' block and remove duplicated instructions.
BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
DI1 = BBI1->BB->end();
for (unsigned i = 0; i != NumDups2; ) {
@@ -1294,9 +1293,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
++i;
}
BBI1->BB->erase(DI1, BBI1->BB->end());
- PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs);
- // Predicate the 'false' block.
+ // Remove 'false' block branch and find the last instruction to predicate.
BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
DI2 = BBI2->BB->end();
while (NumDups2 != 0) {
@@ -1308,6 +1306,55 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
if (!DI2->isDebugValue())
--NumDups2;
}
+
+ // Remember which registers would later be defined by the false block.
+ // This allows us not to predicate instructions in the true block that would
+ // later be re-defined. That is, rather than
+ // subeq r0, r1, #1
+ // addne r0, r1, #1
+ // generate:
+ // sub r0, r1, #1
+ // addne r0, r1, #1
+ SmallSet<unsigned, 4> RedefsByFalse;
+ SmallSet<unsigned, 4> ExtUses;
+ if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) {
+ for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) {
+ if (FI->isDebugValue())
+ continue;
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = FI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(Reg);
+ } else if (!RedefsByFalse.count(Reg)) {
+ // These are defined before control flow reaches the 'false' instructions.
+ // They cannot be modified by the 'true' instructions.
+ ExtUses.insert(Reg);
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ ExtUses.insert(*SR);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+ if (!ExtUses.count(Reg)) {
+ RedefsByFalse.insert(Reg);
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ RedefsByFalse.insert(*SR);
+ }
+ }
+ }
+ }
+
+ // Predicate the 'true' block.
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs, &RedefsByFalse);
+
+ // Predicate the 'false' block.
PredicateBlock(*BBI2, DI2, *Cond2, Redefs);
// Merge the true block into the entry of the diamond.
@@ -1319,7 +1366,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// fold the tail block in as well. Otherwise, unless it falls through to the
// tail, add an unconditional branch to it.
if (TailBB) {
- BBInfo TailBBI = BBAnalysis[TailBB->getNumber()];
+ BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
bool CanMergeTail = !TailBBI.HasFallThrough;
// There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
// check if there are any other predecessors besides those.
@@ -1356,15 +1403,49 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
return true;
}
+static bool MaySpeculate(const MachineInstr *MI,
+ SmallSet<unsigned, 4> &LaterRedefs,
+ const TargetInstrInfo *TII) {
+ bool SawStore = true;
+ if (!MI->isSafeToMove(TII, 0, SawStore))
+ return false;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef() && !LaterRedefs.count(Reg))
+ return false;
+ }
+
+ return true;
+}
+
/// PredicateBlock - Predicate instructions from the start of the block to the
/// specified end with the specified condition.
void IfConverter::PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> &Redefs) {
+ SmallSet<unsigned, 4> &Redefs,
+ SmallSet<unsigned, 4> *LaterRedefs) {
+ bool AnyUnpred = false;
+ bool MaySpec = LaterRedefs != 0;
for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
if (I->isDebugValue() || TII->isPredicated(I))
continue;
+ // It may be possible not to predicate an instruction if it's the 'true'
+ // side of a diamond and the 'false' side may re-define the instruction's
+ // defs.
+ if (MaySpec && MaySpeculate(I, *LaterRedefs, TII)) {
+ AnyUnpred = true;
+ continue;
+ }
+ // If any instruction is predicated, then every instruction after it must
+ // be predicated.
+ MaySpec = false;
if (!TII->PredicateInstruction(I, Cond)) {
#ifndef NDEBUG
dbgs() << "Unable to predicate " << *I << "!\n";
@@ -1383,6 +1464,8 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
BBI.NonPredSize = 0;
++NumIfConvBBs;
+ if (AnyUnpred)
+ ++NumUnpred;
}
/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
@@ -1395,9 +1478,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
E = FromBBI.BB->end(); I != E; ++I) {
- const MCInstrDesc &MCID = I->getDesc();
// Do not copy the end of the block branches.
- if (IgnoreBr && MCID.isBranch())
+ if (IgnoreBr && I->isBranch())
break;
MachineInstr *MI = MF.CloneMachineInstr(I);
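
The diamond changes above let the 'true' half stay unpredicated when the 'false' half will overwrite its results anyway: PredicateBlock now takes the LaterRedefs set, and MaySpeculate only admits an instruction that is safe to move and whose every def is in that set; once one instruction is predicated, everything after it is predicated too. A standalone sketch of that admission test on a toy operand representation (names invented):

#include <cstdio>
#include <set>
#include <vector>

struct Operand { unsigned Reg; bool IsDef; };
struct Instr {
  std::vector<Operand> Ops;
  bool SafeToMove; // stand-in for MachineInstr::isSafeToMove()
};

bool maySpeculate(const Instr &I, const std::set<unsigned> &LaterRedefs) {
  if (!I.SafeToMove)
    return false;
  for (const Operand &Op : I.Ops)
    if (Op.IsDef && Op.Reg && !LaterRedefs.count(Op.Reg))
      return false; // its result would be visible outside the diamond
  return true;
}

int main() {
  std::set<unsigned> RedefsByFalse = {1};          // r1 is rewritten by 'false'
  Instr Sub = {{{1, true}, {2, false}}, true};     // r1 = sub r2, #1
  Instr Store = {{{1, false}}, false};             // store r1 (side effects)
  std::printf("sub: %d, store: %d\n",
              maySpeculate(Sub, RedefsByFalse),    // 1: may stay unpredicated
              maySpeculate(Store, RedefsByFalse)); // 0: must be predicated
}
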
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 726af4696578..d5ea666e4a17 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -14,14 +14,15 @@
#define DEBUG_TYPE "regalloc"
#include "Spiller.h"
-#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -173,8 +174,7 @@ private:
void reMaterializeAll();
bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
- bool foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops,
+ bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >,
MachineInstr *LoadMI = 0);
void insertReload(LiveInterval &NewLI, SlotIndex,
MachineBasicBlock::iterator MI);
@@ -578,7 +578,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
if (isSibling(SrcReg)) {
LiveInterval &SrcLI = LIS.getInterval(SrcReg);
- LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getUseIndex());
+ LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getRegSlot(true));
assert(SrcLR && "Copy from non-existing value");
// Check if this COPY kills its source.
SVI->second.KillsSource = (SrcLR->end == VNI->def);
@@ -644,16 +644,18 @@ void InlineSpiller::analyzeSiblingValues() {
if (VNI->isUnused())
continue;
MachineInstr *DefMI = 0;
+ if (!VNI->isPHIDef()) {
+ DefMI = LIS.getInstructionFromIndex(VNI->def);
+ assert(DefMI && "No defining instruction");
+ }
// Check possible sibling copies.
- if (VNI->isPHIDef() || VNI->getCopy()) {
+ if (VNI->isPHIDef() || DefMI->isCopy()) {
VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
assert(OrigVNI && "Def outside original live range");
if (OrigVNI->def != VNI->def)
DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
}
- if (!DefMI && !VNI->isPHIDef())
- DefMI = LIS.getInstructionFromIndex(VNI->def);
- if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) {
+ if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) {
DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@'
<< VNI->def << " may remat from " << *DefMI);
}
@@ -665,8 +667,8 @@ void InlineSpiller::analyzeSiblingValues() {
/// a spill at a better location.
bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
- VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex());
- assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy");
+ VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot());
+ assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
SibValueMap::iterator I = SibValues.find(VNI);
if (I == SibValues.end())
return false;
@@ -726,7 +728,6 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
MRI.getRegClass(SVI.SpillReg), &TRI);
--MII; // Point to store instruction.
LIS.InsertMachineInstrInMaps(MII);
- VRM.addSpillSlotUse(StackSlot, MII);
DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
++NumSpills;
@@ -760,7 +761,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
// Find all spills and copies of VNI.
for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg);
MachineInstr *MI = UI.skipInstruction();) {
- if (!MI->isCopy() && !MI->getDesc().mayStore())
+ if (!MI->isCopy() && !MI->mayStore())
continue;
SlotIndex Idx = LIS.getInstructionIndex(MI);
if (LI->getVNInfoAt(Idx) != VNI)
@@ -770,9 +771,9 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
if (unsigned DstReg = isFullCopyOf(MI, Reg)) {
if (isSibling(DstReg)) {
LiveInterval &DstLI = LIS.getInterval(DstReg);
- VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex());
+ VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
assert(DstVNI && "Missing defined value");
- assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot");
+ assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");
WorkList.push_back(std::make_pair(&DstLI, DstVNI));
}
continue;
@@ -811,7 +812,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
- VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot());
+ VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI));
if (PVNI)
WorkList.push_back(std::make_pair(LI, PVNI));
}
@@ -824,7 +825,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
continue;
LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy");
- VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex());
+ VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true));
assert(SnipVNI && "Snippet undefined before copy");
WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
} while (!WorkList.empty());
@@ -833,7 +834,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
MachineBasicBlock::iterator MI) {
- SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex();
+ SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());
if (!ParentVNI) {
@@ -855,7 +856,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
if (SibI != SibValues.end())
RM.OrigMI = SibI->second.DefMI;
- if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) {
+ if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
markValueUsed(&VirtReg, ParentVNI);
DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
return false;
@@ -863,42 +864,37 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
// If the instruction also writes VirtReg.reg, it had better not require the
// same register for uses and defs.
- bool Reads, Writes;
- SmallVector<unsigned, 8> Ops;
- tie(Reads, Writes) = MI->readsWritesVirtualRegister(VirtReg.reg, &Ops);
- if (Writes) {
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(Ops[i]);
- if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) {
- markValueUsed(&VirtReg, ParentVNI);
- DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
- return false;
- }
- }
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
+ MIBundleOperands::RegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
+ if (RI.Tied) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+ return false;
}
// Before rematerializing into a register for a single instruction, try to
// fold a load into the instruction. That avoids allocating a new register.
- if (RM.OrigMI->getDesc().canFoldAsLoad() &&
- foldMemoryOperand(MI, Ops, RM.OrigMI)) {
+ if (RM.OrigMI->canFoldAsLoad() &&
+ foldMemoryOperand(Ops, RM.OrigMI)) {
Edit->markRematerialized(RM.ParentVNI);
++NumFoldedLoads;
return true;
}
// Allocate a new register for the remat.
- LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM);
+ LiveInterval &NewLI = Edit->createFrom(Original);
NewLI.markNotSpillable();
// Finally we can rematerialize OrigMI before MI.
SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
- LIS, TII, TRI);
+ TRI);
DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
<< *LIS.getInstructionFromIndex(DefIdx));
// Replace operands
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(Ops[i]);
+ MachineOperand &MO = MI->getOperand(Ops[i].second);
if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
MO.setReg(NewLI.reg);
MO.setIsKill();
@@ -906,8 +902,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
}
DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
- VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator());
- NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI));
+ VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI));
DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
++NumRemats;
return true;
@@ -917,7 +913,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
// analyzeSiblingValues has already tested all relevant defining instructions.
- if (!Edit->anyRematerializable(LIS, TII, AA))
+ if (!Edit->anyRematerializable(AA))
return;
UsedValues.clear();
@@ -929,7 +925,7 @@ void InlineSpiller::reMaterializeAll() {
LiveInterval &LI = LIS.getInterval(Reg);
for (MachineRegisterInfo::use_nodbg_iterator
RI = MRI.use_nodbg_begin(Reg);
- MachineInstr *MI = RI.skipInstruction();)
+ MachineInstr *MI = RI.skipBundle();)
anyRemat |= reMaterializeFor(LI, MI);
}
if (!anyRemat)
@@ -958,7 +954,7 @@ void InlineSpiller::reMaterializeAll() {
if (DeadDefs.empty())
return;
DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
- Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
// Get rid of deleted and empty intervals.
for (unsigned i = RegsToSpill.size(); i != 0; --i) {
@@ -970,7 +966,7 @@ void InlineSpiller::reMaterializeAll() {
LiveInterval &LI = LIS.getInterval(Reg);
if (!LI.empty())
continue;
- Edit->eraseVirtReg(Reg, LIS);
+ Edit->eraseVirtReg(Reg);
RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
}
DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
@@ -1008,23 +1004,35 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
return true;
}
-/// foldMemoryOperand - Try folding stack slot references in Ops into MI.
-/// @param MI Instruction using or defining the current register.
-/// @param Ops Operand indices from readsWritesVirtualRegister().
+/// foldMemoryOperand - Try folding stack slot references in Ops into their
+/// instructions.
+///
+/// @param Ops Operand indices from analyzeVirtReg().
/// @param LoadMI Load instruction to use instead of stack slot when non-null.
-/// @return True on success, and MI will be erased.
-bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr *LoadMI) {
+/// @return True on success.
+bool InlineSpiller::
+foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
+ MachineInstr *LoadMI) {
+ if (Ops.empty())
+ return false;
+ // Don't attempt folding in bundles.
+ MachineInstr *MI = Ops.front().first;
+ if (Ops.back().first != MI || MI->isBundled())
+ return false;
+
bool WasCopy = MI->isCopy();
+ unsigned ImpReg = 0;
+
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
// operands.
SmallVector<unsigned, 8> FoldOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- unsigned Idx = Ops[i];
+ unsigned Idx = Ops[i].second;
MachineOperand &MO = MI->getOperand(Idx);
- if (MO.isImplicit())
+ if (MO.isImplicit()) {
+ ImpReg = MO.getReg();
continue;
+ }
// FIXME: Teach targets to deal with subregs.
if (MO.getSubReg())
return false;
@@ -1042,13 +1050,24 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
if (!FoldMI)
return false;
LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
- if (!LoadMI)
- VRM.addSpillSlotUse(StackSlot, FoldMI);
MI->eraseFromParent();
- DEBUG(dbgs() << "\tfolded: " << *FoldMI);
+
+ // TII.foldMemoryOperand may have left some implicit operands on the
+ // instruction. Strip them.
+ if (ImpReg)
+ for (unsigned i = FoldMI->getNumOperands(); i; --i) {
+ MachineOperand &MO = FoldMI->getOperand(i - 1);
+ if (!MO.isReg() || !MO.isImplicit())
+ break;
+ if (MO.getReg() == ImpReg)
+ FoldMI->RemoveOperand(i - 1);
+ }
+
+ DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t'
+ << *FoldMI);
if (!WasCopy)
++NumFolded;
- else if (Ops.front() == 0)
+ else if (Ops.front().second == 0)
++NumSpills;
else
++NumReloads;
@@ -1063,11 +1082,9 @@ void InlineSpiller::insertReload(LiveInterval &NewLI,
TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot,
MRI.getRegClass(NewLI.reg), &TRI);
--MI; // Point to load instruction.
- SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex();
- VRM.addSpillSlotUse(StackSlot, MI);
+ SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
- VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0,
- LIS.getVNInfoAllocator());
+ VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator());
NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
++NumReloads;
}
@@ -1079,10 +1096,9 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot,
MRI.getRegClass(NewLI.reg), &TRI);
--MI; // Point to store instruction.
- SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex();
- VRM.addSpillSlotUse(StackSlot, MI);
+ SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
- VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+ VNInfo *StoreVNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
++NumSpills;
}
@@ -1093,8 +1109,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
LiveInterval &OldLI = LIS.getInterval(Reg);
// Iterate over instructions using Reg.
- for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
- MachineInstr *MI = RI.skipInstruction();) {
+ for (MachineRegisterInfo::reg_iterator RegI = MRI.reg_begin(Reg);
+ MachineInstr *MI = RegI.skipBundle();) {
// Debug values are not allowed to affect codegen.
if (MI->isDebugValue()) {
@@ -1123,14 +1139,14 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
continue;
// Analyze instruction.
- bool Reads, Writes;
- SmallVector<unsigned, 8> Ops;
- tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops);
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
+ MIBundleOperands::RegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops);
// Find the slot index where this instruction reads and writes OldLI.
// This is usually the def slot, except for tied early clobbers.
- SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex();
- if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex()))
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+ if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
if (SlotIndex::isSameInstr(Idx, VNI->def))
Idx = VNI->def;
@@ -1143,7 +1159,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
SnippetCopies.insert(MI);
continue;
}
- if (Writes) {
+ if (RI.Writes) {
// Hoist the spill of a sib-reg copy.
if (hoistSpill(OldLI, MI)) {
// This COPY is now dead, the value is already in the stack slot.
@@ -1160,24 +1176,24 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
}
// Attempt to fold memory ops.
- if (foldMemoryOperand(MI, Ops))
+ if (foldMemoryOperand(Ops))
continue;
// Allocate interval around instruction.
// FIXME: Infer regclass from instruction alone.
- LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM);
+ LiveInterval &NewLI = Edit->createFrom(Reg);
NewLI.markNotSpillable();
- if (Reads)
+ if (RI.Reads)
insertReload(NewLI, Idx, MI);
// Rewrite instruction operands.
bool hasLiveDef = false;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(Ops[i]);
+ MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second);
MO.setReg(NewLI.reg);
if (MO.isUse()) {
- if (!MI->isRegTiedToDefOperand(Ops[i]))
+ if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second))
MO.setIsKill();
} else {
if (!MO.isDead())
@@ -1187,15 +1203,15 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI);
// FIXME: Use a second vreg if instruction has no tied ops.
- if (Writes) {
- if (hasLiveDef)
- insertSpill(NewLI, OldLI, Idx, MI);
- else {
- // This instruction defines a dead value. We don't need to spill it,
- // but do create a live range for the dead value.
- VNInfo *VNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator());
- NewLI.addRange(LiveRange(Idx, Idx.getNextSlot(), VNI));
- }
+ if (RI.Writes) {
+ if (hasLiveDef)
+ insertSpill(NewLI, OldLI, Idx, MI);
+ else {
+ // This instruction defines a dead value. We don't need to spill it,
+ // but do create a live range for the dead value.
+ VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI));
+ }
}
DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
@@ -1208,7 +1224,7 @@ void InlineSpiller::spillAll() {
if (StackSlot == VirtRegMap::NO_STACK_SLOT) {
StackSlot = VRM.assignVirt2StackSlot(Original);
StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original));
- StackInt->getNextValue(SlotIndex(), 0, LSS.getVNInfoAllocator());
+ StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator());
} else
StackInt = &LSS.getInterval(StackSlot);
@@ -1228,7 +1244,7 @@ void InlineSpiller::spillAll() {
// Hoisted spills may cause dead code.
if (!DeadDefs.empty()) {
DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
- Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
}
// Finally delete the SnippetCopies.
@@ -1237,7 +1253,6 @@ void InlineSpiller::spillAll() {
MachineInstr *MI = RI.skipInstruction();) {
assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
// FIXME: Do this with a LiveRangeEdit callback.
- VRM.RemoveMachineInstrFromMaps(MI);
LIS.RemoveMachineInstrFromMaps(MI);
MI->eraseFromParent();
}
@@ -1245,7 +1260,7 @@ void InlineSpiller::spillAll() {
// Delete all spilled registers.
for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
- Edit->eraseVirtReg(RegsToSpill[i], LIS);
+ Edit->eraseVirtReg(RegsToSpill[i]);
}
void InlineSpiller::spill(LiveRangeEdit &edit) {
@@ -1274,5 +1289,5 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
if (!RegsToSpill.empty())
spillAll();
- Edit->calculateRegClassAndHint(MF, LIS, Loops);
+ Edit->calculateRegClassAndHint(MF, Loops);
}
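A minimal sketch of the operand bookkeeping these InlineSpiller hunks switch to: analyzeVirtReg() reports reads/writes/tied-ness for a whole bundle and records each operand as a (MachineInstr*, operand index) pair, so callers index through the pair instead of assuming a single instruction. The helper name hasUntiedUse is hypothetical; the MIBundleOperands API, the RegInfo fields, and the pair layout are taken from the hunks above, and the header is assumed to be the MachineInstrBundle.h introduced alongside these changes.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineInstrBundle.h"
    using namespace llvm;

    // Sketch: gather the (instruction, operand index) pairs that analyzeVirtReg()
    // records for virtual register Reg, then look for an untied use, mirroring the
    // checks reMaterializeFor() performs above.
    static bool hasUntiedUse(MachineInstr *MI, unsigned Reg) {
      SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
      MIBundleOperands::RegInfo RI = MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops);
      if (RI.Tied)
        return false;                               // tied operands block remat
      for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
        MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second);
        if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
          return true;
      }
      return false;
    }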
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 29b47bd67ece..8368b58880a3 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -1,4 +1,4 @@
-//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===//
+//===-- InterferenceCache.cpp - Caching per-block interference ---------*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,6 +15,7 @@
#include "InterferenceCache.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
using namespace llvm;
@@ -24,13 +25,14 @@ InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference;
void InterferenceCache::init(MachineFunction *mf,
LiveIntervalUnion *liuarray,
SlotIndexes *indexes,
+ LiveIntervals *lis,
const TargetRegisterInfo *tri) {
MF = mf;
LIUArray = liuarray;
TRI = tri;
PhysRegEntries.assign(TRI->getNumRegs(), 0);
for (unsigned i = 0; i != CacheEntries; ++i)
- Entries[i].clear(mf, indexes);
+ Entries[i].clear(mf, indexes, lis);
}
InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
@@ -78,7 +80,7 @@ void InterferenceCache::Entry::reset(unsigned physReg,
PhysReg = physReg;
Blocks.resize(MF->getNumBlockIDs());
Aliases.clear();
- for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) {
+ for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) {
LiveIntervalUnion *LIU = LIUArray + *AS;
Aliases.push_back(std::make_pair(LIU, LIU->getTag()));
}
@@ -94,7 +96,7 @@ void InterferenceCache::Entry::reset(unsigned physReg,
bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
const TargetRegisterInfo *TRI) {
unsigned i = 0, e = Aliases.size();
- for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) {
+ for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) {
LiveIntervalUnion *LIU = LIUArray + *AS;
if (i == e || Aliases[i].first != LIU)
return false;
@@ -121,6 +123,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum);
BlockInterference *BI = &Blocks[MBBNum];
+ ArrayRef<SlotIndex> RegMaskSlots;
+ ArrayRef<const uint32_t*> RegMaskBits;
for (;;) {
BI->Tag = Tag;
BI->First = BI->Last = SlotIndex();
@@ -137,6 +141,18 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
BI->First = StartI;
}
+ // Also check for register mask interference.
+ RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum);
+ RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum);
+ SlotIndex Limit = BI->First.isValid() ? BI->First : Stop;
+ for (unsigned i = 0, e = RegMaskSlots.size();
+ i != e && RegMaskSlots[i] < Limit; ++i)
+ if (MachineOperand::clobbersPhysReg(RegMaskBits[i], PhysReg)) {
+ // Register mask i clobbers PhysReg before the LIU interference.
+ BI->First = RegMaskSlots[i];
+ break;
+ }
+
PrevPos = Stop;
if (BI->First.isValid())
break;
@@ -166,4 +182,15 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
if (Backup)
++I;
}
+
+ // Also check for register mask interference.
+ SlotIndex Limit = BI->Last.isValid() ? BI->Last : Start;
+ for (unsigned i = RegMaskSlots.size();
+ i && RegMaskSlots[i-1].getDeadSlot() > Limit; --i)
+ if (MachineOperand::clobbersPhysReg(RegMaskBits[i-1], PhysReg)) {
+ // Register mask i-1 clobbers PhysReg after the LIU interference.
+ // Model the regmask clobber as a dead def.
+ BI->Last = RegMaskSlots[i-1].getDeadSlot();
+ break;
+ }
}
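A standalone sketch of the register-mask check Entry::update() now performs, using the same per-block arrays it queries via LIS->getRegMaskSlotsInBlock() and LIS->getRegMaskBitsInBlock(). The function name is hypothetical; MachineOperand::clobbersPhysReg and the array shapes come from the hunk above.

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/CodeGen/MachineOperand.h"
    #include "llvm/CodeGen/SlotIndexes.h"
    using namespace llvm;

    // Sketch: report whether any register mask before Limit clobbers PhysReg,
    // i.e. whether a call (or other mask operand) interferes ahead of the
    // LiveIntervalUnion interference found above.
    static bool maskClobbersBefore(ArrayRef<SlotIndex> Slots,
                                   ArrayRef<const uint32_t*> Bits,
                                   SlotIndex Limit, unsigned PhysReg) {
      for (unsigned i = 0, e = Slots.size(); i != e && Slots[i] < Limit; ++i)
        if (MachineOperand::clobbersPhysReg(Bits[i], PhysReg))
          return true;
      return false;
    }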
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 4df0a9e5c393..485a325aa146 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -18,10 +18,11 @@
namespace llvm {
+class LiveIntervals;
+
class InterferenceCache {
const TargetRegisterInfo *TRI;
LiveIntervalUnion *LIUArray;
- SlotIndexes *Indexes;
MachineFunction *MF;
/// BlockInterference - information about the interference in a single basic
@@ -52,6 +53,9 @@ class InterferenceCache {
/// Indexes - Mapping block numbers to SlotIndex ranges.
SlotIndexes *Indexes;
+ /// LIS - Used for accessing register mask interference maps.
+ LiveIntervals *LIS;
+
/// PrevPos - The previous position the iterators were moved to.
SlotIndex PrevPos;
@@ -71,13 +75,14 @@ class InterferenceCache {
void update(unsigned MBBNum);
public:
- Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0) {}
+ Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0), LIS(0) {}
- void clear(MachineFunction *mf, SlotIndexes *indexes) {
+ void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) {
assert(!hasRefs() && "Cannot clear cache entry with references");
PhysReg = 0;
MF = mf;
Indexes = indexes;
+ LIS = lis;
}
unsigned getPhysReg() const { return PhysReg; }
@@ -124,10 +129,10 @@ class InterferenceCache {
Entry *get(unsigned PhysReg);
public:
- InterferenceCache() : TRI(0), LIUArray(0), Indexes(0), MF(0), RoundRobin(0) {}
+ InterferenceCache() : TRI(0), LIUArray(0), MF(0), RoundRobin(0) {}
/// init - Prepare cache for a new function.
- void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*,
+ void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*,
const TargetRegisterInfo *);
/// getMaxCursors - Return the maximum number of concurrent cursors that can
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 0f92c2d06bdd..a9ca42f69b97 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -448,11 +448,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::dbg_declare:
break; // Simply strip out debugging intrinsics
- case Intrinsic::eh_exception:
- case Intrinsic::eh_selector:
- CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
- break;
-
case Intrinsic::eh_typeid_for:
// Return something different to eh_selector.
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
diff --git a/lib/CodeGen/JITCodeEmitter.cpp b/lib/CodeGen/JITCodeEmitter.cpp
new file mode 100644
index 000000000000..96a53892f6d3
--- /dev/null
+++ b/lib/CodeGen/JITCodeEmitter.cpp
@@ -0,0 +1,14 @@
+//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/JITCodeEmitter.h"
+
+using namespace llvm;
+
+void JITCodeEmitter::anchor() { }
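The new file exists only to give JITCodeEmitter an out-of-line virtual method; defining anchor() in exactly one .cpp pins the class's vtable and typeinfo to that object file instead of emitting them in every translation unit that includes the header. A minimal sketch of the idiom with a hypothetical class:

    // Widget.h
    class Widget {
      virtual void anchor();       // declared, but deliberately not defined inline
    public:
      virtual ~Widget();
    };

    // Widget.cpp -- the single translation unit that now owns the vtable
    void Widget::anchor() {}
    Widget::~Widget() {}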
diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt
new file mode 100644
index 000000000000..fee0347ea659
--- /dev/null
+++ b/lib/CodeGen/LLVMBuild.txt
@@ -0,0 +1,25 @@
+;===- ./lib/CodeGen/LLVMBuild.txt ------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmPrinter SelectionDAG
+
+[component_0]
+type = Library
+name = CodeGen
+parent = Libraries
+required_libraries = Analysis Core MC Scalar Support Target TransformUtils
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 187147a3e252..a1f479a4275f 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -11,82 +11,42 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/PassManager.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-namespace llvm {
- bool EnableFastISel;
-}
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -O0.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the \"fast\" instruction selector"));
-static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
- cl::desc("Disable Post Regalloc"));
-static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
- cl::desc("Disable branch folding"));
-static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
- cl::desc("Disable tail duplication"));
-static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
- cl::desc("Disable pre-register allocation tail duplication"));
-static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
- cl::desc("Disable code placement"));
-static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
- cl::desc("Disable Stack Slot Coloring"));
-static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
- cl::desc("Disable Machine Dead Code Elimination"));
-static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
- cl::desc("Disable Machine LICM"));
-static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
- cl::desc("Disable Machine Common Subexpression Elimination"));
-static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
- cl::Hidden,
- cl::desc("Disable Machine LICM"));
-static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
- cl::desc("Disable Machine Sinking"));
-static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
- cl::desc("Disable Loop Strength Reduction Pass"));
-static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
- cl::desc("Disable Codegen Prepare"));
-static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
- cl::desc("Print LLVM IR produced by the loop-reduce pass"));
-static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
- cl::desc("Print LLVM IR input to isel pass"));
-static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
- cl::desc("Dump garbage collector data"));
static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden,
cl::desc("Show encoding in .s output"));
static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden,
cl::desc("Show instruction structure in .s output"));
-static cl::opt<bool> EnableMCLogging("enable-mc-api-logging", cl::Hidden,
- cl::desc("Enable MC API logging"));
-static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
- cl::desc("Verify generated machine code"),
- cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
static cl::opt<cl::boolOrDefault>
AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
@@ -94,25 +54,20 @@ AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
static bool getVerboseAsm() {
switch (AsmVerbose) {
- default:
case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault();
case cl::BOU_TRUE: return true;
case cl::BOU_FALSE: return false;
}
+ llvm_unreachable("Invalid verbose asm state");
}
-// Enable or disable FastISel. Both options are needed, because
-// FastISel is enabled by default with -fast, and we wish to be
-// able to enable or disable fast-isel independently from -O0.
-static cl::opt<cl::boolOrDefault>
-EnableFastISelOption("fast-isel", cl::Hidden,
- cl::desc("Enable the \"fast\" instruction selector"));
-
LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : TargetMachine(T, Triple, CPU, FS) {
- CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM);
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : TargetMachine(T, Triple, CPU, FS, Options) {
+ CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL);
AsmInfo = T.createMCAsmInfo(Triple);
// TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
// and if the old one gets included then MCAsmInfo will be NULL and
@@ -123,16 +78,88 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
"and that InitializeAllTargetMCs() is being invoked!");
}
+/// Turn exception handling constructs into something the code generators can
+/// handle.
+static void addPassesToHandleExceptions(TargetMachine *TM,
+ PassManagerBase &PM) {
+ switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
+ // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ PM.add(createSjLjEHPreparePass(TM->getTargetLowering()));
+ // FALLTHROUGH
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ case ExceptionHandling::Win64:
+ PM.add(createDwarfEHPass(TM));
+ break;
+ case ExceptionHandling::None:
+ PM.add(createLowerInvokePass(TM->getTargetLowering()));
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ PM.add(createUnreachableBlockEliminationPass());
+ break;
+ }
+}
+
+/// addPassesToX helper drives creation and initialization of TargetPassConfig.
+static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
+ PassManagerBase &PM,
+ bool DisableVerify) {
+ // Targets may override createPassConfig to provide a target-specific subclass.
+ TargetPassConfig *PassConfig = TM->createPassConfig(PM);
+
+ // Set PassConfig options provided by TargetMachine.
+ PassConfig->setDisableVerify(DisableVerify);
+
+ PM.add(PassConfig);
+
+ PassConfig->addIRPasses();
+
+ addPassesToHandleExceptions(TM, PM);
+
+ PassConfig->addISelPrepare();
+
+ // Install a MachineModuleInfo class, which is an immutable pass that holds
+ // all the per-module stuff we're generating, including MCContext.
+ MachineModuleInfo *MMI =
+ new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(),
+ &TM->getTargetLowering()->getObjFileLowering());
+ PM.add(MMI);
+ MCContext *Context = &MMI->getContext(); // Return the MCContext by-ref.
+
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*TM));
+
+ // Enable FastISel with -fast, but allow that to be overridden.
+ if (EnableFastISelOption == cl::BOU_TRUE ||
+ (TM->getOptLevel() == CodeGenOpt::None &&
+ EnableFastISelOption != cl::BOU_FALSE))
+ TM->setFastISel(true);
+
+ // Ask the target for an isel.
+ if (PassConfig->addInstSelector())
+ return NULL;
+
+ PassConfig->addMachinePasses();
+
+ PassConfig->setInitialized();
+
+ return Context;
+}
+
bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
bool DisableVerify) {
// Add common CodeGen passes.
- MCContext *Context = 0;
- if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify);
+ if (!Context)
return true;
- assert(Context != 0 && "Failed to get MCContext");
if (hasMCSaveTempLabels())
Context->setAllowTemporaryLabels(false);
@@ -142,10 +169,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
OwningPtr<MCStreamer> AsmStreamer;
switch (FileType) {
- default: return true;
case CGFT_AssemblyFile: {
MCInstPrinter *InstPrinter =
- getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI);
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI,
+ *getInstrInfo(),
+ Context->getRegisterInfo(), STI);
// Create a code emitter if asked to show the encoding.
MCCodeEmitter *MCE = 0;
@@ -160,6 +188,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
getVerboseAsm(),
hasMCUseLoc(),
hasMCUseCFI(),
+ hasMCUseDwarfDirectory(),
InstPrinter,
MCE, MAB,
ShowMCInst);
@@ -189,9 +218,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
break;
}
- if (EnableMCLogging)
- AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
-
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
if (Printer == 0)
@@ -214,14 +240,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
///
bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
JITCodeEmitter &JCE,
- CodeGenOpt::Level OptLevel,
bool DisableVerify) {
// Add common CodeGen passes.
- MCContext *Ctx = 0;
- if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify);
+ if (!Context)
return true;
- addCodeEmitter(PM, OptLevel, JCE);
+ addCodeEmitter(PM, JCE);
PM.add(createGCInfoDeleter());
return false; // success!
@@ -235,10 +260,10 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
MCContext *&Ctx,
raw_ostream &Out,
- CodeGenOpt::Level OptLevel,
bool DisableVerify) {
// Add common CodeGen passes.
- if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
+ Ctx = addPassesToGenerateCode(this, PM, DisableVerify);
+ if (!Ctx)
return true;
if (hasMCSaveTempLabels())
@@ -247,7 +272,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI, *Ctx);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI,
+ *Ctx);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple());
if (MCE == 0 || MAB == 0)
return true;
@@ -271,227 +297,3 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
return false; // success!
}
-
-static void printNoVerify(PassManagerBase &PM, const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
-}
-
-static void printAndVerify(PassManagerBase &PM,
- const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
-
- if (VerifyMachineCode)
- PM.add(createMachineVerifierPass(Banner));
-}
-
-/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both
-/// emitting to assembly files or machine code output.
-///
-bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify,
- MCContext *&OutContext) {
- // Standard LLVM-Level Passes.
-
- // Basic AliasAnalysis support.
- // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
- // BasicAliasAnalysis wins if they disagree. This is intended to help
- // support "obvious" type-punning idioms.
- PM.add(createTypeBasedAliasAnalysisPass());
- PM.add(createBasicAliasAnalysisPass());
-
- // Before running any passes, run the verifier to determine if the input
- // coming from the front-end and/or optimizer is valid.
- if (!DisableVerify)
- PM.add(createVerifierPass());
-
- // Run loop strength reduction before anything else.
- if (OptLevel != CodeGenOpt::None && !DisableLSR) {
- PM.add(createLoopStrengthReducePass(getTargetLowering()));
- if (PrintLSR)
- PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
- }
-
- PM.add(createGCLoweringPass());
-
- // Make sure that no unreachable blocks are instruction selected.
- PM.add(createUnreachableBlockEliminationPass());
-
- // Turn exception handling constructs into something the code generators can
- // handle.
- switch (getMCAsmInfo()->getExceptionHandlingType()) {
- case ExceptionHandling::SjLj:
- // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
- // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
- // catch info can get misplaced when a selector ends up more than one block
- // removed from the parent invoke(s). This could happen when a landing
- // pad is shared by multiple invokes and is also a target of a normal
- // edge from elsewhere.
- PM.add(createSjLjEHPass(getTargetLowering()));
- // FALLTHROUGH
- case ExceptionHandling::DwarfCFI:
- case ExceptionHandling::ARM:
- case ExceptionHandling::Win64:
- PM.add(createDwarfEHPass(this));
- break;
- case ExceptionHandling::None:
- PM.add(createLowerInvokePass(getTargetLowering()));
-
- // The lower invoke pass may create unreachable code. Remove it.
- PM.add(createUnreachableBlockEliminationPass());
- break;
- }
-
- if (OptLevel != CodeGenOpt::None && !DisableCGP)
- PM.add(createCodeGenPreparePass(getTargetLowering()));
-
- PM.add(createStackProtectorPass(getTargetLowering()));
-
- addPreISel(PM, OptLevel);
-
- if (PrintISelInput)
- PM.add(createPrintFunctionPass("\n\n"
- "*** Final LLVM Code input to ISel ***\n",
- &dbgs()));
-
- // All passes which modify the LLVM IR are now complete; run the verifier
- // to ensure that the IR is valid.
- if (!DisableVerify)
- PM.add(createVerifierPass());
-
- // Standard Lower-Level Passes.
-
- // Install a MachineModuleInfo class, which is an immutable pass that holds
- // all the per-module stuff we're generating, including MCContext.
- MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
- *getRegisterInfo(),
- &getTargetLowering()->getObjFileLowering());
- PM.add(MMI);
- OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
-
- // Set up a MachineFunction for the rest of CodeGen to work on.
- PM.add(new MachineFunctionAnalysis(*this, OptLevel));
-
- // Enable FastISel with -fast, but allow that to be overridden.
- if (EnableFastISelOption == cl::BOU_TRUE ||
- (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE))
- EnableFastISel = true;
-
- // Ask the target for an isel.
- if (addInstSelector(PM, OptLevel))
- return true;
-
- // Print the instruction selected machine code...
- printAndVerify(PM, "After Instruction Selection");
-
- // Expand pseudo-instructions emitted by ISel.
- PM.add(createExpandISelPseudosPass());
-
- // Pre-ra tail duplication.
- if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
- PM.add(createTailDuplicatePass(true));
- printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
- }
-
- // Optimize PHIs before DCE: removing dead PHI cycles may make more
- // instructions dead.
- if (OptLevel != CodeGenOpt::None)
- PM.add(createOptimizePHIsPass());
-
- // If the target requests it, assign local variables to stack slots relative
- // to one another and simplify frame index references where possible.
- PM.add(createLocalStackSlotAllocationPass());
-
- if (OptLevel != CodeGenOpt::None) {
- // With optimization, dead code should already be eliminated. However
- // there is one known exception: lowered code for arguments that are only
- // used by tail calls, where the tail calls reuse the incoming stack
- // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
- if (!DisableMachineDCE)
- PM.add(createDeadMachineInstructionElimPass());
- printAndVerify(PM, "After codegen DCE pass");
-
- if (!DisableMachineLICM)
- PM.add(createMachineLICMPass());
- if (!DisableMachineCSE)
- PM.add(createMachineCSEPass());
- if (!DisableMachineSink)
- PM.add(createMachineSinkingPass());
- printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
-
- PM.add(createPeepholeOptimizerPass());
- printAndVerify(PM, "After codegen peephole optimization pass");
- }
-
- // Run pre-ra passes.
- if (addPreRegAlloc(PM, OptLevel))
- printAndVerify(PM, "After PreRegAlloc passes");
-
- // Perform register allocation.
- PM.add(createRegisterAllocator(OptLevel));
- printAndVerify(PM, "After Register Allocation");
-
- // Perform stack slot coloring and post-ra machine LICM.
- if (OptLevel != CodeGenOpt::None) {
- // FIXME: Re-enable coloring with register when it's capable of adding
- // kill markers.
- if (!DisableSSC)
- PM.add(createStackSlotColoringPass(false));
-
- // Run post-ra machine LICM to hoist reloads / remats.
- if (!DisablePostRAMachineLICM)
- PM.add(createMachineLICMPass(false));
-
- printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
- }
-
- // Run post-ra passes.
- if (addPostRegAlloc(PM, OptLevel))
- printAndVerify(PM, "After PostRegAlloc passes");
-
- PM.add(createExpandPostRAPseudosPass());
- printAndVerify(PM, "After ExpandPostRAPseudos");
-
- // Insert prolog/epilog code. Eliminate abstract frame index references...
- PM.add(createPrologEpilogCodeInserter());
- printAndVerify(PM, "After PrologEpilogCodeInserter");
-
- // Run pre-sched2 passes.
- if (addPreSched2(PM, OptLevel))
- printAndVerify(PM, "After PreSched2 passes");
-
- // Second pass scheduler.
- if (OptLevel != CodeGenOpt::None && !DisablePostRA) {
- PM.add(createPostRAScheduler(OptLevel));
- printAndVerify(PM, "After PostRAScheduler");
- }
-
- // Branch folding must be run after regalloc and prolog/epilog insertion.
- if (OptLevel != CodeGenOpt::None && !DisableBranchFold) {
- PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
- printNoVerify(PM, "After BranchFolding");
- }
-
- // Tail duplication.
- if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) {
- PM.add(createTailDuplicatePass(false));
- printNoVerify(PM, "After TailDuplicate");
- }
-
- PM.add(createGCMachineCodeAnalysisPass());
-
- if (PrintGCInfo)
- PM.add(createGCInfoPrinter(dbgs()));
-
- if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
- PM.add(createCodePlacementOptPass());
- printNoVerify(PM, "After CodePlacementOpt");
- }
-
- if (addPreEmitPass(PM, OptLevel))
- printNoVerify(PM, "After PreEmit passes");
-
- return false;
-}
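A call-site sketch of the reorganized entry point: clients keep calling addPassesToEmitFile(), which now builds a TargetPassConfig via addPassesToGenerateCode() instead of the removed addCommonCodeGenPasses(), and no longer takes a CodeGenOpt::Level argument. This is a fragment from a hypothetical llc-style driver; the function name emitObject and its parameters are placeholders, only the addPassesToEmitFile signature comes from the hunk above.

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Support/FormattedStream.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Target/TargetMachine.h"
    using namespace llvm;

    // Sketch: ask the target machine to emit an object file for module M.
    static bool emitObject(LLVMTargetMachine &TM, raw_ostream &Out, Module &M) {
      PassManager PM;
      formatted_raw_ostream FOS(Out);
      if (TM.addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_ObjectFile,
                                 /*DisableVerify=*/false))
        return true;               // returns true when the target cannot emit this file type
      PM.run(M);
      return false;
    }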
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 0eb009ddac29..deab05a412c9 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -46,7 +46,7 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
// Finally, just to provide a stable ordering, use the node number as a
// deciding factor.
- return LHSNum < RHSNum;
+ return RHSNum < LHSNum;
}
@@ -84,11 +84,11 @@ void LatencyPriorityQueue::push(SUnit *SU) {
}
-// ScheduledNode - As nodes are scheduled, we look to see if there are any
+// scheduledNode - As nodes are scheduled, we look to see if there are any
// successor nodes that have a single unscheduled predecessor. If so, that
// single predecessor has a higher priority, since scheduling it will make
// the node available.
-void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
+void LatencyPriorityQueue::scheduledNode(SUnit *SU) {
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
AdjustPriorityOfUnscheduledPreds(I->getSUnit());
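For orientation on the flipped tie-break: if, as is conventional for these scheduler pickers, the best unit is the maximum element under the comparator, then returning "RHSNum < LHSNum" makes a larger node number rank lower, so ties resolve toward the smaller-numbered node. A standalone illustration of that comparator shape with plain ints (an analogy only, not the SUnit queue itself):

    #include <cstdio>
    #include <queue>
    #include <vector>

    struct ByNumber {
      // Same shape as the new tie-break: operator()(L, R) is "RHSNum < LHSNum".
      bool operator()(int L, int R) const { return R < L; }
    };

    int main() {
      std::priority_queue<int, std::vector<int>, ByNumber> Q;
      Q.push(3); Q.push(1); Q.push(2);
      while (!Q.empty()) {
        std::printf("%d ", Q.top());   // prints: 1 2 3 -- the smallest number wins
        Q.pop();
      }
      return 0;
    }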
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index a12e1a36d113..f1abcbb1dd5c 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -311,6 +311,8 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) {
return Result;
}
+void LexicalScope::anchor() { }
+
/// dump - Print data structures.
void LexicalScope::dump() const {
#ifndef NDEBUG
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 3dfe4c0e8cfa..2187833031ee 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -226,7 +226,7 @@ public:
LiveInterval *LI, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS, MachineDominatorTree &MDT,
- UserValueScopes &UVS);
+ UserValueScopes &UVS);
/// addDefsFromCopies - The value in LI/LocNo may be copies to other
/// registers. Determine if any of the copies are available at the kill
@@ -468,7 +468,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
// DBG_VALUE has no slot index, use the previous instruction instead.
SlotIndex Idx = MBBI == MBB->begin() ?
LIS->getMBBStartIdx(MBB) :
- LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex();
+ LIS->getInstructionIndex(llvm::prior(MBBI)).getRegSlot();
// Handle consecutive DBG_VALUE instructions with the same slot index.
do {
if (handleDebugValue(MBBI, Idx)) {
@@ -486,7 +486,7 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
LiveInterval *LI, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS, MachineDominatorTree &MDT,
- UserValueScopes &UVS) {
+ UserValueScopes &UVS) {
SmallVector<SlotIndex, 16> Todo;
Todo.push_back(Idx);
do {
@@ -575,15 +575,15 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
// Is LocNo extended to reach this copy? If not, another def may be blocking
// it, or we are looking at a wrong value of LI.
SlotIndex Idx = LIS.getInstructionIndex(MI);
- LocMap::iterator I = locInts.find(Idx.getUseIndex());
+ LocMap::iterator I = locInts.find(Idx.getRegSlot(true));
if (!I.valid() || I.value() != LocNo)
continue;
if (!LIS.hasInterval(DstReg))
continue;
LiveInterval *DstLI = &LIS.getInterval(DstReg);
- const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex());
- assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value");
+ const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot());
+ assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value");
CopyValues.push_back(std::make_pair(DstLI, DstVNI));
}
@@ -620,7 +620,7 @@ void
UserValue::computeIntervals(MachineRegisterInfo &MRI,
LiveIntervals &LIS,
MachineDominatorTree &MDT,
- UserValueScopes &UVS) {
+ UserValueScopes &UVS) {
SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
// Collect all defs to be extended (Skipping undefs).
@@ -841,7 +841,7 @@ bool
UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
bool DidChange = false;
// Split locations referring to OldReg. Iterate backwards so splitLocation can
- // safely erase unuused locations.
+ // safely erase unused locations.
for (unsigned i = locations.size(); i ; --i) {
unsigned LocNo = i-1;
const MachineOperand *Loc = &locations[LocNo];
@@ -889,8 +889,7 @@ UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
// index is no longer available. That means the user value is in a
// non-existent sub-register, and %noreg is exactly what we want.
Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
- } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT &&
- VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) {
+ } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
// FIXME: Translate SubIdx to a stackslot offset.
Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
} else {
@@ -921,8 +920,8 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
}
// Don't insert anything after the first terminator, though.
- return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() :
- llvm::next(MachineBasicBlock::iterator(MI));
+ return MI->isTerminator() ? MBB->getFirstTerminator() :
+ llvm::next(MachineBasicBlock::iterator(MI));
}
DebugLoc UserValue::findDebugLoc() {
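The SlotIndex accessor renames threaded through this and the neighboring files all follow the same substitutions, summarized here as they appear in the hunks (not an exhaustive API listing):

    Idx.getUseIndex()                          ->  Idx.getRegSlot(true)
    Idx.getDefIndex()                          ->  Idx.getRegSlot()
    Idx.getStoreIndex()                        ->  Idx.getDeadSlot()
    LI.getVNInfoAt(End.getPrevSlot())          ->  LI.getVNInfoBefore(End)
    LI.getNextValue(Idx, 0 /*CopyMI*/, Alloc)  ->  LI.getNextValue(Idx, Alloc)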
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index b69945aea98f..ac18843ac30d 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -381,37 +381,40 @@ void LiveInterval::join(LiveInterval &Other,
for (unsigned i = 0; i != NumVals; ++i) {
unsigned LHSValID = LHSValNoAssignments[i];
if (i != LHSValID ||
- (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i)))
+ (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) {
MustMapCurValNos = true;
+ break;
+ }
}
// If we have to apply a mapping to our base interval assignment, rewrite it
// now.
if (MustMapCurValNos) {
// Map the first live range.
+
iterator OutIt = begin();
OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
- ++OutIt;
- for (iterator I = OutIt, E = end(); I != E; ++I) {
- OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+ for (iterator I = next(OutIt), E = end(); I != E; ++I) {
+ VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+ assert(nextValNo != 0 && "Huh?");
// If this live range has the same value # as its immediate predecessor,
// and if they are neighbors, remove one LiveRange. This happens when we
- // have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
- if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) {
- (OutIt-1)->end = OutIt->end;
+ // have [0,4:0)[4,7:1) and map 0/1 onto the same value #.
+ if (OutIt->valno == nextValNo && OutIt->end == I->start) {
+ OutIt->end = I->end;
} else {
- if (I != OutIt) {
+ // Didn't merge. Move OutIt to the next interval,
+ ++OutIt;
+ OutIt->valno = nextValNo;
+ if (OutIt != I) {
OutIt->start = I->start;
OutIt->end = I->end;
}
-
- // Didn't merge, on to the next one.
- ++OutIt;
}
}
-
// If we merge some live ranges, chop off the end.
+ ++OutIt;
ranges.erase(OutIt, end());
}
@@ -639,8 +642,6 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
OS << "-phidef";
if (vni->hasPHIKill())
OS << "-phikill";
- if (vni->hasRedefByEC())
- OS << "-ec";
}
}
}
@@ -680,15 +681,14 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
// Connect to values live out of predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI)
- if (const VNInfo *PVNI =
- LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()))
+ if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)))
EqClass.join(VNI->id, PVNI->id);
} else {
// Normal value defined by an instruction. Check for two-addr redef.
// FIXME: This could be coincidental. Should we really check for a tied
// operand constraint?
// Note that VNI->def may be a use slot for an early clobber def.
- if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot()))
+ if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def))
EqClass.join(VNI->id, UVNI->id);
}
}
@@ -716,7 +716,7 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
continue;
// DBG_VALUE instructions should have been eliminated earlier.
SlotIndex Idx = LIS.getInstructionIndex(MI);
- Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+ Idx = Idx.getRegSlot(MO.isUse());
const VNInfo *VNI = LI.getVNInfoAt(Idx);
assert(VNI && "Interval not live at use.");
MO.setReg(LIV[getEqClass(VNI)]->reg);
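A standalone sketch of the coalescing step the rewritten join() loop performs, on plain (start, end, valno) triples rather than LiveRanges (hypothetical types, illustration only). Two neighboring ranges whose value numbers map to the same VNInfo are folded into one, e.g. [0,4:0)[4,7:1) with 0 and 1 mapped together becomes [0,7:0); ranges that do not touch stay separate, which is why the example in the comment was corrected to use touching ranges.

    #include <cstddef>

    struct Range { int start, end, valno; };

    // Returns the new number of ranges after merging touching neighbors that share
    // a value number; the caller would then erase the tail, like ranges.erase() above.
    static std::size_t mergeAdjacent(Range *R, std::size_t N) {
      if (N == 0) return 0;
      std::size_t Out = 0;
      for (std::size_t I = 1; I != N; ++I) {
        if (R[Out].valno == R[I].valno && R[Out].end == R[I].start)
          R[Out].end = R[I].end;       // neighbors with the same value: coalesce
        else
          R[++Out] = R[I];             // otherwise keep as a separate range
      }
      return Out + 1;
    }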
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index b1e202a273d3..3ade66097cbd 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -15,31 +15,22 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "liveintervals"
+#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "VirtRegMap.h"
#include "llvm/Value.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ProcessImplicitDefs.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
@@ -52,19 +43,14 @@ static cl::opt<bool> DisableReMat("disable-rematerialization",
cl::init(false), cl::Hidden);
STATISTIC(numIntervals , "Number of original intervals");
-STATISTIC(numFolds , "Number of loads/stores folded into instructions");
-STATISTIC(numSplits , "Number of intervals split");
char LiveIntervals::ID = 0;
INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
"Live Interval Analysis", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(LiveVariables)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(PHIElimination)
-INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
-INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
"Live Interval Analysis", false, false)
@@ -74,18 +60,8 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<LiveVariables>();
AU.addPreserved<LiveVariables>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
-
- if (!StrongPHIElim) {
- AU.addPreservedID(PHIEliminationID);
- AU.addRequiredID(PHIEliminationID);
- }
-
- AU.addRequiredID(TwoAddressInstructionPassID);
- AU.addPreserved<ProcessImplicitDefs>();
- AU.addRequired<ProcessImplicitDefs>();
AU.addPreserved<SlotIndexes>();
AU.addRequiredTransitive<SlotIndexes>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -98,14 +74,12 @@ void LiveIntervals::releaseMemory() {
delete I->second;
r2iMap_.clear();
+ RegMaskSlots.clear();
+ RegMaskBits.clear();
+ RegMaskBlocks.clear();
// Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
VNInfoAllocator.Reset();
- while (!CloneMIs.empty()) {
- MachineInstr *MI = CloneMIs.back();
- CloneMIs.pop_back();
- mf_->DeleteMachineInstr(MI);
- }
}
/// runOnMachineFunction - Register allocate the whole function
@@ -120,6 +94,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
lv_ = &getAnalysis<LiveVariables>();
indexes_ = &getAnalysis<SlotIndexes>();
allocatableRegs_ = tri_->getAllocatableSet(fn);
+ reservedRegs_ = tri_->getReservedRegs(fn);
computeIntervals();
@@ -132,10 +107,21 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
/// print - Implement the dump method.
void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
OS << "********** INTERVALS **********\n";
- for (const_iterator I = begin(), E = end(); I != E; ++I) {
- I->second->print(OS, tri_);
- OS << "\n";
- }
+
+ // Dump the physregs.
+ for (unsigned Reg = 1, RegE = tri_->getNumRegs(); Reg != RegE; ++Reg)
+ if (const LiveInterval *LI = r2iMap_.lookup(Reg)) {
+ LI->print(OS, tri_);
+ OS << '\n';
+ }
+
+ // Dump the virtregs.
+ for (unsigned Reg = 0, RegE = mri_->getNumVirtRegs(); Reg != RegE; ++Reg)
+ if (const LiveInterval *LI =
+ r2iMap_.lookup(TargetRegisterInfo::index2VirtReg(Reg))) {
+ LI->print(OS, tri_);
+ OS << '\n';
+ }
printInstrs(OS);
}
@@ -149,103 +135,6 @@ void LiveIntervals::dumpInstrs() const {
printInstrs(dbgs());
}
-bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
- VirtRegMap &vrm, unsigned reg) {
- // We don't handle fancy stuff crossing basic block boundaries
- if (li.ranges.size() != 1)
- return true;
- const LiveRange &range = li.ranges.front();
- SlotIndex idx = range.start.getBaseIndex();
- SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex();
-
- // Skip deleted instructions
- MachineInstr *firstMI = getInstructionFromIndex(idx);
- while (!firstMI && idx != end) {
- idx = idx.getNextIndex();
- firstMI = getInstructionFromIndex(idx);
- }
- if (!firstMI)
- return false;
-
- // Find last instruction in range
- SlotIndex lastIdx = end.getPrevIndex();
- MachineInstr *lastMI = getInstructionFromIndex(lastIdx);
- while (!lastMI && lastIdx != idx) {
- lastIdx = lastIdx.getPrevIndex();
- lastMI = getInstructionFromIndex(lastIdx);
- }
- if (!lastMI)
- return false;
-
- // Range cannot cross basic block boundaries or terminators
- MachineBasicBlock *MBB = firstMI->getParent();
- if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator())
- return true;
-
- MachineBasicBlock::const_iterator E = lastMI;
- ++E;
- for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) {
- const MachineInstr &MI = *I;
-
- // Allow copies to and from li.reg
- if (MI.isCopy())
- if (MI.getOperand(0).getReg() == li.reg ||
- MI.getOperand(1).getReg() == li.reg)
- continue;
-
- // Check for operands using reg
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand& mop = MI.getOperand(i);
- if (!mop.isReg())
- continue;
- unsigned PhysReg = mop.getReg();
- if (PhysReg == 0 || PhysReg == li.reg)
- continue;
- if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
- if (!vrm.hasPhys(PhysReg))
- continue;
- PhysReg = vrm.getPhys(PhysReg);
- }
- if (PhysReg && tri_->regsOverlap(PhysReg, reg))
- return true;
- }
- }
-
- // No conflicts found.
- return false;
-}
-
-bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg,
- SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
- for (LiveInterval::Ranges::const_iterator
- I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- for (SlotIndex index = I->start.getBaseIndex(),
- end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
- index != end;
- index = index.getNextIndex()) {
- MachineInstr *MI = getInstructionFromIndex(index);
- if (!MI)
- continue; // skip deleted instructions
-
- if (JoinedCopies.count(MI))
- continue;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned PhysReg = MO.getReg();
- if (PhysReg == 0 || PhysReg == Reg ||
- TargetRegisterInfo::isVirtualRegister(PhysReg))
- continue;
- if (tri_->regsOverlap(Reg, PhysReg))
- return true;
- }
- }
- }
-
- return false;
-}
-
static
bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
unsigned Reg = MI.getOperand(MOIdx).getReg();
@@ -271,9 +160,9 @@ bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
if (!MO.getSubReg() || MO.isEarlyClobber())
return false;
- SlotIndex RedefIndex = MIIdx.getDefIndex();
+ SlotIndex RedefIndex = MIIdx.getRegSlot();
const LiveRange *OldLR =
- interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+ interval.getLiveRangeContaining(RedefIndex.getRegSlot(true));
MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
if (DefMI != 0) {
return DefMI->findRegisterDefOperandIdx(interval.reg) != -1;
@@ -296,34 +185,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
if (interval.empty()) {
// Get the Idx of the defining instructions.
- SlotIndex defIndex = MIIdx.getDefIndex();
- // Earlyclobbers move back one, so that they overlap the live range
- // of inputs.
- if (MO.isEarlyClobber())
- defIndex = MIIdx.getUseIndex();
-
- // Make sure the first definition is not a partial redefinition. Add an
- // <imp-def> of the full register.
- // FIXME: LiveIntervals shouldn't modify the code like this. Whoever
- // created the machine instruction should annotate it with <undef> flags
- // as needed. Then we can simply assert here. The REG_SEQUENCE lowering
- // is the main suspect.
- if (MO.getSubReg()) {
- mi->addRegisterDefined(interval.reg);
- // Mark all defs of interval.reg on this instruction as reading <undef>.
- for (unsigned i = MOIdx, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO2 = mi->getOperand(i);
- if (MO2.isReg() && MO2.getReg() == interval.reg && MO2.getSubReg())
- MO2.setIsUndef();
- }
- }
+ SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber());
- MachineInstr *CopyMI = NULL;
- if (mi->isCopyLike()) {
- CopyMI = mi;
- }
+ // Make sure the first definition is not a partial redefinition.
+ assert(!MO.readsReg() && "First def cannot also read virtual register "
+ "missing <undef> flag?");
- VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+ VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator);
assert(ValNo->id == 0 && "First value in interval is not 0?");
// Loop over all of the blocks that the vreg is defined in. There are
@@ -334,9 +202,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// FIXME: what about dead vars?
SlotIndex killIdx;
if (vi.Kills[0] != mi)
- killIdx = getInstructionIndex(vi.Kills[0]).getDefIndex();
+ killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot();
else
- killIdx = defIndex.getStoreIndex();
+ killIdx = defIndex.getDeadSlot();
// If the kill happens after the definition, we have an intra-block
// live range.
@@ -384,14 +252,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
MachineInstr *Kill = vi.Kills[i];
SlotIndex Start = getMBBStartIdx(Kill->getParent());
- SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex();
+ SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot();
// Create interval with one of a NEW value number. Note that this value
// number isn't actually defined by an instruction, weird huh? :)
if (PHIJoin) {
assert(getInstructionFromIndex(Start) == 0 &&
"PHI def index points at actual instruction.");
- ValNo = interval.getNextValue(Start, 0, VNInfoAllocator);
+ ValNo = interval.getNextValue(Start, VNInfoAllocator);
ValNo->setIsPHIDef(true);
}
LiveRange LR(Start, killIdx, ValNo);
@@ -422,14 +290,12 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// are actually two values in the live interval. Because of this we
// need to take the LiveRegion that defines this register and split it
// into two values.
- SlotIndex RedefIndex = MIIdx.getDefIndex();
- if (MO.isEarlyClobber())
- RedefIndex = MIIdx.getUseIndex();
+ SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber());
const LiveRange *OldLR =
- interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+ interval.getLiveRangeContaining(RedefIndex.getRegSlot(true));
VNInfo *OldValNo = OldLR->valno;
- SlotIndex DefIndex = OldValNo->def.getDefIndex();
+ SlotIndex DefIndex = OldValNo->def.getRegSlot();
// Delete the previous value, which should be short and continuous,
// because the 2-addr copy must be in the same MBB as the redef.
@@ -440,12 +306,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator);
// Value#0 is now defined by the 2-addr instruction.
- OldValNo->def = RedefIndex;
- OldValNo->setCopy(0);
-
- // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
- if (PartReDef && mi->isCopyLike())
- OldValNo->setCopy(&*mi);
+ OldValNo->def = RedefIndex;
// Add the new live interval which replaces the range for the input copy.
LiveRange LR(DefIndex, RedefIndex, ValNo);
@@ -455,7 +316,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// If this redefinition is dead, we need to add a dummy unit live
// range covering the def slot.
if (MO.isDead())
- interval.addRange(LiveRange(RedefIndex, RedefIndex.getStoreIndex(),
+ interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(),
OldValNo));
DEBUG({
@@ -467,15 +328,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// live until the end of the block. We've already taken care of the
// rest of the live range.
- SlotIndex defIndex = MIIdx.getDefIndex();
+ SlotIndex defIndex = MIIdx.getRegSlot();
if (MO.isEarlyClobber())
- defIndex = MIIdx.getUseIndex();
+ defIndex = MIIdx.getRegSlot(true);
- VNInfo *ValNo;
- MachineInstr *CopyMI = NULL;
- if (mi->isCopyLike())
- CopyMI = mi;
- ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+ VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator);
SlotIndex killIndex = getMBBEndIdx(mbb);
LiveRange LR(defIndex, killIndex, ValNo);
@@ -490,21 +347,26 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
DEBUG(dbgs() << '\n');
}
+static bool isRegLiveIntoSuccessor(const MachineBasicBlock *MBB, unsigned Reg) {
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end();
+ SI != SE; ++SI) {
+ const MachineBasicBlock* succ = *SI;
+ if (succ->isLiveIn(Reg))
+ return true;
+ }
+ return false;
+}
+
void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
MachineBasicBlock::iterator mi,
SlotIndex MIIdx,
MachineOperand& MO,
- LiveInterval &interval,
- MachineInstr *CopyMI) {
- // A physical register cannot be live across basic block, so its
- // lifetime must end somewhere in its defining basic block.
+ LiveInterval &interval) {
DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
SlotIndex baseIndex = MIIdx;
- SlotIndex start = baseIndex.getDefIndex();
- // Earlyclobbers move back one.
- if (MO.isEarlyClobber())
- start = MIIdx.getUseIndex();
+ SlotIndex start = baseIndex.getRegSlot(MO.isEarlyClobber());
SlotIndex end = start;
// If it is not used after definition, it is considered dead at
@@ -514,7 +376,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
// advance below compensates.
if (MO.isDead()) {
DEBUG(dbgs() << " dead");
- end = start.getStoreIndex();
+ end = start.getDeadSlot();
goto exit;
}
@@ -531,21 +393,21 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
if (mi->killsRegister(interval.reg, tri_)) {
DEBUG(dbgs() << " killed");
- end = baseIndex.getDefIndex();
+ end = baseIndex.getRegSlot();
goto exit;
} else {
int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_);
if (DefIdx != -1) {
if (mi->isRegTiedToUseOperand(DefIdx)) {
// Two-address instruction.
- end = baseIndex.getDefIndex();
+ end = baseIndex.getRegSlot(mi->getOperand(DefIdx).isEarlyClobber());
} else {
// Another instruction redefines the register before it is ever read.
// Then the register is essentially dead at the instruction that
// defines it. Hence its interval is:
// [defSlot(def), defSlot(def)+1)
DEBUG(dbgs() << " dead");
- end = start.getStoreIndex();
+ end = start.getDeadSlot();
}
goto exit;
}
@@ -554,12 +416,19 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
baseIndex = baseIndex.getNextIndex();
}
- // The only case we should have a dead physreg here without a killing or
- // instruction where we know it's dead is if it is live-in to the function
- // and never used. Another possible case is the implicit use of the
- // physical register has been deleted by two-address pass.
- end = start.getStoreIndex();
+ // If we get here the register *should* be live out.
+ assert(!isAllocatable(interval.reg) && "Physregs shouldn't be live out!");
+ // FIXME: We need saner rules for reserved regs.
+ if (isReserved(interval.reg)) {
+ end = start.getDeadSlot();
+ } else {
+    // Unreserved, unallocatable registers like EFLAGS can be live across
+    // basic block boundaries.
+ assert(isRegLiveIntoSuccessor(MBB, interval.reg) &&
+ "Unreserved reg not live-out?");
+ end = getMBBEndIdx(MBB);
+ }
exit:
assert(start < end && "did not find end of interval?");
@@ -567,9 +436,7 @@ exit:
VNInfo *ValNo = interval.getVNInfoAt(start);
bool Extend = ValNo != 0;
if (!Extend)
- ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator);
- if (Extend && MO.isEarlyClobber())
- ValNo->setHasRedefByEC(true);
+ ValNo = interval.getNextValue(start, VNInfoAllocator);
LiveRange LR(start, end, ValNo);
interval.addRange(LR);
DEBUG(dbgs() << " +" << LR << '\n');
@@ -583,18 +450,20 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
getOrCreateInterval(MO.getReg()));
- else {
- MachineInstr *CopyMI = NULL;
- if (MI->isCopyLike())
- CopyMI = MI;
+ else
handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
- getOrCreateInterval(MO.getReg()), CopyMI);
- }
+ getOrCreateInterval(MO.getReg()));
}
void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
SlotIndex MIIdx,
- LiveInterval &interval, bool isAlias) {
+ LiveInterval &interval) {
+ assert(TargetRegisterInfo::isPhysicalRegister(interval.reg) &&
+ "Only physical registers can be live in.");
+ assert((!isAllocatable(interval.reg) || MBB->getParent()->begin() ||
+ MBB->isLandingPad()) &&
+ "Allocatable live-ins only valid for entry blocks and landing pads.");
+
DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_));
// Look for kills, if it reaches a def before it's killed, then it shouldn't
@@ -621,16 +490,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
while (mi != E) {
if (mi->killsRegister(interval.reg, tri_)) {
DEBUG(dbgs() << " killed");
- end = baseIndex.getDefIndex();
+ end = baseIndex.getRegSlot();
SeenDefUse = true;
break;
- } else if (mi->definesRegister(interval.reg, tri_)) {
+ } else if (mi->modifiesRegister(interval.reg, tri_)) {
// Another instruction redefines the register before it is ever read.
// Then the register is essentially dead at the instruction that defines
// it. Hence its interval is:
// [defSlot(def), defSlot(def)+1)
DEBUG(dbgs() << " dead");
- end = start.getStoreIndex();
+ end = start.getDeadSlot();
SeenDefUse = true;
break;
}
@@ -644,10 +513,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
// Live-in register might not be used at all.
if (!SeenDefUse) {
- if (isAlias) {
+ if (isAllocatable(interval.reg) ||
+ !isRegLiveIntoSuccessor(MBB, interval.reg)) {
+ // Allocatable registers are never live through.
+ // Non-allocatable registers that aren't live into any successors also
+ // aren't live through.
DEBUG(dbgs() << " dead");
- end = MIIdx.getStoreIndex();
+ return;
} else {
+ // If we get here the register is non-allocatable and live into some
+ // successor. We'll conservatively assume it's live-through.
DEBUG(dbgs() << " live through");
end = getMBBEndIdx(MBB);
}
@@ -656,8 +531,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
SlotIndex defIdx = getMBBStartIdx(MBB);
assert(getInstructionFromIndex(defIdx) == 0 &&
"PHI def index points at actual instruction.");
- VNInfo *vni =
- interval.getNextValue(defIdx, 0, VNInfoAllocator);
+ VNInfo *vni = interval.getNextValue(defIdx, VNInfoAllocator);
vni->setIsPHIDef(true);
LiveRange LR(start, end, vni);
@@ -674,10 +548,14 @@ void LiveIntervals::computeIntervals() {
<< "********** Function: "
<< ((Value*)mf_->getFunction())->getName() << '\n');
+ RegMaskBlocks.resize(mf_->getNumBlockIDs());
+
SmallVector<unsigned, 8> UndefUses;
for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
+ RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size();
+
if (MBB->empty())
continue;
@@ -690,11 +568,6 @@ void LiveIntervals::computeIntervals() {
for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
LE = MBB->livein_end(); LI != LE; ++LI) {
handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
- // Multiple live-ins can alias the same register.
- for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS)
- if (!hasInterval(*AS))
- handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
- true);
}
// Skip over empty initial indices.
@@ -706,10 +579,20 @@ void LiveIntervals::computeIntervals() {
DEBUG(dbgs() << MIIndex << "\t" << *MI);
if (MI->isDebugValue())
continue;
+ assert(indexes_->getInstructionFromIndex(MIIndex) == MI &&
+ "Lost SlotIndex synchronization");
// Handle defs.
for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
MachineOperand &MO = MI->getOperand(i);
+
+ // Collect register masks.
+ if (MO.isRegMask()) {
+ RegMaskSlots.push_back(MIIndex.getRegSlot());
+ RegMaskBits.push_back(MO.getRegMask());
+ continue;
+ }
+
if (!MO.isReg() || !MO.getReg())
continue;
@@ -723,6 +606,10 @@ void LiveIntervals::computeIntervals() {
// Move to the next instr slot.
MIIndex = indexes_->getNextNonNullIndex(MIIndex);
}
+
+ // Compute the number of register mask instructions in this block.
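+    // RegMaskBlocks[N] holds the slice of RegMaskSlots/RegMaskBits belonging
+    // to block N, as a (first index, count) pair.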
+ std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
+    RMB.second = RegMaskSlots.size() - RMB.first;
}
// Create empty intervals for registers defined by implicit_def's (except
@@ -754,7 +641,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
SmallVectorImpl<MachineInstr*> *dead) {
DEBUG(dbgs() << "Shrink: " << *li << '\n');
assert(TargetRegisterInfo::isVirtualRegister(li->reg)
- && "Can't only shrink physical registers");
+ && "Can only shrink virtual registers");
// Find all the values used, including PHI kills.
SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
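+  // Each WorkList entry pairs a use slot with the value live there. The loop
+  // below walks these entries, extending NewLI from each use back to its
+  // defining slot or block start, queueing predecessors for live-in values.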
@@ -766,8 +653,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
MachineInstr *UseMI = I.skipInstruction();) {
if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
continue;
- SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex();
- VNInfo *VNI = li->getVNInfoAt(Idx);
+ SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
+ // Note: This intentionally picks up the wrong VNI in case of an EC redef.
+ // See below.
+ VNInfo *VNI = li->getVNInfoBefore(Idx);
if (!VNI) {
// This shouldn't happen: readsVirtualRegister returns true, but there is
// no live value. It is likely caused by a target getting <undef> flags
@@ -777,11 +666,12 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
<< *li << '\n');
continue;
}
- if (VNI->def == Idx) {
- // Special case: An early-clobber tied operand reads and writes the
- // register one slot early.
- Idx = Idx.getPrevSlot();
- VNI = li->getVNInfoAt(Idx);
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early. The getVNInfoBefore call above would have
+ // picked up the value defined by UseMI. Adjust the kill slot and value.
+ if (SlotIndex::isSameInstr(VNI->def, Idx)) {
+ Idx = VNI->def;
+ VNI = li->getVNInfoBefore(Idx);
assert(VNI && "Early-clobber tied value not available");
}
WorkList.push_back(std::make_pair(Idx, VNI));
@@ -794,14 +684,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
VNInfo *VNI = *I;
if (VNI->isUnused())
continue;
- NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI));
-
- // A use tied to an early-clobber def ends at the load slot and isn't caught
- // above. Catch it here instead. This probably only ever happens for inline
- // assembly.
- if (VNI->def.isUse())
- if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex()))
- WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI));
+ NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI));
}
// Keep track of the PHIs that are in use.
@@ -812,11 +695,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
SlotIndex Idx = WorkList.back().first;
VNInfo *VNI = WorkList.back().second;
WorkList.pop_back();
- const MachineBasicBlock *MBB = getMBBFromIndex(Idx);
+ const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot());
SlotIndex BlockStart = getMBBStartIdx(MBB);
// Extend the live range for VNI to be live at Idx.
- if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx.getNextSlot())) {
+ if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) {
(void)ExtVNI;
assert(ExtVNI == VNI && "Unexpected existing value number");
// Is this a PHIDef we haven't seen before?
@@ -827,9 +710,9 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
PE = MBB->pred_end(); PI != PE; ++PI) {
if (!LiveOut.insert(*PI))
continue;
- SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ SlotIndex Stop = getMBBEndIdx(*PI);
// A predecessor is not required to have a live-out value for a PHI.
- if (VNInfo *PVNI = li->getVNInfoAt(Stop))
+ if (VNInfo *PVNI = li->getVNInfoBefore(Stop))
WorkList.push_back(std::make_pair(Stop, PVNI));
}
continue;
@@ -837,15 +720,16 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// VNI is live-in to MBB.
DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
- NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI));
+ NewLI.addRange(LiveRange(BlockStart, Idx, VNI));
// Make sure VNI is live-out from the predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
if (!LiveOut.insert(*PI))
continue;
- SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
- assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor");
+ SlotIndex Stop = getMBBEndIdx(*PI);
+ assert(li->getVNInfoBefore(Stop) == VNI &&
+ "Wrong value out of predecessor");
WorkList.push_back(std::make_pair(Stop, VNI));
}
}
@@ -859,7 +743,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
continue;
LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def);
assert(LII != NewLI.end() && "Missing live range for PHI");
- if (LII->end != VNI->def.getNextSlot())
+ if (LII->end != VNI->def.getDeadSlot())
continue;
if (VNI->isPHIDef()) {
// This is a dead PHI. Remove it.
@@ -890,28 +774,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Register allocator hooks.
//
-MachineBasicBlock::iterator
-LiveIntervals::getLastSplitPoint(const LiveInterval &li,
- MachineBasicBlock *mbb) const {
- const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor();
-
- // If li is not live into a landing pad, we can insert spill code before the
- // first terminator.
- if (!lpad || !isLiveInToMBB(li, lpad))
- return mbb->getFirstTerminator();
-
- // When there is a landing pad, spill code must go before the call instruction
- // that can throw.
- MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin();
- while (I != B) {
- --I;
- if (I->getDesc().isCall())
- return I;
- }
- // The block contains no calls that can throw, so use the first terminator.
- return mbb->getFirstTerminator();
-}
-
void LiveIntervals::addKillFlags() {
for (iterator I = begin(), E = end(); I != E; ++I) {
unsigned Reg = I->first;
@@ -924,8 +786,8 @@ void LiveIntervals::addKillFlags() {
// Every instruction that kills Reg corresponds to a live range end point.
for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
++RI) {
- // A LOAD index indicates an MBB edge.
- if (RI->end.isLoad())
+ // A block index indicates an MBB edge.
+ if (RI->end.isBlock())
continue;
MachineInstr *MI = getInstructionFromIndex(RI->end);
if (!MI)
@@ -949,16 +811,10 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
if (Reg == 0 || Reg == li.reg)
continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
- !allocatableRegs_[Reg])
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isAllocatable(Reg))
continue;
- // FIXME: For now, only remat MI with at most one register operand.
- assert(!RegOp &&
- "Can't rematerialize instruction with multiple register operand!");
RegOp = MO.getReg();
-#ifndef NDEBUG
- break;
-#endif
+ break; // Found vreg operand - leave the loop.
}
return RegOp;
}
@@ -1011,14 +867,6 @@ LiveIntervals::isReMaterializable(const LiveInterval &li,
return true;
}
-/// isReMaterializable - Returns true if the definition MI of the specified
-/// val# of the specified interval is re-materializable.
-bool LiveIntervals::isReMaterializable(const LiveInterval &li,
- const VNInfo *ValNo, MachineInstr *MI) {
- bool Dummy2;
- return isReMaterializable(li, ValNo, MI, 0, Dummy2);
-}
-
/// isReMaterializable - Returns true if every definition of MI of every
/// val# of the specified interval is re-materializable.
bool
@@ -1044,1141 +892,653 @@ LiveIntervals::isReMaterializable(const LiveInterval &li,
return true;
}
-/// FilterFoldedOps - Filter out two-address use operands. Return
-/// true if it finds any issue with the operands that ought to prevent
-/// folding.
-static bool FilterFoldedOps(MachineInstr *MI,
- SmallVector<unsigned, 2> &Ops,
- unsigned &MRInfo,
- SmallVector<unsigned, 2> &FoldOps) {
- MRInfo = 0;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- unsigned OpIdx = Ops[i];
- MachineOperand &MO = MI->getOperand(OpIdx);
- // FIXME: fold subreg use.
- if (MO.getSubReg())
- return true;
- if (MO.isDef())
- MRInfo |= (unsigned)VirtRegMap::isMod;
- else {
- // Filter out two-address use operand(s).
- if (MI->isRegTiedToDefOperand(OpIdx)) {
- MRInfo = VirtRegMap::isModRef;
- continue;
- }
- MRInfo |= (unsigned)VirtRegMap::isRef;
- }
- FoldOps.push_back(OpIdx);
- }
- return false;
+MachineBasicBlock*
+LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
+ // A local live range must be fully contained inside the block, meaning it is
+ // defined and killed at instructions, not at block boundaries. It is not
+  // live in or out of any block.
+ //
+ // It is technically possible to have a PHI-defined live range identical to a
+ // single block, but we are going to return false in that case.
+
+ SlotIndex Start = LI.beginIndex();
+ if (Start.isBlock())
+ return NULL;
+
+ SlotIndex Stop = LI.endIndex();
+ if (Stop.isBlock())
+ return NULL;
+
+ // getMBBFromIndex doesn't need to search the MBB table when both indexes
+ // belong to proper instructions.
+ MachineBasicBlock *MBB1 = indexes_->getMBBFromIndex(Start);
+ MachineBasicBlock *MBB2 = indexes_->getMBBFromIndex(Stop);
+ return MBB1 == MBB2 ? MBB1 : NULL;
}
+float
+LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
+ // Limit the loop depth ridiculousness.
+ if (loopDepth > 200)
+ loopDepth = 200;
-/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
-/// slot / to reg or any rematerialized load into ith operand of specified
-/// MI. If it is successul, MI is updated with the newly created MI and
-/// returns true.
-bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
- VirtRegMap &vrm, MachineInstr *DefMI,
- SlotIndex InstrIdx,
- SmallVector<unsigned, 2> &Ops,
- bool isSS, int Slot, unsigned Reg) {
- // If it is an implicit def instruction, just delete it.
- if (MI->isImplicitDef()) {
- RemoveMachineInstrFromMaps(MI);
- vrm.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- ++numFolds;
- return true;
- }
+ // The loop depth is used to roughly estimate the number of times the
+ // instruction is executed. Something like 10^d is simple, but will quickly
+ // overflow a float. This expression behaves like 10^d for small d, but is
+ // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
+ // headroom before overflow.
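+  // For example, lc is roughly 10 at depth 1, 87 at depth 2, and 657 at
+  // depth 3.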
+  // Note that powf() may be unavailable on some platforms, so use
+  // pow(double, double) for consistency.
+ float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);
- // Filter the list of operand indexes that are to be folded. Abort if
- // any operand will prevent folding.
- unsigned MRInfo = 0;
- SmallVector<unsigned, 2> FoldOps;
- if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
- return false;
+ return (isDef + isUse) * lc;
+}
- // The only time it's safe to fold into a two address instruction is when
- // it's folding reload and spill from / into a spill stack slot.
- if (DefMI && (MRInfo & VirtRegMap::isMod))
- return false;
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+ MachineInstr* startInst) {
+ LiveInterval& Interval = getOrCreateInterval(reg);
+ VNInfo* VN = Interval.getNextValue(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getVNInfoAllocator());
+ VN->setHasPHIKill(true);
+ LiveRange LR(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getMBBEndIdx(startInst->getParent()), VN);
+ Interval.addRange(LR);
- MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot)
- : tii_->foldMemoryOperand(MI, FoldOps, DefMI);
- if (fmi) {
- // Remember this instruction uses the spill slot.
- if (isSS) vrm.addSpillSlotUse(Slot, fmi);
-
- // Attempt to fold the memory reference into the instruction. If
- // we can do this, we don't need to insert spill code.
- if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
- vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
- vrm.transferSpillPts(MI, fmi);
- vrm.transferRestorePts(MI, fmi);
- vrm.transferEmergencySpills(MI, fmi);
- ReplaceMachineInstrInMaps(MI, fmi);
- MI->eraseFromParent();
- MI = fmi;
- ++numFolds;
- return true;
- }
- return false;
+ return LR;
}
-/// canFoldMemoryOperand - Returns true if the specified load / store
-/// folding is possible.
-bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
- SmallVector<unsigned, 2> &Ops,
- bool ReMat) const {
- // Filter the list of operand indexes that are to be folded. Abort if
- // any operand will prevent folding.
- unsigned MRInfo = 0;
- SmallVector<unsigned, 2> FoldOps;
- if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+
+//===----------------------------------------------------------------------===//
+// Register mask functions
+//===----------------------------------------------------------------------===//
+
+bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
+ BitVector &UsableRegs) {
+ if (LI.empty())
return false;
+ LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end();
+
+  // Use smaller arrays for local live ranges.
+ ArrayRef<SlotIndex> Slots;
+ ArrayRef<const uint32_t*> Bits;
+ if (MachineBasicBlock *MBB = intervalIsInOneMBB(LI)) {
+ Slots = getRegMaskSlotsInBlock(MBB->getNumber());
+ Bits = getRegMaskBitsInBlock(MBB->getNumber());
+ } else {
+ Slots = getRegMaskSlots();
+ Bits = getRegMaskBits();
+ }
- // It's only legal to remat for a use, not a def.
- if (ReMat && (MRInfo & VirtRegMap::isMod))
+ // We are going to enumerate all the register mask slots contained in LI.
+ // Start with a binary search of RegMaskSlots to find a starting point.
+ ArrayRef<SlotIndex>::iterator SlotI =
+ std::lower_bound(Slots.begin(), Slots.end(), LiveI->start);
+ ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
+
+ // No slots in range, LI begins after the last call.
+ if (SlotI == SlotE)
return false;
- return tii_->canFoldMemoryOperand(MI, FoldOps);
+ bool Found = false;
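+  // Walk Slots and LI's segments in lockstep: every mask slot that falls
+  // inside a live segment clears its clobbered registers from UsableRegs.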
+ for (;;) {
+ assert(*SlotI >= LiveI->start);
+ // Loop over all slots overlapping this segment.
+ while (*SlotI < LiveI->end) {
+ // *SlotI overlaps LI. Collect mask bits.
+ if (!Found) {
+ // This is the first overlap. Initialize UsableRegs to all ones.
+ UsableRegs.clear();
+ UsableRegs.resize(tri_->getNumRegs(), true);
+ Found = true;
+ }
+ // Remove usable registers clobbered by this mask.
+ UsableRegs.clearBitsNotInMask(Bits[SlotI-Slots.begin()]);
+ if (++SlotI == SlotE)
+ return Found;
+ }
+ // *SlotI is beyond the current LI segment.
+ LiveI = LI.advanceTo(LiveI, *SlotI);
+ if (LiveI == LiveE)
+ return Found;
+ // Advance SlotI until it overlaps.
+ while (*SlotI < LiveI->start)
+ if (++SlotI == SlotE)
+ return Found;
+ }
}
-bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
- LiveInterval::Ranges::const_iterator itr = li.ranges.begin();
+//===----------------------------------------------------------------------===//
+// HMEditor class.
+//===----------------------------------------------------------------------===//
- MachineBasicBlock *mbb = indexes_->getMBBCoveringRange(itr->start, itr->end);
+// HMEditor is a toolkit used by handleMove to trim or extend live intervals.
+class LiveIntervals::HMEditor {
+private:
+ LiveIntervals& LIS;
+ const MachineRegisterInfo& MRI;
+ const TargetRegisterInfo& TRI;
+ SlotIndex NewIdx;
+
+ typedef std::pair<LiveInterval*, LiveRange*> IntRangePair;
+ typedef DenseSet<IntRangePair> RangeSet;
+
+ struct RegRanges {
+ LiveRange* Use;
+ LiveRange* EC;
+ LiveRange* Dead;
+ LiveRange* Def;
+ RegRanges() : Use(0), EC(0), Dead(0), Def(0) {}
+ };
+ typedef DenseMap<unsigned, RegRanges> BundleRanges;
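+  // BundleRanges maps each register to the entering (Use), internal (EC or
+  // Dead) and exiting (Def) ranges it contributes to a bundle.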
+
+public:
+ HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
+ const TargetRegisterInfo& TRI, SlotIndex NewIdx)
+ : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {}
+
+ // Update intervals for all operands of MI from OldIdx to NewIdx.
+ // This assumes that MI used to be at OldIdx, and now resides at
+ // NewIdx.
+ void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) {
+ assert(NewIdx != OldIdx && "No-op move? That's a bit strange.");
+
+ // Collect the operands.
+ RangeSet Entering, Internal, Exiting;
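+    // Entering ranges are live into MI, Internal ranges begin and end within
+    // MI's slots (dead or early-clobber defs), and Exiting ranges are defined
+    // by MI and live beyond it.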
+ bool hasRegMaskOp = false;
+ collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
+
+ // To keep the LiveRanges valid within an interval, move the ranges closest
+    // to the destination first. This prevents ranges from overlapping, so that
+ // APIs like removeRange still work.
+ if (NewIdx < OldIdx) {
+ moveAllEnteringFrom(OldIdx, Entering);
+ moveAllInternalFrom(OldIdx, Internal);
+ moveAllExitingFrom(OldIdx, Exiting);
+ }
+ else {
+ moveAllExitingFrom(OldIdx, Exiting);
+ moveAllInternalFrom(OldIdx, Internal);
+ moveAllEnteringFrom(OldIdx, Entering);
+ }
- if (mbb == 0)
- return false;
+ if (hasRegMaskOp)
+ updateRegMaskSlots(OldIdx);
- for (++itr; itr != li.ranges.end(); ++itr) {
- MachineBasicBlock *mbb2 =
- indexes_->getMBBCoveringRange(itr->start, itr->end);
+#ifndef NDEBUG
+ LIValidator validator;
+ std::for_each(Entering.begin(), Entering.end(), validator);
+ std::for_each(Internal.begin(), Internal.end(), validator);
+ std::for_each(Exiting.begin(), Exiting.end(), validator);
+    assert(validator.rangesOk() && "moveAllRangesFrom broke liveness.");
+#endif
- if (mbb2 != mbb)
- return false;
}
- return true;
-}
+ // Update intervals for all operands of MI to refer to BundleStart's
+ // SlotIndex.
+ void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) {
+ if (MI == BundleStart)
+ return; // Bundling instr with itself - nothing to do.
+
+ SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI);
+ assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI &&
+ "SlotIndex <-> Instruction mapping broken for MI");
+
+ // Collect all ranges already in the bundle.
+ MachineBasicBlock::instr_iterator BII(BundleStart);
+ RangeSet Entering, Internal, Exiting;
+ bool hasRegMaskOp = false;
+ collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
+ assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
+ for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) {
+ if (&*BII == MI)
+ continue;
+ collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
+ assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
+ }
-/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of
-/// interval on to-be re-materialized operands of MI) with new register.
-void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
- MachineInstr *MI, unsigned NewVReg,
- VirtRegMap &vrm) {
- // There is an implicit use. That means one of the other operand is
- // being remat'ed and the remat'ed instruction has li.reg as an
- // use operand. Make sure we rewrite that as well.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
- if (!vrm.isReMaterialized(Reg))
- continue;
- MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg);
- MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg);
- if (UseMO)
- UseMO->setReg(NewVReg);
- }
-}
+ BundleRanges BR = createBundleRanges(Entering, Internal, Exiting);
-/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions
-/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
-bool LiveIntervals::
-rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
- bool TrySplit, SlotIndex index, SlotIndex end,
- MachineInstr *MI,
- MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
- unsigned Slot, int LdSlot,
- bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
- VirtRegMap &vrm,
- const TargetRegisterClass* rc,
- SmallVector<int, 4> &ReMatIds,
- const MachineLoopInfo *loopInfo,
- unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse,
- DenseMap<unsigned,unsigned> &MBBVRegsMap,
- std::vector<LiveInterval*> &NewLIs) {
- bool CanFold = false;
- RestartInstruction:
- for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
- MachineOperand& mop = MI->getOperand(i);
- if (!mop.isReg())
- continue;
- unsigned Reg = mop.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
- if (Reg != li.reg)
- continue;
+ collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
+ assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
- bool TryFold = !DefIsReMat;
- bool FoldSS = true; // Default behavior unless it's a remat.
- int FoldSlot = Slot;
- if (DefIsReMat) {
- // If this is the rematerializable definition MI itself and
- // all of its uses are rematerialized, simply delete it.
- if (MI == ReMatOrigDefMI && CanDelete) {
- DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: "
- << *MI << '\n');
- RemoveMachineInstrFromMaps(MI);
- vrm.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- break;
- }
+ DEBUG(dbgs() << "Entering: " << Entering.size() << "\n");
+ DEBUG(dbgs() << "Internal: " << Internal.size() << "\n");
+ DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n");
- // If def for this use can't be rematerialized, then try folding.
- // If def is rematerializable and it's a load, also try folding.
- TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad));
- if (isLoad) {
- // Try fold loads (from stack slot, constant pool, etc.) into uses.
- FoldSS = isLoadSS;
- FoldSlot = LdSlot;
- }
- }
+ moveAllEnteringFromInto(OldIdx, Entering, BR);
+ moveAllInternalFromInto(OldIdx, Internal, BR);
+ moveAllExitingFromInto(OldIdx, Exiting, BR);
- // Scan all of the operands of this instruction rewriting operands
- // to use NewVReg instead of li.reg as appropriate. We do this for
- // two reasons:
- //
- // 1. If the instr reads the same spilled vreg multiple times, we
- // want to reuse the NewVReg.
- // 2. If the instr is a two-addr instruction, we are required to
- // keep the src/dst regs pinned.
- //
- // Keep track of whether we replace a use and/or def so that we can
- // create the spill interval with the appropriate range.
- SmallVector<unsigned, 2> Ops;
- tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops);
-
- // Create a new virtual register for the spill interval.
- // Create the new register now so we can map the fold instruction
- // to the new register so when it is unfolded we get the correct
- // answer.
- bool CreatedNewVReg = false;
- if (NewVReg == 0) {
- NewVReg = mri_->createVirtualRegister(rc);
- vrm.grow();
- CreatedNewVReg = true;
-
- // The new virtual register should get the same allocation hints as the
- // old one.
- std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(Reg);
- if (Hint.first || Hint.second)
- mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second);
- }
- if (!TryFold)
- CanFold = false;
- else {
- // Do not fold load / store here if we are splitting. We'll find an
- // optimal point to insert a load / store later.
- if (!TrySplit) {
- if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
- Ops, FoldSS, FoldSlot, NewVReg)) {
- // Folding the load/store can completely change the instruction in
- // unpredictable ways, rescan it from the beginning.
-
- if (FoldSS) {
- // We need to give the new vreg the same stack slot as the
- // spilled interval.
- vrm.assignVirt2StackSlot(NewVReg, FoldSlot);
- }
-
- HasUse = false;
- HasDef = false;
- CanFold = false;
- if (isNotInMIMap(MI))
- break;
- goto RestartInstruction;
- }
- } else {
- // We'll try to fold it later if it's profitable.
- CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat);
- }
- }
+#ifndef NDEBUG
+ LIValidator validator;
+ std::for_each(Entering.begin(), Entering.end(), validator);
+ std::for_each(Internal.begin(), Internal.end(), validator);
+ std::for_each(Exiting.begin(), Exiting.end(), validator);
+    assert(validator.rangesOk() && "moveAllRangesInto broke liveness.");
+#endif
+ }
- mop.setReg(NewVReg);
- if (mop.isImplicit())
- rewriteImplicitOps(li, MI, NewVReg, vrm);
-
- // Reuse NewVReg for other reads.
- bool HasEarlyClobber = false;
- for (unsigned j = 0, e = Ops.size(); j != e; ++j) {
- MachineOperand &mopj = MI->getOperand(Ops[j]);
- mopj.setReg(NewVReg);
- if (mopj.isImplicit())
- rewriteImplicitOps(li, MI, NewVReg, vrm);
- if (mopj.isEarlyClobber())
- HasEarlyClobber = true;
- }
+private:
- if (CreatedNewVReg) {
- if (DefIsReMat) {
- vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI);
- if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) {
- // Each valnum may have its own remat id.
- ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg);
- } else {
- vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]);
- }
- if (!CanDelete || (HasUse && HasDef)) {
- // If this is a two-addr instruction then its use operands are
- // rematerializable but its def is not. It should be assigned a
- // stack slot.
- vrm.assignVirt2StackSlot(NewVReg, Slot);
- }
- } else {
- vrm.assignVirt2StackSlot(NewVReg, Slot);
+#ifndef NDEBUG
+ class LIValidator {
+ private:
+ DenseSet<const LiveInterval*> Checked, Bogus;
+ public:
+ void operator()(const IntRangePair& P) {
+ const LiveInterval* LI = P.first;
+ if (Checked.count(LI))
+ return;
+ Checked.insert(LI);
+ if (LI->empty())
+ return;
+ SlotIndex LastEnd = LI->begin()->start;
+ for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end();
+ LRI != LRE; ++LRI) {
+ const LiveRange& LR = *LRI;
+ if (LastEnd > LR.start || LR.start >= LR.end)
+ Bogus.insert(LI);
+ LastEnd = LR.end;
}
- } else if (HasUse && HasDef &&
- vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) {
- // If this interval hasn't been assigned a stack slot (because earlier
- // def is a deleted remat def), do it now.
- assert(Slot != VirtRegMap::NO_STACK_SLOT);
- vrm.assignVirt2StackSlot(NewVReg, Slot);
}
- // Re-matting an instruction with virtual register use. Add the
- // register as an implicit use on the use MI.
- if (DefIsReMat && ImpUse)
- MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
-
- // Create a new register interval for this spill / remat.
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- if (CreatedNewVReg) {
- NewLIs.push_back(&nI);
- MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg));
- if (TrySplit)
- vrm.setIsSplitFromReg(NewVReg, li.reg);
+ bool rangesOk() const {
+ return Bogus.empty();
}
+ };
+#endif
- if (HasUse) {
- if (CreatedNewVReg) {
- LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
- nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
- } else {
- // Extend the split live interval to this def / use.
- SlotIndex End = index.getDefIndex();
- LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
- nI.getValNumInfo(nI.getNumValNums()-1));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
+ // Collect IntRangePairs for all operands of MI that may need fixing.
+  // Treats MI's index as OldIdx (regardless of what it is in SlotIndexes'
+ // maps).
+ void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal,
+ RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) {
+ hasRegMaskOp = false;
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
+ const MachineOperand& MO = *MOI;
+
+ if (MO.isRegMask()) {
+ hasRegMaskOp = true;
+ continue;
}
- }
- if (HasDef) {
- // An early clobber starts at the use slot, except for an early clobber
- // tied to a use operand (yes, that is a thing).
- LiveRange LR(HasEarlyClobber && !HasUse ?
- index.getUseIndex() : index.getDefIndex(),
- index.getStoreIndex(),
- nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
- }
- DEBUG({
- dbgs() << "\t\t\t\tAdded new interval: ";
- nI.print(dbgs(), tri_);
- dbgs() << '\n';
- });
- }
- return CanFold;
-}
-bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
- const VNInfo *VNI,
- MachineBasicBlock *MBB,
- SlotIndex Idx) const {
- return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB));
-}
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
-/// RewriteInfo - Keep track of machine instrs that will be rewritten
-/// during spilling.
-namespace {
- struct RewriteInfo {
- SlotIndex Index;
- MachineInstr *MI;
- RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {}
- };
+ unsigned Reg = MO.getReg();
- struct RewriteInfoCompare {
- bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const {
- return LHS.Index < RHS.Index;
- }
- };
-}
+ // TODO: Currently we're skipping uses that are reserved or have no
+ // interval, but we're not updating their kills. This should be
+ // fixed.
+ if (!LIS.hasInterval(Reg) ||
+ (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)))
+ continue;
-void LiveIntervals::
-rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
- LiveInterval::Ranges::const_iterator &I,
- MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
- unsigned Slot, int LdSlot,
- bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
- VirtRegMap &vrm,
- const TargetRegisterClass* rc,
- SmallVector<int, 4> &ReMatIds,
- const MachineLoopInfo *loopInfo,
- BitVector &SpillMBBs,
- DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes,
- BitVector &RestoreMBBs,
- DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes,
- DenseMap<unsigned,unsigned> &MBBVRegsMap,
- std::vector<LiveInterval*> &NewLIs) {
- bool AllCanFold = true;
- unsigned NewVReg = 0;
- SlotIndex start = I->start.getBaseIndex();
- SlotIndex end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
-
- // First collect all the def / use in this live range that will be rewritten.
- // Make sure they are sorted according to instruction index.
- std::vector<RewriteInfo> RewriteMIs;
- for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
- re = mri_->reg_end(); ri != re; ) {
- MachineInstr *MI = &*ri;
- MachineOperand &O = ri.getOperand();
- ++ri;
- if (MI->isDebugValue()) {
- // Modify DBG_VALUE now that the value is in a spill slot.
- if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) {
- uint64_t Offset = MI->getOperand(1).getImm();
- const MDNode *MDPtr = MI->getOperand(2).getMetadata();
- DebugLoc DL = MI->getDebugLoc();
- int FI = isLoadSS ? LdSlot : (int)Slot;
- if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI,
- Offset, MDPtr, DL)) {
- DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
- ReplaceMachineInstrInMaps(MI, NewDV);
- MachineBasicBlock *MBB = MI->getParent();
- MBB->insert(MBB->erase(MI), NewDV);
- continue;
+ LiveInterval* LI = &LIS.getInterval(Reg);
+
+ if (MO.readsReg()) {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx);
+ if (LR != 0)
+ Entering.insert(std::make_pair(LI, LR));
+ }
+ if (MO.isDef()) {
+ if (MO.isEarlyClobber()) {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot(true));
+ assert(LR != 0 && "No EC range?");
+ if (LR->end > OldIdx.getDeadSlot())
+ Exiting.insert(std::make_pair(LI, LR));
+ else
+ Internal.insert(std::make_pair(LI, LR));
+ } else if (MO.isDead()) {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot());
+ assert(LR != 0 && "No dead-def range?");
+ Internal.insert(std::make_pair(LI, LR));
+ } else {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getDeadSlot());
+ assert(LR && LR->end > OldIdx.getDeadSlot() &&
+ "Non-dead-def should have live range exiting.");
+ Exiting.insert(std::make_pair(LI, LR));
}
}
-
- DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
- RemoveMachineInstrFromMaps(MI);
- vrm.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- continue;
}
- assert(!(O.isImplicit() && O.isUse()) &&
- "Spilling register that's used as implicit use?");
- SlotIndex index = getInstructionIndex(MI);
- if (index < start || index >= end)
- continue;
-
- if (O.isUndef())
- // Must be defined by an implicit def. It should not be spilled. Note,
- // this is for correctness reason. e.g.
- // 8 %reg1024<def> = IMPLICIT_DEF
- // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
- // The live range [12, 14) are not part of the r1024 live interval since
- // it's defined by an implicit def. It will not conflicts with live
- // interval of r1025. Now suppose both registers are spilled, you can
- // easily see a situation where both registers are reloaded before
- // the INSERT_SUBREG and both target registers that would overlap.
- continue;
- RewriteMIs.push_back(RewriteInfo(index, MI));
}
- std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
-
- unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0;
- // Now rewrite the defs and uses.
- for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) {
- RewriteInfo &rwi = RewriteMIs[i];
- ++i;
- SlotIndex index = rwi.Index;
- MachineInstr *MI = rwi.MI;
- // If MI def and/or use the same register multiple times, then there
- // are multiple entries.
- while (i != e && RewriteMIs[i].MI == MI) {
- assert(RewriteMIs[i].Index == index);
- ++i;
- }
- MachineBasicBlock *MBB = MI->getParent();
- if (ImpUse && MI != ReMatDefMI) {
- // Re-matting an instruction with virtual register use. Prevent interval
- // from being spilled.
- getInterval(ImpUse).markNotSpillable();
- }
+ // Collect IntRangePairs for all operands of MI that may need fixing.
+ void collectRangesInBundle(MachineInstr* MI, RangeSet& Entering,
+ RangeSet& Exiting, SlotIndex MIStartIdx,
+ SlotIndex MIEndIdx) {
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
+ const MachineOperand& MO = *MOI;
+ assert(!MO.isRegMask() && "Can't have RegMasks in bundles.");
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
- unsigned MBBId = MBB->getNumber();
- unsigned ThisVReg = 0;
- if (TrySplit) {
- DenseMap<unsigned,unsigned>::iterator NVI = MBBVRegsMap.find(MBBId);
- if (NVI != MBBVRegsMap.end()) {
- ThisVReg = NVI->second;
- // One common case:
- // x = use
- // ...
- // ...
- // def = ...
- // = use
- // It's better to start a new interval to avoid artificially
- // extend the new interval.
- if (MI->readsWritesVirtualRegister(li.reg) ==
- std::make_pair(false,true)) {
- MBBVRegsMap.erase(MBB->getNumber());
- ThisVReg = 0;
- }
- }
- }
+ unsigned Reg = MO.getReg();
+
+ // TODO: Currently we're skipping uses that are reserved or have no
+ // interval, but we're not updating their kills. This should be
+ // fixed.
+ if (!LIS.hasInterval(Reg) ||
+ (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)))
+ continue;
- bool IsNew = ThisVReg == 0;
- if (IsNew) {
- // This ends the previous live interval. If all of its def / use
- // can be folded, give it a low spill weight.
- if (NewVReg && TrySplit && AllCanFold) {
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- nI.weight /= 10.0F;
+ LiveInterval* LI = &LIS.getInterval(Reg);
+
+ if (MO.readsReg()) {
+ LiveRange* LR = LI->getLiveRangeContaining(MIStartIdx);
+ if (LR != 0)
+ Entering.insert(std::make_pair(LI, LR));
+ }
+ if (MO.isDef()) {
+ assert(!MO.isEarlyClobber() && "Early clobbers not allowed in bundles.");
+ assert(!MO.isDead() && "Dead-defs not allowed in bundles.");
+ LiveRange* LR = LI->getLiveRangeContaining(MIEndIdx.getDeadSlot());
+ assert(LR != 0 && "Internal ranges not allowed in bundles.");
+ Exiting.insert(std::make_pair(LI, LR));
}
- AllCanFold = true;
}
- NewVReg = ThisVReg;
-
- bool HasDef = false;
- bool HasUse = false;
- bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit,
- index, end, MI, ReMatOrigDefMI, ReMatDefMI,
- Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
- CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg,
- ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs);
- if (!HasDef && !HasUse)
- continue;
+ }
- AllCanFold &= CanFold;
+  BundleRanges createBundleRanges(RangeSet& Entering, RangeSet& Internal,
+                                  RangeSet& Exiting) {
+ BundleRanges BR;
- // Update weight of spill interval.
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- if (!TrySplit) {
- // The spill weight is now infinity as it cannot be spilled again.
- nI.markNotSpillable();
- continue;
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI) {
+ LiveInterval* LI = EI->first;
+ LiveRange* LR = EI->second;
+ BR[LI->reg].Use = LR;
}
- // Keep track of the last def and first use in each MBB.
- if (HasDef) {
- if (MI != ReMatOrigDefMI || !CanDelete) {
- bool HasKill = false;
- if (!HasUse)
- HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, index.getDefIndex());
- else {
- // If this is a two-address code, then this index starts a new VNInfo.
- const VNInfo *VNI = li.findDefinedVNInfoForRegInt(index.getDefIndex());
- if (VNI)
- HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, index.getDefIndex());
- }
- DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
- SpillIdxes.find(MBBId);
- if (!HasKill) {
- if (SII == SpillIdxes.end()) {
- std::vector<SRInfo> S;
- S.push_back(SRInfo(index, NewVReg, true));
- SpillIdxes.insert(std::make_pair(MBBId, S));
- } else if (SII->second.back().vreg != NewVReg) {
- SII->second.push_back(SRInfo(index, NewVReg, true));
- } else if (index > SII->second.back().index) {
- // If there is an earlier def and this is a two-address
- // instruction, then it's not possible to fold the store (which
- // would also fold the load).
- SRInfo &Info = SII->second.back();
- Info.index = index;
- Info.canFold = !HasUse;
- }
- SpillMBBs.set(MBBId);
- } else if (SII != SpillIdxes.end() &&
- SII->second.back().vreg == NewVReg &&
- index > SII->second.back().index) {
- // There is an earlier def that's not killed (must be two-address).
- // The spill is no longer needed.
- SII->second.pop_back();
- if (SII->second.empty()) {
- SpillIdxes.erase(MBBId);
- SpillMBBs.reset(MBBId);
- }
- }
+ for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
+ II != IE; ++II) {
+ LiveInterval* LI = II->first;
+ LiveRange* LR = II->second;
+ if (LR->end.isDead()) {
+ BR[LI->reg].Dead = LR;
+ } else {
+ BR[LI->reg].EC = LR;
}
}
- if (HasUse) {
- DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
- SpillIdxes.find(MBBId);
- if (SII != SpillIdxes.end() &&
- SII->second.back().vreg == NewVReg &&
- index > SII->second.back().index)
- // Use(s) following the last def, it's not safe to fold the spill.
- SII->second.back().canFold = false;
- DenseMap<unsigned, std::vector<SRInfo> >::iterator RII =
- RestoreIdxes.find(MBBId);
- if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg)
- // If we are splitting live intervals, only fold if it's the first
- // use and there isn't another use later in the MBB.
- RII->second.back().canFold = false;
- else if (IsNew) {
- // Only need a reload if there isn't an earlier def / use.
- if (RII == RestoreIdxes.end()) {
- std::vector<SRInfo> Infos;
- Infos.push_back(SRInfo(index, NewVReg, true));
- RestoreIdxes.insert(std::make_pair(MBBId, Infos));
- } else {
- RII->second.push_back(SRInfo(index, NewVReg, true));
- }
- RestoreMBBs.set(MBBId);
- }
+ for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
+ EI != EE; ++EI) {
+ LiveInterval* LI = EI->first;
+ LiveRange* LR = EI->second;
+ BR[LI->reg].Def = LR;
}
- // Update spill weight.
- unsigned loopDepth = loopInfo->getLoopDepth(MBB);
- nI.weight += getSpillWeight(HasDef, HasUse, loopDepth);
+ return BR;
}
- if (NewVReg && TrySplit && AllCanFold) {
- // If all of its def / use can be folded, give it a low spill weight.
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- nI.weight /= 10.0F;
+ void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) {
+ MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx);
+ if (!OldKillMI->killsRegister(reg))
+ return; // Bail out if we don't have kill flags on the old register.
+ MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx);
+ assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill.");
+ assert(!NewKillMI->killsRegister(reg) && "New kill instr is already a kill.");
+ OldKillMI->clearRegisterKills(reg, &TRI);
+ NewKillMI->addRegisterKilled(reg, &TRI);
}
-}
-bool LiveIntervals::alsoFoldARestore(int Id, SlotIndex index,
- unsigned vr, BitVector &RestoreMBBs,
- DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
- if (!RestoreMBBs[Id])
- return false;
- std::vector<SRInfo> &Restores = RestoreIdxes[Id];
- for (unsigned i = 0, e = Restores.size(); i != e; ++i)
- if (Restores[i].index == index &&
- Restores[i].vreg == vr &&
- Restores[i].canFold)
- return true;
- return false;
-}
-
-void LiveIntervals::eraseRestoreInfo(int Id, SlotIndex index,
- unsigned vr, BitVector &RestoreMBBs,
- DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
- if (!RestoreMBBs[Id])
- return;
- std::vector<SRInfo> &Restores = RestoreIdxes[Id];
- for (unsigned i = 0, e = Restores.size(); i != e; ++i)
- if (Restores[i].index == index && Restores[i].vreg)
- Restores[i].index = SlotIndex();
-}
+ void updateRegMaskSlots(SlotIndex OldIdx) {
+ SmallVectorImpl<SlotIndex>::iterator RI =
+ std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
+ OldIdx);
+ assert(*RI == OldIdx && "No RegMask at OldIdx.");
+ *RI = NewIdx;
+ assert(*prior(RI) < *RI && *RI < *next(RI) &&
+ "RegSlots out of order. Did you move one call across another?");
+ }
-/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
-/// spilled and create empty intervals for their uses.
-void
-LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
- const TargetRegisterClass* rc,
- std::vector<LiveInterval*> &NewLIs) {
- for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
- re = mri_->reg_end(); ri != re; ) {
- MachineOperand &O = ri.getOperand();
- MachineInstr *MI = &*ri;
- ++ri;
- if (MI->isDebugValue()) {
- // Remove debug info for now.
- O.setReg(0U);
- DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
- continue;
- }
- if (O.isDef()) {
- assert(MI->isImplicitDef() &&
- "Register def was not rewritten?");
- RemoveMachineInstrFromMaps(MI);
- vrm.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- } else {
- // This must be an use of an implicit_def so it's not part of the live
- // interval. Create a new empty live interval for it.
- // FIXME: Can we simply erase some of the instructions? e.g. Stores?
- unsigned NewVReg = mri_->createVirtualRegister(rc);
- vrm.grow();
- vrm.setIsImplicitlyDefined(NewVReg);
- NewLIs.push_back(&getOrCreateInterval(NewVReg));
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == li.reg) {
- MO.setReg(NewVReg);
- MO.setIsUndef();
- }
- }
+ // Return the last use of reg between NewIdx and OldIdx.
+ SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) {
+ SlotIndex LastUse = NewIdx;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI.use_nodbg_begin(Reg),
+ UE = MRI.use_nodbg_end();
+ UI != UE; UI.skipInstruction()) {
+ const MachineInstr* MI = &*UI;
+ SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
+ if (InstSlot > LastUse && InstSlot < OldIdx)
+ LastUse = InstSlot;
}
+ return LastUse;
}
-}
-
-float
-LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
- // Limit the loop depth ridiculousness.
- if (loopDepth > 200)
- loopDepth = 200;
-
- // The loop depth is used to roughly estimate the number of times the
- // instruction is executed. Something like 10^d is simple, but will quickly
- // overflow a float. This expression behaves like 10^d for small d, but is
- // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
- // headroom before overflow.
- // By the way, powf() might be unavailable here. For consistency,
- // We may take pow(double,double).
- float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);
-
- return (isDef + isUse) * lc;
-}
-static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
- for (unsigned i = 0, e = NewLIs.size(); i != e; ++i)
- NewLIs[i]->weight =
- normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize());
-}
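+  // Trim an entering range when the moved instruction was its last user:
+  // shorten it to the last use remaining before OldIdx, or to NewIdx.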
+ void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ bool LiveThrough = LR->end > OldIdx.getRegSlot();
+ if (LiveThrough)
+ return;
+ SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
+ if (LastUse != NewIdx)
+ moveKillFlags(LI->reg, NewIdx, LastUse);
+ LR->end = LastUse.getRegSlot();
+ }
-std::vector<LiveInterval*> LiveIntervals::
-addIntervalsForSpills(const LiveInterval &li,
- const SmallVectorImpl<LiveInterval*> *SpillIs,
- const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
- assert(li.isSpillable() && "attempt to spill already spilled interval!");
-
- DEBUG({
- dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
- li.print(dbgs(), tri_);
- dbgs() << '\n';
- });
-
- // Each bit specify whether a spill is required in the MBB.
- BitVector SpillMBBs(mf_->getNumBlockIDs());
- DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes;
- BitVector RestoreMBBs(mf_->getNumBlockIDs());
- DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes;
- DenseMap<unsigned,unsigned> MBBVRegsMap;
- std::vector<LiveInterval*> NewLIs;
- const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
-
- unsigned NumValNums = li.getNumValNums();
- SmallVector<MachineInstr*, 4> ReMatDefs;
- ReMatDefs.resize(NumValNums, NULL);
- SmallVector<MachineInstr*, 4> ReMatOrigDefs;
- ReMatOrigDefs.resize(NumValNums, NULL);
- SmallVector<int, 4> ReMatIds;
- ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT);
- BitVector ReMatDelete(NumValNums);
- unsigned Slot = VirtRegMap::MAX_STACK_SLOT;
-
- // Spilling a split live interval. It cannot be split any further. Also,
- // it's also guaranteed to be a single val# / range interval.
- if (vrm.getPreSplitReg(li.reg)) {
- vrm.setIsSplitFromReg(li.reg, 0);
- // Unset the split kill marker on the last use.
- SlotIndex KillIdx = vrm.getKillPoint(li.reg);
- if (KillIdx != SlotIndex()) {
- MachineInstr *KillMI = getInstructionFromIndex(KillIdx);
- assert(KillMI && "Last use disappeared?");
- int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true);
- assert(KillOp != -1 && "Last use disappeared?");
- KillMI->getOperand(KillOp).setIsKill(false);
- }
- vrm.removeKillPoint(li.reg);
- bool DefIsReMat = vrm.isReMaterialized(li.reg);
- Slot = vrm.getStackSlot(li.reg);
- assert(Slot != VirtRegMap::MAX_STACK_SLOT);
- MachineInstr *ReMatDefMI = DefIsReMat ?
- vrm.getReMaterializedMI(li.reg) : NULL;
- int LdSlot = 0;
- bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
- bool isLoad = isLoadSS ||
- (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad()));
- bool IsFirstRange = true;
- for (LiveInterval::Ranges::const_iterator
- I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- // If this is a split live interval with multiple ranges, it means there
- // are two-address instructions that re-defined the value. Only the
- // first def can be rematerialized!
- if (IsFirstRange) {
- // Note ReMatOrigDefMI has already been deleted.
- rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI,
- Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
- false, vrm, rc, ReMatIds, loopInfo,
- SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
- MBBVRegsMap, NewLIs);
- } else {
- rewriteInstructionsForSpills(li, false, I, NULL, 0,
- Slot, 0, false, false, false,
- false, vrm, rc, ReMatIds, loopInfo,
- SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
- MBBVRegsMap, NewLIs);
+ void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ // Extend the LiveRange if NewIdx is past the end.
+ if (NewIdx > LR->end) {
+ // Move kill flags if OldIdx was not originally the end
+ // (otherwise LR->end points to an invalid slot).
+ if (LR->end.getRegSlot() != OldIdx.getRegSlot()) {
+ assert(LR->end > OldIdx && "LiveRange does not cover original slot");
+ moveKillFlags(LI->reg, LR->end, NewIdx);
}
- IsFirstRange = false;
+ LR->end = NewIdx.getRegSlot();
}
-
- handleSpilledImpDefs(li, vrm, rc, NewLIs);
- normalizeSpillWeights(NewLIs);
- return NewLIs;
}
- bool TrySplit = !intervalIsInOneMBB(li);
- if (TrySplit)
- ++numSplits;
- bool NeedStackSlot = false;
- for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
- i != e; ++i) {
- const VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (VNI->isUnused())
- continue; // Dead val#.
- // Is the def for the val# rematerializable?
- MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
- bool dummy;
- if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
- // Remember how to remat the def of this val#.
- ReMatOrigDefs[VN] = ReMatDefMI;
- // Original def may be modified so we have to make a copy here.
- MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI);
- CloneMIs.push_back(Clone);
- ReMatDefs[VN] = Clone;
-
- bool CanDelete = true;
- if (VNI->hasPHIKill()) {
- // A kill is a phi node; not all of its uses can be rematerialized.
- // It must not be deleted.
- CanDelete = false;
- // Need a stack slot if there is any live range where uses cannot be
- // rematerialized.
- NeedStackSlot = true;
- }
- if (CanDelete)
- ReMatDelete.set(VN);
+ void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) {
+ bool GoingUp = NewIdx < OldIdx;
+
+ if (GoingUp) {
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI)
+ moveEnteringUpFrom(OldIdx, *EI);
} else {
- // Need a stack slot if there is any live range where uses cannot be
- // rematerialized.
- NeedStackSlot = true;
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI)
+ moveEnteringDownFrom(OldIdx, *EI);
}
}
- // One stack slot per live interval.
- if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
- if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
- Slot = vrm.assignVirt2StackSlot(li.reg);
-
- // This case only occurs when the prealloc splitter has already assigned
- // a stack slot to this vreg.
- else
- Slot = vrm.getStackSlot(li.reg);
+ void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
+ LR->end <= OldIdx.getDeadSlot() &&
+ "Range should be internal to OldIdx.");
+ LiveRange Tmp(*LR);
+ Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber());
+ Tmp.valno->def = Tmp.start;
+ Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot();
+ LI->removeRange(*LR);
+ LI->addRange(Tmp);
}
- // Create new intervals and rewrite defs and uses.
- for (LiveInterval::Ranges::const_iterator
- I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id];
- MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id];
- bool DefIsReMat = ReMatDefMI != NULL;
- bool CanDelete = ReMatDelete[I->valno->id];
- int LdSlot = 0;
- bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
- bool isLoad = isLoadSS ||
- (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad());
- rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI,
- Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
- CanDelete, vrm, rc, ReMatIds, loopInfo,
- SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
- MBBVRegsMap, NewLIs);
+ void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) {
+ for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
+ II != IE; ++II)
+ moveInternalFrom(OldIdx, *II);
}
- // Insert spills / restores if we are splitting.
- if (!TrySplit) {
- handleSpilledImpDefs(li, vrm, rc, NewLIs);
- normalizeSpillWeights(NewLIs);
- return NewLIs;
+ void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) {
+ LiveRange* LR = P.second;
+ assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
+ "Range should start in OldIdx.");
+ assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx.");
+ SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber());
+ LR->start = NewStart;
+ LR->valno->def = NewStart;
}
- SmallPtrSet<LiveInterval*, 4> AddedKill;
- SmallVector<unsigned, 2> Ops;
- if (NeedStackSlot) {
- int Id = SpillMBBs.find_first();
- while (Id != -1) {
- std::vector<SRInfo> &spills = SpillIdxes[Id];
- for (unsigned i = 0, e = spills.size(); i != e; ++i) {
- SlotIndex index = spills[i].index;
- unsigned VReg = spills[i].vreg;
- LiveInterval &nI = getOrCreateInterval(VReg);
- bool isReMat = vrm.isReMaterialized(VReg);
- MachineInstr *MI = getInstructionFromIndex(index);
- bool CanFold = false;
- bool FoundUse = false;
- Ops.clear();
- if (spills[i].canFold) {
- CanFold = true;
- for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
- MachineOperand &MO = MI->getOperand(j);
- if (!MO.isReg() || MO.getReg() != VReg)
- continue;
-
- Ops.push_back(j);
- if (MO.isDef())
- continue;
- if (isReMat ||
- (!FoundUse && !alsoFoldARestore(Id, index, VReg,
- RestoreMBBs, RestoreIdxes))) {
- // MI has two-address uses of the same register. If the use
- // isn't the first and only use in the BB, then we can't fold
- // it. FIXME: Move this to rewriteInstructionsForSpills.
- CanFold = false;
- break;
- }
- FoundUse = true;
- }
- }
- // Fold the store into the def if possible.
- bool Folded = false;
- if (CanFold && !Ops.empty()) {
- if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){
- Folded = true;
- if (FoundUse) {
- // Also folded uses, do not issue a load.
- eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
- nI.removeRange(index.getLoadIndex(), index.getDefIndex());
- }
- nI.removeRange(index.getDefIndex(), index.getStoreIndex());
- }
- }
-
- // Otherwise tell the spiller to issue a spill.
- if (!Folded) {
- LiveRange *LR = &nI.ranges[nI.ranges.size()-1];
- bool isKill = LR->end == index.getStoreIndex();
- if (!MI->registerDefIsDead(nI.reg))
- // No need to spill a dead def.
- vrm.addSpillPoint(VReg, isKill, MI);
- if (isKill)
- AddedKill.insert(&nI);
- }
- }
- Id = SpillMBBs.find_next(Id);
- }
+ void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) {
+ for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
+ EI != EE; ++EI)
+ moveExitingFrom(OldIdx, *EI);
}
- int Id = RestoreMBBs.find_first();
- while (Id != -1) {
- std::vector<SRInfo> &restores = RestoreIdxes[Id];
- for (unsigned i = 0, e = restores.size(); i != e; ++i) {
- SlotIndex index = restores[i].index;
- if (index == SlotIndex())
- continue;
- unsigned VReg = restores[i].vreg;
- LiveInterval &nI = getOrCreateInterval(VReg);
- bool isReMat = vrm.isReMaterialized(VReg);
- MachineInstr *MI = getInstructionFromIndex(index);
- bool CanFold = false;
- Ops.clear();
- if (restores[i].canFold) {
- CanFold = true;
- for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
- MachineOperand &MO = MI->getOperand(j);
- if (!MO.isReg() || MO.getReg() != VReg)
- continue;
-
- if (MO.isDef()) {
- // If this restore were to be folded, it would have been folded
- // already.
- CanFold = false;
- break;
- }
- Ops.push_back(j);
- }
- }
+ void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P,
+ BundleRanges& BR) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ bool LiveThrough = LR->end > OldIdx.getRegSlot();
+ if (LiveThrough) {
+ assert((LR->start < NewIdx || BR[LI->reg].Def == LR) &&
+ "Def in bundle should be def range.");
+ assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
+ "If bundle has use for this reg it should be LR.");
+ BR[LI->reg].Use = LR;
+ return;
+ }
- // Fold the load into the use if possible.
- bool Folded = false;
- if (CanFold && !Ops.empty()) {
- if (!isReMat)
- Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg);
- else {
- MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg);
- int LdSlot = 0;
- bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
- // If the rematerializable def is a load, also try to fold it.
- if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad())
- Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
- Ops, isLoadSS, LdSlot, VReg);
- if (!Folded) {
- unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI);
- if (ImpUse) {
- // Re-matting an instruction with virtual register use. Add the
- // register as an implicit use on the use MI and mark the register
- // interval as unspillable.
- LiveInterval &ImpLi = getInterval(ImpUse);
- ImpLi.markNotSpillable();
- MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
- }
- }
- }
- }
- // If folding is not possible / failed, then tell the spiller to issue a
- // load / rematerialization for us.
- if (Folded)
- nI.removeRange(index.getLoadIndex(), index.getDefIndex());
- else
- vrm.addRestorePoint(VReg, MI);
+ SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
+ moveKillFlags(LI->reg, OldIdx, LastUse);
+
+ if (LR->start < NewIdx) {
+ // Becoming a new entering range.
+ assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 &&
+ "Bundle shouldn't be re-defining reg mid-range.");
+ assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
+ "Bundle shouldn't have different use range for same reg.");
+ LR->end = LastUse.getRegSlot();
+ BR[LI->reg].Use = LR;
+ } else {
+ // Becoming a new Dead-def.
+ assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) &&
+ "Live range starting at unexpected slot.");
+ assert(BR[LI->reg].Def == LR && "Reg should have def range.");
+ assert(BR[LI->reg].Dead == 0 &&
+ "Can't have def and dead def of same reg in a bundle.");
+ LR->end = LastUse.getDeadSlot();
+ BR[LI->reg].Dead = BR[LI->reg].Def;
+ BR[LI->reg].Def = 0;
}
- Id = RestoreMBBs.find_next(Id);
}
- // Finalize intervals: add kills, finalize spill weights, and filter out
- // dead intervals.
- std::vector<LiveInterval*> RetNewLIs;
- for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) {
- LiveInterval *LI = NewLIs[i];
- if (!LI->empty()) {
- if (!AddedKill.count(LI)) {
- LiveRange *LR = &LI->ranges[LI->ranges.size()-1];
- SlotIndex LastUseIdx = LR->end.getBaseIndex();
- MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx);
- int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false);
- assert(UseIdx != -1);
- if (!LastUse->isRegTiedToDefOperand(UseIdx)) {
- LastUse->getOperand(UseIdx).setIsKill();
- vrm.addKillPoint(LI->reg, LastUseIdx);
- }
- }
- RetNewLIs.push_back(LI);
+ void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P,
+ BundleRanges& BR) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ if (NewIdx > LR->end) {
+ // Range extended to bundle. Add to bundle uses.
+ // Note: Currently adds kill flags to bundle start.
+ assert(BR[LI->reg].Use == 0 &&
+ "Bundle already has use range for reg.");
+ moveKillFlags(LI->reg, LR->end, NewIdx);
+ LR->end = NewIdx.getRegSlot();
+ BR[LI->reg].Use = LR;
+ } else {
+ assert(BR[LI->reg].Use != 0 &&
+ "Bundle should already have a use range for reg.");
}
}
- handleSpilledImpDefs(li, vrm, rc, RetNewLIs);
- normalizeSpillWeights(RetNewLIs);
- return RetNewLIs;
-}
-
-/// hasAllocatableSuperReg - Return true if the specified physical register has
-/// any super register that's allocatable.
-bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const {
- for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS)
- if (allocatableRegs_[*AS] && hasInterval(*AS))
- return true;
- return false;
-}
+ void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering,
+ BundleRanges& BR) {
+ bool GoingUp = NewIdx < OldIdx;
-/// getRepresentativeReg - Find the largest super register of the specified
-/// physical register.
-unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
- // Find the largest super-register that is allocatable.
- unsigned BestReg = Reg;
- for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
- unsigned SuperReg = *AS;
- if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) {
- BestReg = SuperReg;
- break;
+ if (GoingUp) {
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI)
+ moveEnteringUpFromInto(OldIdx, *EI, BR);
+ } else {
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI)
+ moveEnteringDownFromInto(OldIdx, *EI, BR);
}
}
- return BestReg;
-}
-/// getNumConflictsWithPhysReg - Return the number of uses and defs of the
-/// specified interval that conflicts with the specified physical register.
-unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
- unsigned PhysReg) const {
- unsigned NumConflicts = 0;
- const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg));
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
- E = mri_->reg_end(); I != E; ++I) {
- MachineOperand &O = I.getOperand();
- MachineInstr *MI = O.getParent();
- if (MI->isDebugValue())
- continue;
- SlotIndex Index = getInstructionIndex(MI);
- if (pli.liveAt(Index))
- ++NumConflicts;
+ void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P,
+ BundleRanges& BR) {
+ // TODO: Sane rules for moving ranges into bundles.
}
- return NumConflicts;
-}
-/// spillPhysRegAroundRegDefsUses - Spill the specified physical register
-/// around all defs and uses of the specified interval. Return true if it
-/// was able to cut its interval.
-bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
- unsigned PhysReg, VirtRegMap &vrm) {
- unsigned SpillReg = getRepresentativeReg(PhysReg);
-
- DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg)
- << " represented by " << tri_->getName(SpillReg) << '\n');
-
- for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
- // If there are registers which alias PhysReg but are not sub-registers
- // of the chosen representative super register, assert, since we can't
- // handle it yet.
- assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) ||
- tri_->isSuperRegister(*AS, SpillReg));
-
- bool Cut = false;
- SmallVector<unsigned, 4> PRegs;
- if (hasInterval(SpillReg))
- PRegs.push_back(SpillReg);
- for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR)
- if (hasInterval(*SR))
- PRegs.push_back(*SR);
-
- DEBUG({
- dbgs() << "Trying to spill:";
- for (unsigned i = 0, e = PRegs.size(); i != e; ++i)
- dbgs() << ' ' << tri_->getName(PRegs[i]);
- dbgs() << '\n';
- });
-
- SmallPtrSet<MachineInstr*, 8> SeenMIs;
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
- E = mri_->reg_end(); I != E; ++I) {
- MachineOperand &O = I.getOperand();
- MachineInstr *MI = O.getParent();
- if (MI->isDebugValue() || SeenMIs.count(MI))
- continue;
- SeenMIs.insert(MI);
- SlotIndex Index = getInstructionIndex(MI);
- bool LiveReg = false;
- for (unsigned i = 0, e = PRegs.size(); i != e; ++i) {
- unsigned PReg = PRegs[i];
- LiveInterval &pli = getInterval(PReg);
- if (!pli.liveAt(Index))
- continue;
- LiveReg = true;
- SlotIndex StartIdx = Index.getLoadIndex();
- SlotIndex EndIdx = Index.getNextIndex().getBaseIndex();
- if (!pli.isInOneLiveRange(StartIdx, EndIdx)) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Ran out of registers during register allocation!";
- if (MI->isInlineAsm()) {
- Msg << "\nPlease check your inline asm statement for invalid "
- << "constraints:\n";
- MI->print(Msg, tm_);
- }
- report_fatal_error(Msg.str());
+ void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal,
+ BundleRanges& BR) {
+ for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
+ II != IE; ++II)
+ moveInternalFromInto(OldIdx, *II, BR);
+ }
+
+ void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P,
+ BundleRanges& BR) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+
+ assert(LR->start.isRegister() &&
+ "Don't know how to merge exiting ECs into bundles yet.");
+
+ if (LR->end > NewIdx.getDeadSlot()) {
+ // This range is becoming an exiting range on the bundle.
+ // If there was an old dead-def of this reg, delete it.
+ if (BR[LI->reg].Dead != 0) {
+ LI->removeRange(*BR[LI->reg].Dead);
+ BR[LI->reg].Dead = 0;
+ }
+ assert(BR[LI->reg].Def == 0 &&
+ "Can't have two defs for the same variable exiting a bundle.");
+ LR->start = NewIdx.getRegSlot();
+ LR->valno->def = LR->start;
+ BR[LI->reg].Def = LR;
+ } else {
+ // This range is becoming internal to the bundle.
+ assert(LR->end == NewIdx.getRegSlot() &&
+ "Can't bundle def whose kill is before the bundle");
+ if (BR[LI->reg].Dead || BR[LI->reg].Def) {
+ // Already have a def for this. Just delete range.
+ LI->removeRange(*LR);
+ } else {
+ // Make range dead, record.
+ LR->end = NewIdx.getDeadSlot();
+ BR[LI->reg].Dead = LR;
+ assert(BR[LI->reg].Use == LR &&
+ "Range becoming dead should currently be use.");
}
- pli.removeRange(StartIdx, EndIdx);
- LiveReg = true;
+ // In both cases the range is no longer a use on the bundle.
+ BR[LI->reg].Use = 0;
}
- if (!LiveReg)
- continue;
- DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI);
- vrm.addEmergencySpill(SpillReg, MI);
- Cut = true;
}
- return Cut;
-}
-LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
- MachineInstr* startInst) {
- LiveInterval& Interval = getOrCreateInterval(reg);
- VNInfo* VN = Interval.getNextValue(
- SlotIndex(getInstructionIndex(startInst).getDefIndex()),
- startInst, getVNInfoAllocator());
- VN->setHasPHIKill(true);
- LiveRange LR(
- SlotIndex(getInstructionIndex(startInst).getDefIndex()),
- getMBBEndIdx(startInst->getParent()), VN);
- Interval.addRange(LR);
+ void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting,
+ BundleRanges& BR) {
+ for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
+ EI != EE; ++EI)
+ moveExitingFromInto(OldIdx, *EI, BR);
+ }
- return LR;
+};
+
+void LiveIntervals::handleMove(MachineInstr* MI) {
+ SlotIndex OldIndex = indexes_->getInstructionIndex(MI);
+ indexes_->removeMachineInstrFromMaps(MI);
+ SlotIndex NewIndex = MI->isInsideBundle() ?
+ indexes_->getInstructionIndex(MI) :
+ indexes_->insertMachineInstrInMaps(MI);
+ assert(getMBBStartIdx(MI->getParent()) <= OldIndex &&
+ OldIndex < getMBBEndIdx(MI->getParent()) &&
+ "Cannot handle moves across basic block boundaries.");
+ assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
+
+ HMEditor HME(*this, *mri_, *tri_, NewIndex);
+ HME.moveAllRangesFrom(MI, OldIndex);
}
+void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
+                                         MachineInstr* BundleStart) {
+ SlotIndex NewIndex = indexes_->getInstructionIndex(BundleStart);
+ HMEditor HME(*this, *mri_, *tri_, NewIndex);
+ HME.moveAllRangesInto(MI, BundleStart);
+}
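// Illustrative sketch, not part of this change: how a machine pass that
// preserves LiveIntervals might keep the analysis up to date after
// reordering an instruction. 'hoistWithinBlock' and its parameters are
// hypothetical caller state.
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
using namespace llvm;

static void hoistWithinBlock(MachineInstr *MI,
                             MachineBasicBlock::iterator InsertPt,
                             LiveIntervals *LIS) {
  MachineBasicBlock *MBB = MI->getParent();
  // handleMove() asserts the move stays inside the original basic block and
  // that MI is not bundled.
  MBB->splice(InsertPt, MBB, MachineBasicBlock::iterator(MI));
  LIS->handleMove(MI);
}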
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index 110fe1e62024..60a68806c55e 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -21,6 +21,8 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+
using namespace llvm;
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h
index 5d64d285f39a..dbf5ac122d5d 100644
--- a/lib/CodeGen/LiveIntervalUnion.h
+++ b/lib/CodeGen/LiveIntervalUnion.h
@@ -20,8 +20,6 @@
#include "llvm/ADT/IntervalMap.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include <algorithm>
-
namespace llvm {
class MachineLoopRange;
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index a7d5af5198e5..d8ab7918ae25 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -65,7 +65,7 @@ void LiveRangeCalc::extend(LiveInterval *LI,
assert(DomTree && "Missing dominator tree");
MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot());
- assert(Kill && "No MBB at Kill");
+ assert(KillMBB && "No MBB at Kill");
// Is there a def in the same MBB we can extend?
if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill))
@@ -237,7 +237,7 @@ void LiveRangeCalc::updateSSA(SlotIndexes *Indexes,
assert(Alloc && "Need VNInfo allocator to create PHI-defs");
SlotIndex Start, End;
tie(Start, End) = Indexes->getMBBRange(MBB);
- VNInfo *VNI = I->LI->getNextValue(Start, 0, *Alloc);
+ VNInfo *VNI = I->LI->getNextValue(Start, *Alloc);
VNI->setIsPHIDef(true);
I->Value = VNI;
// This block is done, we know the final value.
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index b23f85165360..695f53631e1b 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -1,4 +1,4 @@
-//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===//
+//===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
@@ -29,13 +29,14 @@ STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE");
STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
-LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg,
- LiveIntervals &LIS,
- VirtRegMap &VRM) {
- MachineRegisterInfo &MRI = VRM.getRegInfo();
+void LiveRangeEdit::Delegate::anchor() { }
+
+LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) {
unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
- VRM.grow();
- VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg));
+ if (VRM) {
+ VRM->grow();
+ VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
+ }
LiveInterval &LI = LIS.getOrCreateInterval(VReg);
newRegs_.push_back(&LI);
return LI;
@@ -43,37 +44,32 @@ LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg,
bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
const MachineInstr *DefMI,
- const TargetInstrInfo &tii,
AliasAnalysis *aa) {
assert(DefMI && "Missing instruction");
scannedRemattable_ = true;
- if (!tii.isTriviallyReMaterializable(DefMI, aa))
+ if (!TII.isTriviallyReMaterializable(DefMI, aa))
return false;
remattable_.insert(VNI);
return true;
}
-void LiveRangeEdit::scanRemattable(LiveIntervals &lis,
- const TargetInstrInfo &tii,
- AliasAnalysis *aa) {
+void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
for (LiveInterval::vni_iterator I = parent_.vni_begin(),
E = parent_.vni_end(); I != E; ++I) {
VNInfo *VNI = *I;
if (VNI->isUnused())
continue;
- MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def);
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
if (!DefMI)
continue;
- checkRematerializable(VNI, DefMI, tii, aa);
+ checkRematerializable(VNI, DefMI, aa);
}
scannedRemattable_ = true;
}
-bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis,
- const TargetInstrInfo &tii,
- AliasAnalysis *aa) {
+bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
if (!scannedRemattable_)
- scanRemattable(lis, tii, aa);
+ scanRemattable(aa);
return !remattable_.empty();
}
@@ -81,24 +77,18 @@ bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis,
/// OrigIdx are also available with the same value at UseIdx.
bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
SlotIndex OrigIdx,
- SlotIndex UseIdx,
- LiveIntervals &lis) {
- OrigIdx = OrigIdx.getUseIndex();
- UseIdx = UseIdx.getUseIndex();
+ SlotIndex UseIdx) {
+ OrigIdx = OrigIdx.getRegSlot(true);
+ UseIdx = UseIdx.getRegSlot(true);
for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = OrigMI->getOperand(i);
if (!MO.isReg() || !MO.getReg() || MO.isDef())
continue;
// Reserved registers are OK.
- if (MO.isUndef() || !lis.hasInterval(MO.getReg()))
+ if (MO.isUndef() || !LIS.hasInterval(MO.getReg()))
continue;
- // We cannot depend on virtual registers in uselessRegs_.
- if (uselessRegs_)
- for (unsigned ui = 0, ue = uselessRegs_->size(); ui != ue; ++ui)
- if ((*uselessRegs_)[ui]->reg == MO.getReg())
- return false;
- LiveInterval &li = lis.getInterval(MO.getReg());
+ LiveInterval &li = LIS.getInterval(MO.getReg());
const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
if (!OVNI)
continue;
@@ -110,8 +100,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
bool LiveRangeEdit::canRematerializeAt(Remat &RM,
SlotIndex UseIdx,
- bool cheapAsAMove,
- LiveIntervals &lis) {
+ bool cheapAsAMove) {
assert(scannedRemattable_ && "Call anyRematerializable first");
// Use scanRemattable info.
@@ -121,19 +110,19 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM,
// No defining instruction provided.
SlotIndex DefIdx;
if (RM.OrigMI)
- DefIdx = lis.getInstructionIndex(RM.OrigMI);
+ DefIdx = LIS.getInstructionIndex(RM.OrigMI);
else {
DefIdx = RM.ParentVNI->def;
- RM.OrigMI = lis.getInstructionFromIndex(DefIdx);
+ RM.OrigMI = LIS.getInstructionFromIndex(DefIdx);
assert(RM.OrigMI && "No defining instruction for remattable value");
}
// If only cheap remats were requested, bail out early.
- if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove())
+ if (cheapAsAMove && !RM.OrigMI->isAsCheapAsAMove())
return false;
// Verify that all used registers are available with the same values.
- if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis))
+ if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx))
return false;
return true;
@@ -143,27 +132,22 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg,
const Remat &RM,
- LiveIntervals &lis,
- const TargetInstrInfo &tii,
const TargetRegisterInfo &tri,
bool Late) {
assert(RM.OrigMI && "Invalid remat");
- tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
+ TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
rematted_.insert(RM.ParentVNI);
- return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
- .getDefIndex();
+ return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
+ .getRegSlot();
}
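// Illustrative sketch, not part of this change: one way a caller such as a
// spiller might drive the trimmed rematerialization interface. All names
// below ('tryRematBefore', UseMI, UseIdx, NewVReg, AA) are hypothetical.
static bool tryRematBefore(LiveRangeEdit &Edit, VNInfo *ParentVNI,
                           MachineBasicBlock::iterator UseMI, SlotIndex UseIdx,
                           unsigned NewVReg, const TargetRegisterInfo &TRI,
                           AliasAnalysis *AA) {
  // anyRematerializable() must run before any canRematerializeAt() query.
  if (!Edit.anyRematerializable(AA))
    return false;
  LiveRangeEdit::Remat RM(ParentVNI);
  if (!Edit.canRematerializeAt(RM, UseIdx, /*cheapAsAMove=*/true))
    return false;
  // Inserts the rematerialized def before UseMI and maps it; the caller is
  // still responsible for updating live ranges.
  Edit.rematerializeAt(*UseMI->getParent(), UseMI, NewVReg, RM, TRI,
                       /*Late=*/false);
  return true;
}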
-void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) {
+void LiveRangeEdit::eraseVirtReg(unsigned Reg) {
if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg))
LIS.removeInterval(Reg);
}
bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
- SmallVectorImpl<MachineInstr*> &Dead,
- MachineRegisterInfo &MRI,
- LiveIntervals &LIS,
- const TargetInstrInfo &TII) {
+ SmallVectorImpl<MachineInstr*> &Dead) {
MachineInstr *DefMI = 0, *UseMI = 0;
// Check that there is a single def and a single use.
@@ -174,7 +158,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
if (MO.isDef()) {
if (DefMI && DefMI != MI)
return false;
- if (!MI->getDesc().canFoldAsLoad())
+ if (!MI->canFoldAsLoad())
return false;
DefMI = MI;
} else if (!MO.isUndef()) {
@@ -209,19 +193,17 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
}
void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
- LiveIntervals &LIS, VirtRegMap &VRM,
- const TargetInstrInfo &TII) {
+ ArrayRef<unsigned> RegsBeingSpilled) {
SetVector<LiveInterval*,
SmallVector<LiveInterval*, 8>,
SmallPtrSet<LiveInterval*, 8> > ToShrink;
- MachineRegisterInfo &MRI = VRM.getRegInfo();
for (;;) {
// Erase all dead defs.
while (!Dead.empty()) {
MachineInstr *MI = Dead.pop_back_val();
assert(MI->allDefsAreDead() && "Def isn't really dead");
- SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex();
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
// Never delete inline asm.
if (MI->isInlineAsm()) {
@@ -265,7 +247,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
LI.removeValNo(VNI);
if (LI.empty()) {
ToShrink.remove(&LI);
- eraseVirtReg(Reg, LIS);
+ eraseVirtReg(Reg);
}
}
}
@@ -284,12 +266,26 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
// Shrink just one live interval. Then delete new dead defs.
LiveInterval *LI = ToShrink.back();
ToShrink.pop_back();
- if (foldAsLoad(LI, Dead, MRI, LIS, TII))
+ if (foldAsLoad(LI, Dead))
continue;
if (delegate_)
delegate_->LRE_WillShrinkVirtReg(LI->reg);
if (!LIS.shrinkToUses(LI, &Dead))
continue;
+
+ // Don't create new intervals for a register being spilled.
+ // The new intervals would have to be spilled anyway, so it's not worth it.
+ // Also, they currently aren't spilled, so creating them and not spilling
+ // them results in incorrect code.
+ bool BeingSpilled = false;
+ for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) {
+ if (LI->reg == RegsBeingSpilled[i]) {
+ BeingSpilled = true;
+ break;
+ }
+ }
+
+ if (BeingSpilled) continue;
// LI may have been separated, create new intervals.
LI->RenumberValues(LIS);
@@ -298,16 +294,16 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
if (NumComp <= 1)
continue;
++NumFracRanges;
- bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg;
+ bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg;
DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
SmallVector<LiveInterval*, 8> Dups(1, LI);
for (unsigned i = 1; i != NumComp; ++i) {
- Dups.push_back(&createFrom(LI->reg, LIS, VRM));
+ Dups.push_back(&createFrom(LI->reg));
// If LI is an original interval that hasn't been split yet, make the new
// intervals their own originals instead of referring to LI. The original
// interval must contain all the split products, and LI doesn't.
if (IsOriginal)
- VRM.setIsSplitFromReg(Dups.back()->reg, 0);
+ VRM->setIsSplitFromReg(Dups.back()->reg, 0);
if (delegate_)
delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
}
@@ -316,10 +312,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
}
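// Illustrative sketch, not part of this change: with LIS, VRM and TII now
// held by LiveRangeEdit itself, a caller only passes the dead instructions
// and the registers it is already spilling. Names here are hypothetical.
static void removeDeadSpillCode(LiveRangeEdit &Edit,
                                SmallVectorImpl<MachineInstr*> &DeadInsts,
                                ArrayRef<unsigned> RegsToSpill) {
  if (DeadInsts.empty())
    return;
  // Intervals for RegsToSpill are not re-created when they separate; they
  // are about to be spilled anyway.
  Edit.eliminateDeadDefs(DeadInsts, RegsToSpill);
}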
void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
- LiveIntervals &LIS,
const MachineLoopInfo &Loops) {
VirtRegAuxInfo VRAI(MF, LIS, Loops);
- MachineRegisterInfo &MRI = MF.getRegInfo();
for (iterator I = begin(), E = end(); I != E; ++I) {
LiveInterval &LI = **I;
if (MRI.recomputeRegClass(LI.reg, MF.getTarget()))
diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h
deleted file mode 100644
index 9b0a671ea9e5..000000000000
--- a/lib/CodeGen/LiveRangeEdit.h
+++ /dev/null
@@ -1,206 +0,0 @@
-//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The LiveRangeEdit class represents changes done to a virtual register when it
-// is spilled or split.
-//
-// The parent register is never changed. Instead, a number of new virtual
-// registers are created and added to the newRegs vector.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H
-#define LLVM_CODEGEN_LIVERANGEEDIT_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/CodeGen/LiveInterval.h"
-
-namespace llvm {
-
-class AliasAnalysis;
-class LiveIntervals;
-class MachineLoopInfo;
-class MachineRegisterInfo;
-class VirtRegMap;
-
-class LiveRangeEdit {
-public:
- /// Callback methods for LiveRangeEdit owners.
- struct Delegate {
- /// Called immediately before erasing a dead machine instruction.
- virtual void LRE_WillEraseInstruction(MachineInstr *MI) {}
-
- /// Called when a virtual register is no longer used. Return false to defer
- /// its deletion from LiveIntervals.
- virtual bool LRE_CanEraseVirtReg(unsigned) { return true; }
-
- /// Called before shrinking the live range of a virtual register.
- virtual void LRE_WillShrinkVirtReg(unsigned) {}
-
- /// Called after cloning a virtual register.
- /// This is used for new registers representing connected components of Old.
- virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {}
-
- virtual ~Delegate() {}
- };
-
-private:
- LiveInterval &parent_;
- SmallVectorImpl<LiveInterval*> &newRegs_;
- Delegate *const delegate_;
- const SmallVectorImpl<LiveInterval*> *uselessRegs_;
-
- /// firstNew_ - Index of the first register added to newRegs_.
- const unsigned firstNew_;
-
- /// scannedRemattable_ - true when remattable values have been identified.
- bool scannedRemattable_;
-
- /// remattable_ - Values defined by remattable instructions as identified by
- /// tii.isTriviallyReMaterializable().
- SmallPtrSet<const VNInfo*,4> remattable_;
-
- /// rematted_ - Values that were actually rematted, and so need to have their
- /// live range trimmed or entirely removed.
- SmallPtrSet<const VNInfo*,4> rematted_;
-
- /// scanRemattable - Identify the parent_ values that may rematerialize.
- void scanRemattable(LiveIntervals &lis,
- const TargetInstrInfo &tii,
- AliasAnalysis *aa);
-
- /// allUsesAvailableAt - Return true if all registers used by OrigMI at
- /// OrigIdx are also available with the same value at UseIdx.
- bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
- SlotIndex UseIdx, LiveIntervals &lis);
-
- /// foldAsLoad - If LI has a single use and a single def that can be folded as
- /// a load, eliminate the register by folding the def into the use.
- bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead,
- MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&);
-
-public:
- /// Create a LiveRangeEdit for breaking down parent into smaller pieces.
- /// @param parent The register being spilled or split.
- /// @param newRegs List to receive any new registers created. This needn't be
- /// empty initially; any existing registers are ignored.
- /// @param uselessRegs List of registers that can't be used when
- /// rematerializing values because they are about to be removed.
- LiveRangeEdit(LiveInterval &parent,
- SmallVectorImpl<LiveInterval*> &newRegs,
- Delegate *delegate = 0,
- const SmallVectorImpl<LiveInterval*> *uselessRegs = 0)
- : parent_(parent), newRegs_(newRegs),
- delegate_(delegate),
- uselessRegs_(uselessRegs),
- firstNew_(newRegs.size()),
- scannedRemattable_(false) {}
-
- LiveInterval &getParent() const { return parent_; }
- unsigned getReg() const { return parent_.reg; }
-
- /// Iterator for accessing the new registers added by this edit.
- typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator;
- iterator begin() const { return newRegs_.begin()+firstNew_; }
- iterator end() const { return newRegs_.end(); }
- unsigned size() const { return newRegs_.size()-firstNew_; }
- bool empty() const { return size() == 0; }
- LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
-
- ArrayRef<LiveInterval*> regs() const {
- return makeArrayRef(newRegs_).slice(firstNew_);
- }
-
- /// FIXME: Temporary accessors until we can get rid of
- /// LiveIntervals::AddIntervalsForSpills
- SmallVectorImpl<LiveInterval*> *getNewVRegs() { return &newRegs_; }
- const SmallVectorImpl<LiveInterval*> *getUselessVRegs() {
- return uselessRegs_;
- }
-
- /// createFrom - Create a new virtual register based on OldReg.
- LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&);
-
- /// create - Create a new register with the same class and original slot as
- /// parent.
- LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) {
- return createFrom(getReg(), LIS, VRM);
- }
-
- /// anyRematerializable - Return true if any parent values may be
- /// rematerializable.
- /// This function must be called before any rematerialization is attempted.
- bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&,
- AliasAnalysis*);
-
- /// checkRematerializable - Manually add VNI to the list of rematerializable
- /// values if DefMI may be rematerializable.
- bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI,
- const TargetInstrInfo&, AliasAnalysis*);
-
- /// Remat - Information needed to rematerialize at a specific location.
- struct Remat {
- VNInfo *ParentVNI; // parent_'s value at the remat location.
- MachineInstr *OrigMI; // Instruction defining ParentVNI.
- explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {}
- };
-
- /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
- /// UseIdx. It is assumed that parent_.getVNInfoAt(UseIdx) == ParentVNI.
- /// When cheapAsAMove is set, only cheap remats are allowed.
- bool canRematerializeAt(Remat &RM,
- SlotIndex UseIdx,
- bool cheapAsAMove,
- LiveIntervals &lis);
-
- /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
- /// instruction into MBB before MI. The new instruction is mapped, but
- /// liveness is not updated.
- /// Return the SlotIndex of the new instruction.
- SlotIndex rematerializeAt(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg,
- const Remat &RM,
- LiveIntervals&,
- const TargetInstrInfo&,
- const TargetRegisterInfo&,
- bool Late = false);
-
- /// markRematerialized - explicitly mark a value as rematerialized after doing
- /// it manually.
- void markRematerialized(const VNInfo *ParentVNI) {
- rematted_.insert(ParentVNI);
- }
-
- /// didRematerialize - Return true if ParentVNI was rematerialized anywhere.
- bool didRematerialize(const VNInfo *ParentVNI) const {
- return rematted_.count(ParentVNI);
- }
-
- /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try
- /// to erase it from LIS.
- void eraseVirtReg(unsigned Reg, LiveIntervals &LIS);
-
- /// eliminateDeadDefs - Try to delete machine instructions that are now dead
- /// (allDefsAreDead returns true). This may cause live intervals to be trimmed
- /// and further dead defs to be eliminated.
- void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
- LiveIntervals&, VirtRegMap&,
- const TargetInstrInfo&);
-
- /// calculateRegClassAndHint - Recompute register class and hint for each new
- /// register.
- void calculateRegClassAndHint(MachineFunction&, LiveIntervals&,
- const MachineLoopInfo&);
-};
-
-}
-
-#endif
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 2ca90f9f05c0..5a0d97d132dd 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -14,7 +14,7 @@
// the instruction, but are never used after the instruction (i.e., they are
// killed).
//
-// This class computes live variables using are sparse implementation based on
+// This class computes live variables using a sparse implementation based on
// the machine code SSA form. This class computes live variable information for
// each virtual and _register allocatable_ physical register in a function. It
// uses the dominance properties of SSA form to efficiently compute live
@@ -33,6 +33,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -41,6 +42,7 @@
using namespace llvm;
char LiveVariables::ID = 0;
+char &llvm::LiveVariablesID = LiveVariables::ID;
INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
"Live Variable Analysis", false, false)
INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
@@ -90,7 +92,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
MachineBasicBlock *MBB,
std::vector<MachineBasicBlock*> &WorkList) {
unsigned BBNum = MBB->getNumber();
-
+
// Check to see if this basic block is one of the killing blocks. If so,
// remove it.
for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
@@ -98,7 +100,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
break;
}
-
+
if (MBB == DefBlock) return; // Terminate recursion
if (VRInfo.AliveBlocks.test(BBNum))
@@ -107,6 +109,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
// Mark the variable known alive in this bb
VRInfo.AliveBlocks.set(BBNum);
+ assert(MBB != &MF->front() && "Can't find reaching def for virtreg");
WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend());
}
@@ -130,7 +133,6 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
unsigned BBNum = MBB->getNumber();
VarInfo& VRInfo = getVarInfo(reg);
- VRInfo.NumUses++;
// Check to see if this basic block is already a kill block.
if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
@@ -190,7 +192,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
unsigned LastDefReg = 0;
unsigned LastDefDist = 0;
MachineInstr *LastDef = NULL;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
MachineInstr *Def = PhysRegDef[SubReg];
if (!Def)
@@ -214,7 +216,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
unsigned DefReg = MO.getReg();
if (TRI->isSubRegister(Reg, DefReg)) {
PartDefRegs.insert(DefReg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(DefReg);
unsigned SubReg = *SubRegs; ++SubRegs)
PartDefRegs.insert(SubReg);
}
@@ -245,7 +247,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
true/*IsImp*/));
PhysRegDef[Reg] = LastPartialDef;
SmallSet<unsigned, 8> Processed;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
if (Processed.count(SubReg))
continue;
@@ -257,20 +259,19 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
false/*IsDef*/,
true/*IsImp*/));
PhysRegDef[SubReg] = LastPartialDef;
- for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
Processed.insert(*SS);
}
}
- }
- else if (LastDef && !PhysRegUse[Reg] &&
- !LastDef->findRegisterDefOperand(Reg))
+ } else if (LastDef && !PhysRegUse[Reg] &&
+ !LastDef->findRegisterDefOperand(Reg))
// Last def defines the super register, add an implicit def of reg.
- LastDef->addOperand(MachineOperand::CreateReg(Reg,
- true/*IsDef*/, true/*IsImp*/));
+ LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
// Remember this use.
PhysRegUse[Reg] = MI;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
PhysRegUse[SubReg] = MI;
}
@@ -286,7 +287,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
unsigned LastPartDefDist = 0;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
MachineInstr *Def = PhysRegDef[SubReg];
if (Def && Def != LastDef) {
@@ -331,11 +332,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
// Or whole register is defined, but only partly used.
// AX<dead> = AL<imp-def>
// = AL<kill>
- // AX =
+ // AX =
MachineInstr *LastPartDef = 0;
unsigned LastPartDefDist = 0;
SmallSet<unsigned, 8> PartUses;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
MachineInstr *Def = PhysRegDef[SubReg];
if (Def && Def != LastDef) {
@@ -350,7 +351,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
}
if (MachineInstr *Use = PhysRegUse[SubReg]) {
PartUses.insert(SubReg);
- for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
PartUses.insert(*SS);
unsigned Dist = DistanceMap[Use];
if (Dist > LastRefOrPartRefDist) {
@@ -366,7 +367,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
// EAX<dead> = op AL<imp-def>
// That is, EAX def is dead but AL def extends pass it.
PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
if (!PartUses.count(SubReg))
continue;
@@ -387,11 +388,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
else {
LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
PhysRegUse[SubReg] = LastRefOrPartRef;
- for (const unsigned *SSRegs = TRI->getSubRegisters(SubReg);
+ for (const uint16_t *SSRegs = TRI->getSubRegisters(SubReg);
unsigned SSReg = *SSRegs; ++SSRegs)
PhysRegUse[SSReg] = LastRefOrPartRef;
}
- for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
PartUses.erase(*SS);
}
} else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
@@ -419,16 +420,37 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
return true;
}
+void LiveVariables::HandleRegMask(const MachineOperand &MO) {
+ // Call HandlePhysRegKill() for all live registers clobbered by Mask.
+ // Clobbered registers are always dead, so there is no need to use
+ // HandlePhysRegDef().
+ for (unsigned Reg = 1, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) {
+ // Skip dead regs.
+ if (!PhysRegDef[Reg] && !PhysRegUse[Reg])
+ continue;
+ // Skip mask-preserved regs.
+ if (!MO.clobbersPhysReg(Reg))
+ continue;
+ // Kill the largest clobbered super-register.
+ // This avoids needless implicit operands.
+ unsigned Super = Reg;
+ for (const uint16_t *SR = TRI->getSuperRegisters(Reg); *SR; ++SR)
+ if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR))
+ Super = *SR;
+ HandlePhysRegKill(Super, 0);
+ }
+}
+
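// Illustrative sketch, not part of this change: register-mask operands
// usually appear on call instructions, and MachineOperand::clobbersPhysReg()
// answers whether a given physical register survives them. 'MI' and 'Reg'
// are hypothetical.
static bool isClobberedByRegMask(const MachineInstr *MI, unsigned Reg) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
      return true; // Reg is not preserved across MI.
  }
  return false;
}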
void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
SmallVector<unsigned, 4> &Defs) {
// What parts of the register are previously defined?
SmallSet<unsigned, 32> Live;
if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
Live.insert(Reg);
- for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
Live.insert(*SS);
} else {
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
// If a register isn't itself defined, but all parts that make up of it
// are defined, then consider it also defined.
@@ -440,7 +462,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
continue;
if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
Live.insert(SubReg);
- for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
Live.insert(*SS);
}
}
@@ -450,7 +472,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
// is referenced.
HandlePhysRegKill(Reg, MI);
// Only some of the sub-registers are used.
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
if (!Live.count(SubReg))
// Skip if this sub-register isn't defined.
@@ -469,7 +491,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
Defs.pop_back();
PhysRegDef[Reg] = MI;
PhysRegUse[Reg] = NULL;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
PhysRegDef[SubReg] = MI;
PhysRegUse[SubReg] = NULL;
@@ -492,6 +514,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
PHIJoins.clear();
+ // FIXME: LiveIntervals will be updated to remove its dependence on
+ // LiveVariables to improve compilation time and eliminate bizarre pass
+ // dependencies. Until then, we can't change much in -O0.
+ if (!MRI->isSSA())
+ report_fatal_error("regalloc=... not currently supported with -O0");
+
analyzePHINodes(mf);
// Calculate live variable information in depth first order on the CFG of the
@@ -536,8 +564,13 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// Clear kill and dead markers. LV will recompute them.
SmallVector<unsigned, 4> UseRegs;
SmallVector<unsigned, 4> DefRegs;
+ SmallVector<unsigned, 1> RegMasks;
for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask()) {
+ RegMasks.push_back(i);
+ continue;
+ }
if (!MO.isReg() || MO.getReg() == 0)
continue;
unsigned MOReg = MO.getReg();
@@ -559,6 +592,10 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
HandlePhysRegUse(MOReg, MI);
}
+ // Process all masked registers. (Call clobbers).
+ for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
+ HandleRegMask(MI->getOperand(RegMasks[i]));
+
// Process all defs.
for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
unsigned MOReg = DefRegs[i];
@@ -590,8 +627,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// them. The tail callee need not take the same registers as input
// that it produces as output, and there are dependencies for its input
// registers elsewhere.
- if (!MBB->empty() && MBB->back().getDesc().isReturn()
- && !MBB->back().getDesc().isCall()) {
+ if (!MBB->empty() && MBB->back().isReturn()
+ && !MBB->back().isCall()) {
MachineInstr *Ret = &MBB->back();
for (MachineRegisterInfo::liveout_iterator
@@ -607,10 +644,27 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
}
}
+ // MachineCSE may CSE instructions which write to non-allocatable physical
+ // registers across MBBs. Remember if any reserved register is liveout.
+ SmallSet<unsigned, 4> LiveOuts;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->isLandingPad())
+ continue;
+ for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(),
+ LE = SuccMBB->livein_end(); LI != LE; ++LI) {
+ unsigned LReg = *LI;
+ if (!TRI->isInAllocatableClass(LReg))
+ // Ignore other live-ins, e.g. those that are live into landing pads.
+ LiveOuts.insert(LReg);
+ }
+ }
+
// Loop over PhysRegDef / PhysRegUse, killing any registers that are
// available at the end of the basic block.
for (unsigned i = 0; i != NumRegs; ++i)
- if (PhysRegDef[i] || PhysRegUse[i])
+ if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
HandlePhysRegDef(i, 0, Defs);
std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
@@ -754,7 +808,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
const unsigned NumNew = BB->getNumber();
// All registers used by PHI nodes in SuccBB must be live through BB.
- for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(),
+ for (MachineBasicBlock::iterator BBI = SuccBB->begin(),
BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI)
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
if (BBI->getOperand(i+1).getMBB() == BB)
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 1318d6212497..238bf52dfed7 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -71,19 +71,15 @@ namespace {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const {
- return "Local Stack Slot Allocation";
- }
private:
};
} // end anonymous namespace
char LocalStackSlotPass::ID = 0;
-
-FunctionPass *llvm::createLocalStackSlotAllocationPass() {
- return new LocalStackSlotPass();
-}
+char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
+INITIALIZE_PASS(LocalStackSlotPass, "localstackalloc",
+ "Local Stack Slot Allocation", false, false)
bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 4c5fe4c480a6..6c8a1072697c 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -73,7 +73,8 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
// Make sure the instructions have their operands in the reginfo lists.
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I)
+ for (MachineBasicBlock::instr_iterator
+ I = N->instr_begin(), E = N->instr_end(); I != E; ++I)
I->AddRegOperandsToUseLists(RegInfo);
LeakDetector::removeGarbageObject(N);
@@ -120,8 +121,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
/// lists.
void ilist_traits<MachineInstr>::
transferNodesFromList(ilist_traits<MachineInstr> &fromList,
- MachineBasicBlock::iterator first,
- MachineBasicBlock::iterator last) {
+ ilist_iterator<MachineInstr> first,
+ ilist_iterator<MachineInstr> last) {
assert(Parent->getParent() == fromList.Parent->getParent() &&
"MachineInstr parent mismatch!");
@@ -140,33 +141,75 @@ void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
}
MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
- iterator I = begin();
- while (I != end() && I->isPHI())
+ instr_iterator I = instr_begin(), E = instr_end();
+ while (I != E && I->isPHI())
++I;
+ assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!");
return I;
}
MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
- while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue()))
+ iterator E = end();
+ while (I != E && (I->isPHI() || I->isLabel() || I->isDebugValue()))
++I;
+ // FIXME: This needs to change if we wish to bundle labels / dbg_values
+ // inside the bundle.
+ assert(!I->isInsideBundle() &&
+ "First non-phi / non-label instruction is inside a bundle!");
return I;
}
MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
- iterator I = end();
- while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue()))
+ iterator B = begin(), E = end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
; /*noop */
- while (I != end() && !I->getDesc().isTerminator())
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::const_iterator
+MachineBasicBlock::getFirstTerminator() const {
+ const_iterator B = begin(), E = end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
+ instr_iterator B = instr_begin(), E = instr_end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
++I;
return I;
}
MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
- iterator B = begin(), I = end();
+ // Skip over end-of-block dbg_value instructions.
+ instr_iterator B = instr_begin(), I = instr_end();
+ while (I != B) {
+ --I;
+ // Return instruction that starts a bundle.
+ if (I->isDebugValue() || I->isInsideBundle())
+ continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+MachineBasicBlock::const_iterator
+MachineBasicBlock::getLastNonDebugInstr() const {
+ // Skip over end-of-block dbg_value instructions.
+ const_instr_iterator B = instr_begin(), I = instr_end();
while (I != B) {
--I;
- if (I->isDebugValue())
+ // Return instruction that starts a bundle.
+ if (I->isDebugValue() || I->isInsideBundle())
continue;
return I;
}
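
To illustrate the bundle-aware reverse scan that getLastNonDebugInstr performs in the hunk above, here is a minimal standalone sketch, not part of this patch: a plain vector stands in for the instruction ilist, and the IsDebugValue/IsInsideBundle flags are stand-ins for the real MachineInstr queries.

#include <cstdio>
#include <vector>

struct Instr {
  const char *Name;
  bool IsDebugValue;
  bool IsInsideBundle; // true for instructions bundled under a preceding header
};

// Walk backwards, skipping debug values and bundled instructions, so the
// result is the last "real" instruction, i.e. the one heading its bundle.
static int lastNonDebugInstr(const std::vector<Instr> &Block) {
  for (int I = (int)Block.size() - 1; I >= 0; --I) {
    if (Block[I].IsDebugValue || Block[I].IsInsideBundle)
      continue;
    return I;
  }
  return -1; // Block contains only debug values.
}

int main() {
  std::vector<Instr> Block = {
    {"add", false, false},
    {"bundle-head", false, false},
    {"mul", false, true},     // inside the bundle
    {"dbg_value", true, false},
  };
  int Idx = lastNonDebugInstr(Block);
  std::printf("last non-debug instruction: %s\n",
              Idx < 0 ? "<none>" : Block[Idx].Name);
  return 0;
}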
@@ -195,6 +238,18 @@ StringRef MachineBasicBlock::getName() const {
return "(null)";
}
+/// Return a hopefully unique identifier for this block.
+std::string MachineBasicBlock::getFullName() const {
+ std::string Name;
+ if (getParent())
+ Name = (getParent()->getFunction()->getName() + ":").str();
+ if (getBasicBlock())
+ Name += getBasicBlock()->getName();
+ else
+ Name += (Twine("BB") + Twine(getNumber())).str();
+ return Name;
+}
+
void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
const MachineFunction *MF = getParent();
if (!MF) {
@@ -203,8 +258,6 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
return;
}
- if (Alignment) { OS << "Alignment " << Alignment << "\n"; }
-
if (Indexes)
OS << Indexes->getMBBStartIdx(this) << '\t';
@@ -218,6 +271,12 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
}
if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
+ if (Alignment) {
+ OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
+ << " bytes)";
+ Comma = ", ";
+ }
+
OS << '\n';
const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
@@ -237,13 +296,15 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << '\n';
}
- for (const_iterator I = begin(); I != end(); ++I) {
+ for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) {
if (Indexes) {
if (Indexes->hasIndex(I))
OS << Indexes->getInstructionIndex(I);
OS << '\t';
}
OS << '\t';
+ if (I->isInsideBundle())
+ OS << " * ";
I->print(OS, &getParent()->getTarget());
}
@@ -260,8 +321,8 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
void MachineBasicBlock::removeLiveIn(unsigned Reg) {
std::vector<unsigned>::iterator I =
std::find(LiveIns.begin(), LiveIns.end(), Reg);
- assert(I != LiveIns.end() && "Not a live in!");
- LiveIns.erase(I);
+ if (I != LiveIns.end())
+ LiveIns.erase(I);
}
bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
@@ -297,8 +358,22 @@ void MachineBasicBlock::updateTerminator() {
TII->RemoveBranch(*this);
} else {
// The block has an unconditional fallthrough. If its successor is not
- // its layout successor, insert a branch.
- TBB = *succ_begin();
+ // its layout successor, insert a branch. First we have to locate the
+ // only non-landing-pad successor, as that is the fallthrough block.
+ for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
+ if ((*SI)->isLandingPad())
+ continue;
+ assert(!TBB && "Found more than one non-landing-pad successor!");
+ TBB = *SI;
+ }
+
+ // If there is no non-landing-pad successor, the block has no
+ // fall-through edges to be concerned with.
+ if (!TBB)
+ return;
+
+ // Finally update the unconditional successor to be reached via a branch
+ // if it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
TII->InsertBranch(*this, TBB, 0, Cond, dl);
}
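
A rough standalone sketch of the successor search added to updateTerminator above, assuming (as the patch does) that at most one non-landing-pad successor carries the fallthrough; the Block struct here is an illustrative stand-in, not the real MachineBasicBlock API.

#include <cassert>
#include <cstdio>
#include <vector>

struct Block {
  int Number;
  bool IsLandingPad;
};

// Return the single non-landing-pad successor, or nullptr if every successor
// is a landing pad (in which case there is no fallthrough edge to fix up).
static const Block *findFallthroughSucc(const std::vector<Block> &Succs) {
  const Block *TBB = nullptr;
  for (const Block &S : Succs) {
    if (S.IsLandingPad)
      continue;
    assert(!TBB && "Found more than one non-landing-pad successor!");
    TBB = &S;
  }
  return TBB;
}

int main() {
  std::vector<Block> Succs = {{1, true}, {2, false}};
  if (const Block *TBB = findFallthroughSucc(Succs))
    std::printf("fallthrough successor: BB#%d\n", TBB->Number);
  else
    std::printf("no fallthrough successor\n");
  return 0;
}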
@@ -435,8 +510,8 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
fromMBB->removeSuccessor(Succ);
// Fix up any PHI nodes in the successor.
- for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end();
- MI != ME && MI->isPHI(); ++MI)
+ for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(),
+ ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI)
for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
MachineOperand &MO = MI->getOperand(i);
if (MO.getMBB() == fromMBB)
@@ -473,13 +548,10 @@ bool MachineBasicBlock::canFallThrough() {
if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) {
// If we couldn't analyze the branch, examine the last instruction.
// If the block doesn't end in a known control barrier, assume fallthrough
- // is possible. The isPredicable check is needed because this code can be
+ // is possible. The isPredicated check is needed because this code can be
// called during IfConversion, where an instruction which is normally a
- // Barrier is predicated and thus no longer an actual control barrier. This
- // is over-conservative though, because if an instruction isn't actually
- // predicated we could still treat it like a barrier.
- return empty() || !back().getDesc().isBarrier() ||
- back().getDesc().isPredicable();
+ // Barrier is predicated and thus no longer an actual control barrier.
+ return empty() || !back().isBarrier() || TII->isPredicated(&back());
}
// If there is no branch, control always falls through.
@@ -538,14 +610,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// Collect a list of virtual registers killed by the terminators.
SmallVector<unsigned, 4> KilledRegs;
if (LV)
- for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I) {
MachineInstr *MI = I;
for (MachineInstr::mop_iterator OI = MI->operands_begin(),
OE = MI->operands_end(); OI != OE; ++OI) {
- if (!OI->isReg() || !OI->isUse() || !OI->isKill() || OI->isUndef())
+ if (!OI->isReg() || OI->getReg() == 0 ||
+ !OI->isUse() || !OI->isKill() || OI->isUndef())
continue;
unsigned Reg = OI->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
LV->getVarInfo(Reg).removeKill(MI)) {
KilledRegs.push_back(Reg);
DEBUG(dbgs() << "Removing terminator kill: " << *MI);
@@ -565,7 +639,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
}
// Fix PHI nodes in Succ so they refer to NMBB instead of this
- for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
+ for (MachineBasicBlock::instr_iterator
+ i = Succ->instr_begin(),e = Succ->instr_end();
i != e && i->isPHI(); ++i)
for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
if (i->getOperand(ni+1).getMBB() == this)
@@ -577,14 +652,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
NMBB->addLiveIn(*I);
// Update LiveVariables.
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
if (LV) {
// Restore kills of virtual registers that were killed by the terminators.
while (!KilledRegs.empty()) {
unsigned Reg = KilledRegs.pop_back_val();
- for (iterator I = end(), E = begin(); I != E;) {
- if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false))
+ for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
+ if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
continue;
- LV->getVarInfo(Reg).Kills.push_back(I);
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ LV->getVarInfo(Reg).Kills.push_back(I);
DEBUG(dbgs() << "Restored terminator kill: " << *I);
break;
}
@@ -650,6 +727,42 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
return NMBB;
}
+MachineBasicBlock::iterator
+MachineBasicBlock::erase(MachineBasicBlock::iterator I) {
+ if (I->isBundle()) {
+ MachineBasicBlock::iterator E = llvm::next(I);
+ return Insts.erase(I.getInstrIterator(), E.getInstrIterator());
+ }
+
+ return Insts.erase(I.getInstrIterator());
+}
+
+MachineInstr *MachineBasicBlock::remove(MachineInstr *I) {
+ if (I->isBundle()) {
+ instr_iterator MII = llvm::next(I);
+ iterator E = end();
+ while (MII != E && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII++;
+ Insts.remove(MI);
+ }
+ }
+
+ return Insts.remove(I);
+}
+
+void MachineBasicBlock::splice(MachineBasicBlock::iterator where,
+ MachineBasicBlock *Other,
+ MachineBasicBlock::iterator From) {
+ if (From->isBundle()) {
+ MachineBasicBlock::iterator To = llvm::next(From);
+ Insts.splice(where.getInstrIterator(), Other->Insts,
+ From.getInstrIterator(), To.getInstrIterator());
+ return;
+ }
+
+ Insts.splice(where.getInstrIterator(), Other->Insts, From.getInstrIterator());
+}
+
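
The new erase/remove/splice overloads treat a bundle header and the instructions bundled behind it as one unit. A simplified sketch of the erase case, not part of this patch, using std::list in place of the instruction ilist and hypothetical flags in place of isBundle()/isInsideBundle():

#include <cstdio>
#include <iterator>
#include <list>
#include <string>

struct Instr {
  std::string Name;
  bool IsBundleHeader;
  bool IsInsideBundle;
};

using InstrList = std::list<Instr>;

// Erase I; if I heads a bundle, also erase every instruction bundled with it.
static InstrList::iterator eraseWithBundle(InstrList &Insts,
                                           InstrList::iterator I) {
  if (!I->IsBundleHeader)
    return Insts.erase(I);
  InstrList::iterator E = std::next(I);
  while (E != Insts.end() && E->IsInsideBundle)
    ++E;
  return Insts.erase(I, E); // erase [header, first instruction past bundle)
}

int main() {
  InstrList Insts = {
    {"copy", false, false},
    {"bundle", true, false},
    {"vadd", false, true},
    {"vmul", false, true},
    {"ret", false, false},
  };
  eraseWithBundle(Insts, std::next(Insts.begin())); // erase the whole bundle
  for (const Instr &I : Insts)
    std::printf("%s\n", I.Name.c_str());
  return 0;
}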
/// removeFromParent - This method unlinks 'this' from the containing function,
/// and returns it, but does not delete it.
MachineBasicBlock *MachineBasicBlock::removeFromParent() {
@@ -673,10 +786,10 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
MachineBasicBlock *New) {
assert(Old != New && "Cannot replace self with self!");
- MachineBasicBlock::iterator I = end();
- while (I != begin()) {
+ MachineBasicBlock::instr_iterator I = instr_end();
+ while (I != instr_begin()) {
--I;
- if (!I->getDesc().isTerminator()) break;
+ if (!I->isTerminator()) break;
// Scan the operands of this machine instruction, replacing any uses of Old
// with New.
@@ -755,27 +868,27 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
DebugLoc
-MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) {
+MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
DebugLoc DL;
- MachineBasicBlock::iterator E = end();
- if (MBBI != E) {
- // Skip debug declarations, we don't want a DebugLoc from them.
- MachineBasicBlock::iterator MBBI2 = MBBI;
- while (MBBI2 != E && MBBI2->isDebugValue())
- MBBI2++;
- if (MBBI2 != E)
- DL = MBBI2->getDebugLoc();
- }
+ instr_iterator E = instr_end();
+ if (MBBI == E)
+ return DL;
+
+ // Skip debug declarations, we don't want a DebugLoc from them.
+ while (MBBI != E && MBBI->isDebugValue())
+ MBBI++;
+ if (MBBI != E)
+ DL = MBBI->getDebugLoc();
return DL;
}
/// getSuccWeight - Return weight of the edge from this block to MBB.
///
-uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) {
+uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const {
if (Weights.empty())
return 0;
- succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
return *getWeightIterator(I);
}
@@ -789,6 +902,16 @@ getWeightIterator(MachineBasicBlock::succ_iterator I) {
return Weights.begin() + index;
}
+/// getWeightIterator - Return the weight iterator corresponding to the
+/// successor iterator I.
+MachineBasicBlock::const_weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::const_succ_iterator I) const {
+ assert(Weights.size() == Successors.size() && "Async weight list!");
+ const size_t index = std::distance(Successors.begin(), I);
+ assert(index < Weights.size() && "Not a current successor!");
+ return Weights.begin() + index;
+}
+
void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
bool t) {
OS << "BB#" << MBB->getNumber();
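
Edge weights are kept in a vector parallel to the successor list, so the const getSuccWeight/getWeightIterator added above boil down to translating a successor iterator into an index. A minimal model of that lookup with simplified containers (not the real MachineBasicBlock members):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <iterator>
#include <vector>

// Look up the weight of the edge to Succ, or 0 if no weights are recorded.
static uint32_t getSuccWeight(const std::vector<int> &Successors,
                              const std::vector<uint32_t> &Weights,
                              int Succ) {
  if (Weights.empty())
    return 0;
  auto I = std::find(Successors.begin(), Successors.end(), Succ);
  assert(Weights.size() == Successors.size() &&
         "Weight list out of sync with successor list!");
  std::size_t Index = std::distance(Successors.begin(), I);
  assert(Index < Weights.size() && "Not a current successor!");
  return Weights[Index];
}

int main() {
  std::vector<int> Successors = {3, 7, 9};
  std::vector<uint32_t> Weights = {16, 64, 4};
  std::printf("weight(->7) = %u\n", getSuccWeight(Successors, Weights, 7));
  return 0;
}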
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index b92cda961474..a079d6e59139 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -56,6 +56,6 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
/// the other block frequencies. We do this to avoid using floating point.
///
BlockFrequency MachineBlockFrequencyInfo::
-getBlockFreq(MachineBasicBlock *MBB) const {
+getBlockFreq(const MachineBasicBlock *MBB) const {
return MBFI->getBlockFreq(MBB);
}
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
new file mode 100644
index 000000000000..22d7212007fc
--- /dev/null
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -0,0 +1,1001 @@
+//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements basic block placement transformations using the CFG
+// structure and branch probability estimates.
+//
+// The pass strives to preserve the structure of the CFG (that is, retain
+// a topological ordering of basic blocks) in the absence of a *strong* signal
+// to the contrary from probabilities. However, within the CFG structure, it
+// attempts to choose an ordering which favors placing more likely sequences of
+// blocks adjacent to each other.
+//
+// The algorithm works from the inner-most loop within a function outward, and
+// at each stage walks through the basic blocks, trying to coalesce them into
+// sequential chains where allowed by the CFG (or demanded by heavy
+// probabilities). Finally, it walks the blocks in topological order, and the
+// first time it reaches a chain of basic blocks, it schedules them in the
+// function in-order.
+//
+//===----------------------------------------------------------------------===//
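
To make the chain-growth intuition from the header comment concrete, here is a deliberately tiny sketch, not part of this patch: it grows a layout by repeatedly appending the most probable not-yet-placed successor, ignoring loops, CFG constraints and the worklist machinery that the real pass adds.

#include <cstdio>
#include <map>
#include <utility>
#include <vector>

// Successor edges with branch probabilities for a tiny CFG.
using CFG = std::map<int, std::vector<std::pair<int, double>>>;

// Greedily lay out blocks: starting at Entry, keep appending the most
// probable not-yet-placed successor; restart from the lowest unplaced block
// when the chain cannot be extended.
static std::vector<int> layout(const CFG &G, int Entry, int NumBlocks) {
  std::vector<bool> Placed(NumBlocks, false);
  std::vector<int> Order;
  int BB = Entry;
  while ((int)Order.size() < NumBlocks) {
    Placed[BB] = true;
    Order.push_back(BB);
    int Best = -1;
    double BestProb = 0.0;
    auto It = G.find(BB);
    if (It != G.end())
      for (auto &Succ : It->second)
        if (!Placed[Succ.first] && Succ.second > BestProb) {
          Best = Succ.first;
          BestProb = Succ.second;
        }
    if (Best < 0) { // chain ended; pick the first unplaced block
      for (int I = 0; I < NumBlocks && Best < 0; ++I)
        if (!Placed[I])
          Best = I;
      if (Best < 0)
        break;
    }
    BB = Best;
  }
  return Order;
}

int main() {
  // 0 -> {1: 0.8, 2: 0.2}, 1 -> {3: 1.0}, 2 -> {3: 1.0}
  CFG G = {{0, {{1, 0.8}, {2, 0.2}}}, {1, {{3, 1.0}}}, {2, {{3, 1.0}}}};
  for (int BB : layout(G, 0, 4))
    std::printf("BB#%d ", BB);
  std::printf("\n"); // expected: BB#0 BB#1 BB#3 BB#2
  return 0;
}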
+
+#define DEBUG_TYPE "block-placement2"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumCondBranches, "Number of conditional branches");
+STATISTIC(NumUncondBranches, "Number of unconditional branches");
+STATISTIC(CondBranchTakenFreq,
+ "Potential frequency of taking conditional branches");
+STATISTIC(UncondBranchTakenFreq,
+ "Potential frequency of taking unconditional branches");
+
+namespace {
+class BlockChain;
+/// \brief Type for our function-wide basic block -> block chain mapping.
+typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
+}
+
+namespace {
+/// \brief A chain of blocks which will be laid out contiguously.
+///
+/// This is the data structure representing a chain of consecutive blocks that
+/// are profitable to layout together in order to maximize fallthrough
+/// probabilities. We also can use a block chain to represent a sequence of
+/// basic blocks which have some external (correctness) requirement for
+/// sequential layout.
+///
+/// Eventually, the block chains will form a directed graph over the function.
+/// We provide an SCC-supporting iterator in order to quickly build and walk the
+/// SCCs of block chains within a function.
+///
+/// The block chains also have support for calculating and caching probability
+/// information related to the chain itself versus other chains. This is used
+/// for ranking during the final layout of block chains.
+class BlockChain {
+ /// \brief The sequence of blocks belonging to this chain.
+ ///
+ /// This is the sequence of blocks for a particular chain. These will be laid
+ /// out in-order within the function.
+ SmallVector<MachineBasicBlock *, 4> Blocks;
+
+ /// \brief A handle to the function-wide basic block to block chain mapping.
+ ///
+ /// This is retained in each block chain to simplify the computation of child
+ /// block chains for SCC-formation and iteration. We store the edges to child
+ /// basic blocks, and map them back to their associated chains using this
+ /// structure.
+ BlockToChainMapType &BlockToChain;
+
+public:
+ /// \brief Construct a new BlockChain.
+ ///
+ /// This builds a new block chain representing a single basic block in the
+ /// function. It also registers itself as the chain that block participates
+ /// in with the BlockToChain mapping.
+ BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
+ : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
+ assert(BB && "Cannot create a chain with a null basic block");
+ BlockToChain[BB] = this;
+ }
+
+ /// \brief Iterator over blocks within the chain.
+ typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator iterator;
+
+ /// \brief Beginning of blocks within the chain.
+ iterator begin() const { return Blocks.begin(); }
+
+ /// \brief End of blocks within the chain.
+ iterator end() const { return Blocks.end(); }
+
+ /// \brief Merge a block chain into this one.
+ ///
+ /// This routine merges a block chain into this one. It takes care of forming
+ /// a contiguous sequence of basic blocks, updating the edge list, and
+ /// updating the block -> chain mapping. It does not free or tear down the
+ /// old chain, but the old chain's block list is no longer valid.
+ void merge(MachineBasicBlock *BB, BlockChain *Chain) {
+ assert(BB);
+ assert(!Blocks.empty());
+
+ // Fast path in case we don't have a chain already.
+ if (!Chain) {
+ assert(!BlockToChain[BB]);
+ Blocks.push_back(BB);
+ BlockToChain[BB] = this;
+ return;
+ }
+
+ assert(BB == *Chain->begin());
+ assert(Chain->begin() != Chain->end());
+
+ // Update the incoming blocks to point to this chain, and add them to the
+ // chain structure.
+ for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end();
+ BI != BE; ++BI) {
+ Blocks.push_back(*BI);
+ assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain");
+ BlockToChain[*BI] = this;
+ }
+ }
+
+#ifndef NDEBUG
+ /// \brief Dump the blocks in this chain.
+ void dump() LLVM_ATTRIBUTE_USED {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ (*I)->dump();
+ }
+#endif // NDEBUG
+
+ /// \brief Count of predecessors within the loop currently being processed.
+ ///
+ /// This count is updated at each loop we process to represent the number of
+ /// in-loop predecessors of this chain.
+ unsigned LoopPredecessors;
+};
+}
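
A stripped-down model of the chain bookkeeping described above: every block maps to the chain containing it, and merging appends the other chain's blocks while redirecting their map entries. The types below are illustrative stand-ins for BlockChain and the block-to-chain map, not the pass's actual classes.

#include <cassert>
#include <cstdio>
#include <map>
#include <vector>

struct Chain;
using BlockToChainMap = std::map<int, Chain *>;

struct Chain {
  std::vector<int> Blocks;
  BlockToChainMap &BlockToChain;

  Chain(BlockToChainMap &M, int BB) : Blocks(1, BB), BlockToChain(M) {
    BlockToChain[BB] = this;
  }

  // Append either a lone block or a whole other chain, updating the map so
  // every moved block now points at this chain.
  void merge(int BB, Chain *Other) {
    if (!Other) {
      Blocks.push_back(BB);
      BlockToChain[BB] = this;
      return;
    }
    assert(BB == Other->Blocks.front() && "Must merge at the chain head");
    for (int B : Other->Blocks) {
      Blocks.push_back(B);
      BlockToChain[B] = this;
    }
  }
};

int main() {
  BlockToChainMap M;
  Chain A(M, 0), B(M, 1);
  B.merge(2, nullptr); // chain B is now {1, 2}
  A.merge(1, &B);      // chain A is now {0, 1, 2}
  for (int BB : A.Blocks)
    std::printf("BB#%d -> chain %p\n", BB, (void *)M[BB]);
  return 0;
}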
+
+namespace {
+class MachineBlockPlacement : public MachineFunctionPass {
+ /// \brief A typedef for a block filter set.
+ typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
+
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ /// \brief A handle to the function-wide block frequency pass.
+ const MachineBlockFrequencyInfo *MBFI;
+
+ /// \brief A handle to the loop info.
+ const MachineLoopInfo *MLI;
+
+ /// \brief A handle to the target's instruction info.
+ const TargetInstrInfo *TII;
+
+ /// \brief A handle to the target's lowering info.
+ const TargetLowering *TLI;
+
+ /// \brief Allocator and owner of BlockChain structures.
+ ///
+ /// We build BlockChains lazily by merging together high probability BB
+  /// sequences according to the "Algo2" in the paper mentioned at the top of
+ /// the file. To reduce malloc traffic, we allocate them using this slab-like
+ /// allocator, and destroy them after the pass completes.
+ SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
+
+ /// \brief Function wide BasicBlock to BlockChain mapping.
+ ///
+ /// This mapping allows efficiently moving from any given basic block to the
+ /// BlockChain it participates in, if any. We use it to, among other things,
+ /// allow implicitly defining edges between chains as the existing edges
+ /// between basic blocks.
+ DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
+
+ void markChainSuccessors(BlockChain &Chain,
+ MachineBasicBlock *LoopHeaderBB,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter = 0);
+ MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *selectBestCandidateBlock(
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *getFirstUnplacedBlock(
+ MachineFunction &F,
+ const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
+ void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter = 0);
+ MachineBasicBlock *findBestLoopTop(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
+ void buildLoopChains(MachineFunction &F, MachineLoop &L);
+ void buildCFGChains(MachineFunction &F);
+ void AlignLoops(MachineFunction &F);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineBlockPlacement() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char MachineBlockPlacement::ID = 0;
+char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
+INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
+ "Branch Probability Basic Block Placement", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
+ "Branch Probability Basic Block Placement", false, false)
+
+#ifndef NDEBUG
+/// \brief Helper to print the name of a MBB.
+///
+/// Only used by debug logging.
+static std::string getBlockName(MachineBasicBlock *BB) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << "BB#" << BB->getNumber()
+ << " (derived from LLVM BB '" << BB->getName() << "')";
+ OS.flush();
+ return Result;
+}
+
+/// \brief Helper to print the number of a MBB.
+///
+/// Only used by debug logging.
+static std::string getBlockNum(MachineBasicBlock *BB) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << "BB#" << BB->getNumber();
+ OS.flush();
+ return Result;
+}
+#endif
+
+/// \brief Mark a chain's successors as having one fewer preds.
+///
+/// When a chain is being merged into the "placed" chain, this routine will
+/// quickly walk the successors of each block in the chain and mark them as
+/// having one fewer active predecessor. It also adds any successors of this
+/// chain which reach the zero-predecessor state to the worklist passed in.
+void MachineBlockPlacement::markChainSuccessors(
+ BlockChain &Chain,
+ MachineBasicBlock *LoopHeaderBB,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter) {
+ // Walk all the blocks in this chain, marking their successors as having
+ // a predecessor placed.
+ for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end();
+ CBI != CBE; ++CBI) {
+ // Add any successors for which this is the only un-placed in-loop
+ // predecessor to the worklist as a viable candidate for CFG-neutral
+ // placement. No subsequent placement of this block will violate the CFG
+ // shape, so we get to use heuristics to choose a favorable placement.
+ for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(),
+ SE = (*CBI)->succ_end();
+ SI != SE; ++SI) {
+ if (BlockFilter && !BlockFilter->count(*SI))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ // Disregard edges within a fixed chain, or edges to the loop header.
+ if (&Chain == &SuccChain || *SI == LoopHeaderBB)
+ continue;
+
+ // This is a cross-chain edge that is within the loop, so decrement the
+ // loop predecessor count of the destination chain.
+ if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*SuccChain.begin());
+ }
+ }
+}
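
The zero-predecessor worklist trick can be shown in isolation: each chain carries a count of unplaced in-loop predecessors, and a chain whose count reaches zero becomes a CFG-neutral placement candidate. A toy version, with plain integers standing in for chains:

#include <cstdio>
#include <map>
#include <vector>

int main() {
  // Remaining unplaced-predecessor counts per chain (chains named by id).
  std::map<int, unsigned> LoopPredecessors = {{1, 2}, {2, 1}};
  // Cross-chain edges leaving the chain that was just placed.
  std::vector<int> SuccChains = {1, 1, 2};

  std::vector<int> WorkList;
  for (int C : SuccChains)
    if (LoopPredecessors[C] > 0 && --LoopPredecessors[C] == 0)
      WorkList.push_back(C); // now safe to place without breaking the CFG

  for (int C : WorkList)
    std::printf("chain %d is ready for placement\n", C);
  return 0;
}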
+
+/// \brief Select the best successor for a block.
+///
+/// This looks across all successors of a particular block and attempts to
+/// select the "best" one to be the layout successor. It only considers direct
+/// successors which also pass the block filter. It will attempt to avoid
+/// breaking CFG structure, but cave and break such structures in the case of
+/// very hot successor edges.
+///
+/// \returns The best successor block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
+ MachineBasicBlock *BB, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+ const BranchProbability HotProb(4, 5); // 80%
+
+ MachineBasicBlock *BestSucc = 0;
+ // FIXME: Due to the performance of the probability and weight routines in
+ // the MBPI analysis, we manually compute probabilities using the edge
+ // weights. This is suboptimal as it means that the somewhat subtle
+ // definition of edge weight semantics is encoded here as well. We should
+  // improve the MBPI interface to efficiently support query patterns such as
+ // this.
+ uint32_t BestWeight = 0;
+ uint32_t WeightScale = 0;
+ uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
+ DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end();
+ SI != SE; ++SI) {
+ if (BlockFilter && !BlockFilter->count(*SI))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ if (&SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n");
+ continue;
+ }
+ if (*SI != *SuccChain.begin()) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n");
+ continue;
+ }
+
+ uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
+ BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+
+ // Only consider successors which are either "hot", or wouldn't violate
+ // any CFG constraints.
+ if (SuccChain.LoopPredecessors != 0) {
+ if (SuccProb < HotProb) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n");
+ continue;
+ }
+
+ // Make sure that a hot successor doesn't have a globally more important
+ // predecessor.
+ BlockFrequency CandidateEdgeFreq
+ = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+ bool BadCFGConflict = false;
+ for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(),
+ PE = (*SI)->pred_end();
+ PI != PE; ++PI) {
+ if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) ||
+ BlockToChain[*PI] == &Chain)
+ continue;
+ BlockFrequency PredEdgeFreq
+ = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI);
+ if (PredEdgeFreq >= CandidateEdgeFreq) {
+ BadCFGConflict = true;
+ break;
+ }
+ }
+ if (BadCFGConflict) {
+ DEBUG(dbgs() << " " << getBlockName(*SI)
+ << " -> non-cold CFG conflict\n");
+ continue;
+ }
+ }
+
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
+ << " (prob)"
+ << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
+ << "\n");
+ if (BestSucc && BestWeight >= SuccWeight)
+ continue;
+ BestSucc = *SI;
+ BestWeight = SuccWeight;
+ }
+ return BestSucc;
+}
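
A worked sketch of the selection logic above, with plain integers for edge weights: the heaviest successor wins, but a successor that would break the CFG shape is only accepted when its probability clears the 4/5 hot threshold, checked by cross-multiplication rather than floating point. This is an illustration, not the pass's code.

#include <cstdint>
#include <cstdio>
#include <vector>

struct Succ {
  int Block;
  uint32_t Weight;
  bool BreaksCFG; // true when the successor still has unplaced predecessors
};

// Pick the heaviest successor, but only accept a CFG-breaking candidate when
// its probability (Weight / SumWeight) is at least the 4/5 "hot" threshold.
static int selectBestSuccessor(const std::vector<Succ> &Succs) {
  uint64_t SumWeight = 0;
  for (const Succ &S : Succs)
    SumWeight += S.Weight;
  int Best = -1;
  uint32_t BestWeight = 0;
  for (const Succ &S : Succs) {
    if (S.BreaksCFG && (uint64_t)S.Weight * 5 < SumWeight * 4)
      continue; // not hot enough to justify breaking the CFG shape
    if (Best >= 0 && BestWeight >= S.Weight)
      continue;
    Best = S.Block;
    BestWeight = S.Weight;
  }
  return Best;
}

int main() {
  std::vector<Succ> Succs = {{1, 90, true}, {2, 10, false}};
  std::printf("best successor: BB#%d\n", selectBestSuccessor(Succs)); // BB#1
  return 0;
}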
+
+namespace {
+/// \brief Predicate struct to detect blocks already placed.
+class IsBlockPlaced {
+ const BlockChain &PlacedChain;
+ const BlockToChainMapType &BlockToChain;
+
+public:
+ IsBlockPlaced(const BlockChain &PlacedChain,
+ const BlockToChainMapType &BlockToChain)
+ : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {}
+
+ bool operator()(MachineBasicBlock *BB) const {
+ return BlockToChain.lookup(BB) == &PlacedChain;
+ }
+};
+}
+
+/// \brief Select the best block from a worklist.
+///
+/// This looks through the provided worklist as a list of candidate basic
+/// blocks and selects the most profitable one to place. The definition of
+/// profitable only really makes sense in the context of a loop. This returns
+/// the most frequently visited block in the worklist, which in the case of
+/// a loop, is the one most desirable to be physically close to the rest of the
+/// loop body in order to improve icache behavior.
+///
+/// \returns The best block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter) {
+  // Once we need to walk the worklist looking for a candidate, clean up the
+ // worklist of already placed entries.
+ // FIXME: If this shows up on profiles, it could be folded (at the cost of
+ // some code complexity) into the loop below.
+ WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
+ IsBlockPlaced(Chain, BlockToChain)),
+ WorkList.end());
+
+ MachineBasicBlock *BestBlock = 0;
+ BlockFrequency BestFreq;
+ for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
+ WBE = WorkList.end();
+ WBI != WBE; ++WBI) {
+ BlockChain &SuccChain = *BlockToChain[*WBI];
+ if (&SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(*WBI)
+ << " -> Already merged!\n");
+ continue;
+ }
+ assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
+
+ BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
+ DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq
+ << " (freq)\n");
+ if (BestBlock && BestFreq >= CandidateFreq)
+ continue;
+ BestBlock = *WBI;
+ BestFreq = CandidateFreq;
+ }
+ return BestBlock;
+}
+
+/// \brief Retrieve the first unplaced basic block.
+///
+/// This routine is called when we are unable to use the CFG to walk through
+/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
+/// We walk through the function's blocks in order, starting from the
+/// LastUnplacedBlockIt. We update this iterator on each call to avoid
+/// re-scanning the entire sequence on repeated calls to this routine.
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
+ MachineFunction &F, const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter) {
+ for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
+ ++I) {
+ if (BlockFilter && !BlockFilter->count(I))
+ continue;
+ if (BlockToChain[I] != &PlacedChain) {
+ PrevUnplacedBlockIt = I;
+ // Now select the head of the chain to which the unplaced block belongs
+ // as the block to place. This will force the entire chain to be placed,
+ // and satisfies the requirements of merging chains.
+ return *BlockToChain[I]->begin();
+ }
+ }
+ return 0;
+}
+
+void MachineBlockPlacement::buildChain(
+ MachineBasicBlock *BB,
+ BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter) {
+ assert(BB);
+ assert(BlockToChain[BB] == &Chain);
+ MachineFunction &F = *BB->getParent();
+ MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
+
+ MachineBasicBlock *LoopHeaderBB = BB;
+ markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
+ BB = *llvm::prior(Chain.end());
+ for (;;) {
+ assert(BB);
+ assert(BlockToChain[BB] == &Chain);
+ assert(*llvm::prior(Chain.end()) == BB);
+ MachineBasicBlock *BestSucc = 0;
+
+ // Look for the best viable successor if there is one to place immediately
+ // after this block.
+ BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+
+ // If an immediate successor isn't available, look for the best viable
+ // block among those we've identified as not violating the loop's CFG at
+ // this point. This won't be a fallthrough, but it will increase locality.
+ if (!BestSucc)
+ BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
+
+ if (!BestSucc) {
+ BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
+ BlockFilter);
+ if (!BestSucc)
+ break;
+
+ DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
+ "layout successor until the CFG reduces\n");
+ }
+
+ // Place this block, updating the datastructures to reflect its placement.
+ BlockChain &SuccChain = *BlockToChain[BestSucc];
+ // Zero out LoopPredecessors for the successor we're about to merge in case
+ // we selected a successor that didn't fit naturally into the CFG.
+ SuccChain.LoopPredecessors = 0;
+ DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
+ << " to " << getBlockNum(BestSucc) << "\n");
+ markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
+ Chain.merge(BestSucc, &SuccChain);
+ BB = *llvm::prior(Chain.end());
+ }
+
+ DEBUG(dbgs() << "Finished forming chain for header block "
+ << getBlockNum(*Chain.begin()) << "\n");
+}
+
+/// \brief Find the best loop top block for layout.
+///
+/// This routine implements the logic to analyze the loop looking for the best
+/// block to layout at the top of the loop. Typically this is done to maximize
+/// fallthrough opportunities.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // We don't want to layout the loop linearly in all cases. If the loop header
+ // is just a normal basic block in the loop, we want to look for what block
+ // within the loop is the best one to layout at the top. However, if the loop
+  // header has been pre-merged into a chain due to predecessors not having
+ // analyzable branches, *and* the predecessor it is merged with is *not* part
+ // of the loop, rotating the header into the middle of the loop will create
+ // a non-contiguous range of blocks which is Very Bad. So start with the
+ // header and only rotate if safe.
+ BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+ if (!LoopBlockSet.count(*HeaderChain.begin()))
+ return L.getHeader();
+
+ BlockFrequency BestExitEdgeFreq;
+ MachineBasicBlock *ExitingBB = 0;
+ MachineBasicBlock *LoopingBB = 0;
+ // If there are exits to outer loops, loop rotation can severely limit
+  // fallthrough opportunities unless it selects such an exit. Keep a set of
+ // blocks where rotating to exit with that block will reach an outer loop.
+ SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
+
+ DEBUG(dbgs() << "Finding best loop exit for: "
+ << getBlockName(L.getHeader()) << "\n");
+ for (MachineLoop::block_iterator I = L.block_begin(),
+ E = L.block_end();
+ I != E; ++I) {
+ BlockChain &Chain = *BlockToChain[*I];
+ // Ensure that this block is at the end of a chain; otherwise it could be
+ // mid-way through an inner loop or a successor of an analyzable branch.
+ if (*I != *llvm::prior(Chain.end()))
+ continue;
+
+ // Now walk the successors. We need to establish whether this has a viable
+ // exiting successor and whether it has a viable non-exiting successor.
+ // We store the old exiting state and restore it if a viable looping
+ // successor isn't found.
+ MachineBasicBlock *OldExitingBB = ExitingBB;
+ BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
+ // We also compute and store the best looping successor for use in layout.
+ MachineBasicBlock *BestLoopSucc = 0;
+ // FIXME: Due to the performance of the probability and weight routines in
+ // the MBPI analysis, we use the internal weights. This is only valid
+ // because it is purely a ranking function, we don't care about anything
+ // but the relative values.
+ uint32_t BestLoopSuccWeight = 0;
+ // FIXME: We also manually compute the probabilities to avoid quadratic
+ // behavior.
+ uint32_t WeightScale = 0;
+ uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
+ for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
+ SE = (*I)->succ_end();
+ SI != SE; ++SI) {
+ if ((*SI)->isLandingPad())
+ continue;
+ if (*SI == *I)
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ // Don't split chains, either this chain or the successor's chain.
+ if (&Chain == &SuccChain || *SI != *SuccChain.begin()) {
+ DEBUG(dbgs() << " " << (LoopBlockSet.count(*SI) ? "looping: "
+ : "exiting: ")
+ << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (chain conflict)\n");
+ continue;
+ }
+
+ uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
+ if (LoopBlockSet.count(*SI)) {
+ DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (" << SuccWeight << ")\n");
+ if (BestLoopSucc && BestLoopSuccWeight >= SuccWeight)
+ continue;
+
+ BestLoopSucc = *SI;
+ BestLoopSuccWeight = SuccWeight;
+ continue;
+ }
+
+ BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+ BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
+ DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (" << ExitEdgeFreq << ")\n");
+ // Note that we slightly bias this toward an existing layout successor to
+ // retain incoming order in the absence of better information.
+ // FIXME: Should we bias this more strongly? It's pretty weak.
+ if (!ExitingBB || ExitEdgeFreq > BestExitEdgeFreq ||
+ ((*I)->isLayoutSuccessor(*SI) &&
+ !(ExitEdgeFreq < BestExitEdgeFreq))) {
+ BestExitEdgeFreq = ExitEdgeFreq;
+ ExitingBB = *I;
+ }
+
+ if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI))
+ if (ExitLoop->contains(&L))
+ BlocksExitingToOuterLoop.insert(*I);
+ }
+
+ // Restore the old exiting state, no viable looping successor was found.
+ if (!BestLoopSucc) {
+ ExitingBB = OldExitingBB;
+ BestExitEdgeFreq = OldBestExitEdgeFreq;
+ continue;
+ }
+
+    // If this was the best exiting block so far, also record the looping block.
+ if (ExitingBB == *I)
+ LoopingBB = BestLoopSucc;
+ }
+  // Without a candidate exiting block or with only a single block in the
+ // loop, just use the loop header to layout the loop.
+ if (!ExitingBB || L.getNumBlocks() == 1)
+ return L.getHeader();
+
+ // Also, if we have exit blocks which lead to outer loops but didn't select
+ // one of them as the exiting block we are rotating toward, disable loop
+ // rotation altogether.
+ if (!BlocksExitingToOuterLoop.empty() &&
+ !BlocksExitingToOuterLoop.count(ExitingBB))
+ return L.getHeader();
+
+ assert(LoopingBB && "All successors of a loop block are exit blocks!");
+ DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
+ DEBUG(dbgs() << " Best top block: " << getBlockName(LoopingBB) << "\n");
+ return LoopingBB;
+}
+
+/// \brief Forms basic block chains from the natural loop structures.
+///
+/// These chains are designed to preserve the existing *structure* of the code
+/// as much as possible. We can then stitch the chains together in a way which
+/// both preserves the topological structure and minimizes taken conditional
+/// branches.
+void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
+ MachineLoop &L) {
+ // First recurse through any nested loops, building chains for those inner
+ // loops.
+ for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
+ buildLoopChains(F, **LI);
+
+ SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+ BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
+
+ MachineBasicBlock *LayoutTop = findBestLoopTop(F, L, LoopBlockSet);
+ BlockChain &LoopChain = *BlockToChain[LayoutTop];
+
+ // FIXME: This is a really lame way of walking the chains in the loop: we
+ // walk the blocks, and use a set to prevent visiting a particular chain
+ // twice.
+ SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ assert(LoopChain.LoopPredecessors == 0);
+ UpdatedPreds.insert(&LoopChain);
+ for (MachineLoop::block_iterator BI = L.block_begin(),
+ BE = L.block_end();
+ BI != BE; ++BI) {
+ BlockChain &Chain = *BlockToChain[*BI];
+ if (!UpdatedPreds.insert(&Chain))
+ continue;
+
+ assert(Chain.LoopPredecessors == 0);
+ for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
+ BCI != BCE; ++BCI) {
+ assert(BlockToChain[*BCI] == &Chain);
+ for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
+ PE = (*BCI)->pred_end();
+ PI != PE; ++PI) {
+ if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI))
+ continue;
+ ++Chain.LoopPredecessors;
+ }
+ }
+
+ if (Chain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*Chain.begin());
+ }
+
+ buildChain(LayoutTop, LoopChain, BlockWorkList, &LoopBlockSet);
+
+ DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadLoop = false;
+ if (LoopChain.LoopPredecessors) {
+ BadLoop = true;
+ dbgs() << "Loop chain contains a block without its preds placed!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
+ }
+ for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
+ BCI != BCE; ++BCI)
+ if (!LoopBlockSet.erase(*BCI)) {
+ // We don't mark the loop as bad here because there are real situations
+ // where this can occur. For example, with an unanalyzable fallthrough
+ // from a loop block to a non-loop block or vice versa.
+ dbgs() << "Loop chain contains a block not contained by the loop!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
+ << " Bad block: " << getBlockName(*BCI) << "\n";
+ }
+
+ if (!LoopBlockSet.empty()) {
+ BadLoop = true;
+ for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(),
+ LBE = LoopBlockSet.end();
+ LBI != LBE; ++LBI)
+ dbgs() << "Loop contains blocks never placed into a chain!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
+ << " Bad block: " << getBlockName(*LBI) << "\n";
+ }
+ assert(!BadLoop && "Detected problems with the placement of this loop.");
+ });
+}
+
+void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
+ // Ensure that every BB in the function has an associated chain to simplify
+ // the assumptions of the remaining algorithm.
+ SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ MachineBasicBlock *BB = FI;
+ BlockChain *Chain
+ = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
+ // Also, merge any blocks which we cannot reason about and must preserve
+ // the exact fallthrough behavior for.
+ for (;;) {
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
+ break;
+
+ MachineFunction::iterator NextFI(llvm::next(FI));
+ MachineBasicBlock *NextBB = NextFI;
+ // Ensure that the layout successor is a viable block, as we know that
+ // fallthrough is a possibility.
+ assert(NextFI != FE && "Can't fallthrough past the last block.");
+ DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
+ << getBlockName(BB) << " -> " << getBlockName(NextBB)
+ << "\n");
+ Chain->merge(NextBB, 0);
+ FI = NextFI;
+ BB = NextBB;
+ }
+ }
+
+ // Build any loop-based chains.
+ for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
+ ++LI)
+ buildLoopChains(F, **LI);
+
+ SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+
+ SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ MachineBasicBlock *BB = &*FI;
+ BlockChain &Chain = *BlockToChain[BB];
+ if (!UpdatedPreds.insert(&Chain))
+ continue;
+
+ assert(Chain.LoopPredecessors == 0);
+ for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
+ BCI != BCE; ++BCI) {
+ assert(BlockToChain[*BCI] == &Chain);
+ for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
+ PE = (*BCI)->pred_end();
+ PI != PE; ++PI) {
+ if (BlockToChain[*PI] == &Chain)
+ continue;
+ ++Chain.LoopPredecessors;
+ }
+ }
+
+ if (Chain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*Chain.begin());
+ }
+
+ BlockChain &FunctionChain = *BlockToChain[&F.front()];
+ buildChain(&F.front(), FunctionChain, BlockWorkList);
+
+ typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
+ DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadFunc = false;
+ FunctionBlockSetType FunctionBlockSet;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ FunctionBlockSet.insert(FI);
+
+ for (BlockChain::iterator BCI = FunctionChain.begin(),
+ BCE = FunctionChain.end();
+ BCI != BCE; ++BCI)
+ if (!FunctionBlockSet.erase(*BCI)) {
+ BadFunc = true;
+ dbgs() << "Function chain contains a block not in the function!\n"
+ << " Bad block: " << getBlockName(*BCI) << "\n";
+ }
+
+ if (!FunctionBlockSet.empty()) {
+ BadFunc = true;
+ for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
+ FBE = FunctionBlockSet.end();
+ FBI != FBE; ++FBI)
+ dbgs() << "Function contains blocks never placed into a chain!\n"
+ << " Bad block: " << getBlockName(*FBI) << "\n";
+ }
+ assert(!BadFunc && "Detected problems with the block placement.");
+ });
+
+ // Splice the blocks into place.
+ MachineFunction::iterator InsertPos = F.begin();
+ for (BlockChain::iterator BI = FunctionChain.begin(),
+ BE = FunctionChain.end();
+ BI != BE; ++BI) {
+ DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
+ : " ... ")
+ << getBlockName(*BI) << "\n");
+ if (InsertPos != MachineFunction::iterator(*BI))
+ F.splice(InsertPos, *BI);
+ else
+ ++InsertPos;
+
+ // Update the terminator of the previous block.
+ if (BI == FunctionChain.begin())
+ continue;
+ MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI));
+
+    // FIXME: It would be awesome if updateTerminator would just return rather
+    // than assert when the branch cannot be analyzed, in order to remove this
+    // boilerplate.
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond))
+ PrevBB->updateTerminator();
+ }
+
+ // Fixup the last block.
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
+ F.back().updateTerminator();
+}
+
+/// \brief Recursive helper to align a loop and any nested loops.
+static void AlignLoop(MachineFunction &F, MachineLoop *L, unsigned Align) {
+ // Recurse through nested loops.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ AlignLoop(F, *I, Align);
+
+ L->getTopBlock()->setAlignment(Align);
+}
+
+/// \brief Align loop headers to target preferred alignments.
+void MachineBlockPlacement::AlignLoops(MachineFunction &F) {
+ if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ return;
+
+ unsigned Align = TLI->getPrefLoopAlignment();
+ if (!Align)
+ return; // Don't care about loop alignment.
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I)
+ AlignLoop(F, *I, Align);
+}
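
Loop alignment recurses through nested loops and aligns each loop's top block; below is a compact standalone model of that traversal over a toy loop tree, where setting the alignment simply records the request (the alignment value is log2 of the byte alignment, as in MachineBasicBlock).

#include <cstdio>
#include <map>
#include <vector>

struct Loop {
  int TopBlock;
  std::vector<Loop *> SubLoops;
};

static std::map<int, unsigned> BlockAlignment;

// Recursive helper mirroring AlignLoop: visit nested loops first, then align
// this loop's top block.
static void alignLoop(Loop *L, unsigned Align) {
  for (Loop *Sub : L->SubLoops)
    alignLoop(Sub, Align);
  BlockAlignment[L->TopBlock] = Align;
}

int main() {
  Loop Inner = {5, {}};
  Loop Outer = {2, {&Inner}};
  alignLoop(&Outer, 4); // e.g. a preferred loop alignment of 2^4 bytes
  for (auto &KV : BlockAlignment)
    std::printf("BB#%d aligned to 2^%u bytes\n", KV.first, KV.second);
  return 0;
}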
+
+bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
+ // Check for single-block functions and skip them.
+ if (llvm::next(F.begin()) == F.end())
+ return false;
+
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ TII = F.getTarget().getInstrInfo();
+ TLI = F.getTarget().getTargetLowering();
+ assert(BlockToChain.empty());
+
+ buildCFGChains(F);
+ AlignLoops(F);
+
+ BlockToChain.clear();
+ ChainAllocator.DestroyAll();
+
+ // We always return true as we have no way to track whether the final order
+ // differs from the original order.
+ return true;
+}
+
+namespace {
+/// \brief A pass to compute block placement statistics.
+///
+/// A separate pass to compute interesting statistics for evaluating block
+/// placement. This is separate from the actual placement pass so that they can
+/// be computed in the absence of any placement transformations or when using
+/// alternative placement strategies.
+class MachineBlockPlacementStats : public MachineFunctionPass {
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ /// \brief A handle to the function-wide block frequency pass.
+ const MachineBlockFrequencyInfo *MBFI;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineBlockPlacementStats() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char MachineBlockPlacementStats::ID = 0;
+char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
+INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
+ "Basic Block Placement Stats", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
+ "Basic Block Placement Stats", false, false)
+
+bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
+ // Check for single-block functions and skip them.
+ if (llvm::next(F.begin()) == F.end())
+ return false;
+
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(I);
+ Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches
+ : NumUncondBranches;
+ Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq
+ : UncondBranchTakenFreq;
+ for (MachineBasicBlock::succ_iterator SI = I->succ_begin(),
+ SE = I->succ_end();
+ SI != SE; ++SI) {
+ // Skip if this successor is a fallthrough.
+ if (I->isLayoutSuccessor(*SI))
+ continue;
+
+ BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI);
+ ++NumBranches;
+ BranchTakenFreq += EdgeFreq.getFrequency();
+ }
+ }
+
+ return false;
+}
+
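The stats pass effectively sums, per branch kind, the frequency with which non-fallthrough edges are taken. A compact model of that accumulation, with block frequencies and edge probabilities as plain numbers rather than BlockFrequency/BranchProbability:

#include <cstdio>
#include <vector>

struct Edge {
  double Probability;
  bool IsLayoutSuccessor; // fallthrough edges are not counted as taken
};

struct Block {
  double Frequency;
  std::vector<Edge> Succs;
};

int main() {
  std::vector<Block> Blocks = {
    {10.0, {{0.7, true}, {0.3, false}}}, // conditional: one taken edge
    {3.0, {{1.0, false}}},               // unconditional taken branch
  };

  unsigned NumCond = 0, NumUncond = 0;
  double CondTakenFreq = 0, UncondTakenFreq = 0;
  for (const Block &B : Blocks) {
    bool IsCond = B.Succs.size() > 1;
    for (const Edge &E : B.Succs) {
      if (E.IsLayoutSuccessor)
        continue; // fallthrough, not a taken branch
      double TakenFreq = B.Frequency * E.Probability;
      (IsCond ? NumCond : NumUncond) += 1;
      (IsCond ? CondTakenFreq : UncondTakenFreq) += TakenFreq;
    }
  }
  std::printf("cond: %u branches, taken freq %.1f\n", NumCond, CondTakenFreq);
  std::printf("uncond: %u branches, taken freq %.1f\n", NumUncond,
              UncondTakenFreq);
  return 0;
}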
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index c13fa6bc5333..0cc1af07952d 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -26,26 +26,43 @@ INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
char MachineBranchProbabilityInfo::ID = 0;
-uint32_t MachineBranchProbabilityInfo::
-getSumForBlock(MachineBasicBlock *MBB) const {
- uint32_t Sum = 0;
+void MachineBranchProbabilityInfo::anchor() { }
+uint32_t MachineBranchProbabilityInfo::
+getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
+  // First we compute the sum with 64 bits of precision, ensuring that it
+  // cannot overflow by bounding the number of weights considered. Hopefully
+  // no one actually needs 2^32 successors.
+ assert(MBB->succ_size() < UINT32_MAX);
+ uint64_t Sum = 0;
+ Scale = 1;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- MachineBasicBlock *Succ = *I;
- uint32_t Weight = getEdgeWeight(MBB, Succ);
- uint32_t PrevSum = Sum;
-
+ uint32_t Weight = getEdgeWeight(MBB, *I);
Sum += Weight;
- assert(Sum > PrevSum); (void) PrevSum;
}
+ // If the computed sum fits in 32-bits, we're done.
+ if (Sum <= UINT32_MAX)
+ return Sum;
+
+ // Otherwise, compute the scale necessary to cause the weights to fit, and
+ // re-sum with that scale applied.
+ assert((Sum / UINT32_MAX) < UINT32_MAX);
+ Scale = (Sum / UINT32_MAX) + 1;
+ Sum = 0;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ uint32_t Weight = getEdgeWeight(MBB, *I);
+ Sum += Weight / Scale;
+ }
+ assert(Sum <= UINT32_MAX);
return Sum;
}
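
The overflow handling above can be reproduced in a few lines: sum the weights in 64 bits, and if the total exceeds UINT32_MAX derive a divisor that brings each weight and the re-computed sum back into 32-bit range. A self-contained sketch:

#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

// Return a 32-bit sum of the weights; Scale is set so that each individual
// weight should be divided by it before being compared against the sum.
static uint32_t sumForBlock(const std::vector<uint32_t> &Weights,
                            uint32_t &Scale) {
  uint64_t Sum = 0;
  Scale = 1;
  for (uint32_t W : Weights)
    Sum += W;
  if (Sum <= std::numeric_limits<uint32_t>::max())
    return (uint32_t)Sum;

  // Too big for 32 bits: scale every weight down and re-sum.
  Scale = (uint32_t)(Sum / std::numeric_limits<uint32_t>::max()) + 1;
  Sum = 0;
  for (uint32_t W : Weights)
    Sum += W / Scale;
  return (uint32_t)Sum;
}

int main() {
  std::vector<uint32_t> Weights(3, 3000000000u); // ~9e9 total, overflows 2^32
  uint32_t Scale = 1;
  uint32_t Sum = sumForBlock(Weights, Scale);
  std::printf("scale = %u, scaled sum = %u\n", Scale, Sum);
  return 0;
}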
uint32_t
-MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src,
- MachineBasicBlock *Dst) const {
+MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
uint32_t Weight = Src->getSuccWeight(Dst);
if (!Weight)
return DEFAULT_WEIGHT;
@@ -55,37 +72,24 @@ MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src,
bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
MachineBasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
- uint32_t Weight = getEdgeWeight(Src, Dst);
- uint32_t Sum = getSumForBlock(Src);
-
- // FIXME: Implement BranchProbability::compare then change this code to
- // compare this BranchProbability against a static "hot" BranchProbability.
- return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
+ // FIXME: Compare against a static "hot" BranchProbability.
+ return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
}
MachineBasicBlock *
MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
- uint32_t Sum = 0;
uint32_t MaxWeight = 0;
MachineBasicBlock *MaxSucc = 0;
-
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- MachineBasicBlock *Succ = *I;
- uint32_t Weight = getEdgeWeight(MBB, Succ);
- uint32_t PrevSum = Sum;
-
- Sum += Weight;
- assert(Sum > PrevSum); (void) PrevSum;
-
+ uint32_t Weight = getEdgeWeight(MBB, *I);
if (Weight > MaxWeight) {
MaxWeight = Weight;
- MaxSucc = Succ;
+ MaxSucc = *I;
}
}
- // FIXME: Use BranchProbability::compare.
- if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4)
+ if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5))
return MaxSucc;
return 0;
@@ -94,8 +98,9 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
BranchProbability
MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src,
MachineBasicBlock *Dst) const {
- uint32_t N = getEdgeWeight(Src, Dst);
- uint32_t D = getSumForBlock(Src);
+ uint32_t Scale = 1;
+ uint32_t D = getSumForBlock(Src, Scale);
+ uint32_t N = getEdgeWeight(Src, Dst) / Scale;
return BranchProbability(N, D);
}
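
isEdgeHot now compares the edge's BranchProbability against a static 4/5 threshold instead of re-deriving the sum by hand. Comparing two probabilities without floating point is a 64-bit cross-multiplication, as in this sketch (the Probability struct is a stand-in for llvm::BranchProbability, which provides the comparison itself):

#include <cstdint>
#include <cstdio>

struct Probability {
  uint32_t N, D; // numerator / denominator

  bool operator>(const Probability &RHS) const {
    // N/D > RHS.N/RHS.D  <=>  N*RHS.D > RHS.N*D, computed in 64 bits.
    return (uint64_t)N * RHS.D > (uint64_t)RHS.N * D;
  }
};

// Hot probability is at least 4/5 = 80%.
static bool isEdgeHot(Probability EdgeProb) {
  return EdgeProb > Probability{4, 5};
}

int main() {
  std::printf("90/100 hot: %d\n", isEdgeHot({90, 100})); // 1
  std::printf("50/100 hot: %d\n", isEdgeHot({50, 100})); // 0
  return 0;
}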
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 7eda8c129dc4..a63688e9ec62 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -26,13 +26,14 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
-
using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPhysCSEs,
"Number of physreg referencing common subexpr eliminated");
+STATISTIC(NumCrossBBCSEs,
+ "Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
namespace {
@@ -49,7 +50,7 @@ namespace {
}
virtual bool runOnMachineFunction(MachineFunction &MF);
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -62,6 +63,8 @@ namespace {
virtual void releaseMemory() {
ScopeMap.clear();
Exps.clear();
+ AllocatableRegs.clear();
+ ReservedRegs.clear();
}
private:
@@ -75,6 +78,8 @@ namespace {
ScopedHTType VNT;
SmallVector<MachineInstr*, 64> Exps;
unsigned CurrVN;
+ BitVector AllocatableRegs;
+ BitVector ReservedRegs;
bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
bool isPhysDefTriviallyDead(unsigned Reg,
@@ -82,9 +87,12 @@ namespace {
MachineBasicBlock::const_iterator E) const ;
bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned,8> &PhysRefs) const;
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs) const;
bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned,8> &PhysRefs) const;
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &NonLocal) const;
bool isCSECandidate(MachineInstr *MI);
bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI);
@@ -99,6 +107,7 @@ namespace {
} // end anonymous namespace
char MachineCSE::ID = 0;
+char &llvm::MachineCSEID = MachineCSE::ID;
INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
"Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -106,8 +115,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MachineCSE, "machine-cse",
"Machine Common Subexpression Elimination", false, false)
-FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
-
bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB) {
bool Changed = false;
@@ -163,6 +170,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
bool SeenDef = false;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = I->getOperand(i);
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ SeenDef = true;
if (!MO.isReg() || !MO.getReg())
continue;
if (!TRI->regsOverlap(MO.getReg(), Reg))
@@ -173,7 +182,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
SeenDef = true;
}
if (SeenDef)
- // See a def of Reg (or an alias) before encountering any use, it's
+ // See a def of Reg (or an alias) before encountering any use, it's
// trivially dead.
return true;
@@ -189,7 +198,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
/// instruction does not use a physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned,8> &PhysRefs) const {
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs) const{
MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
@@ -207,7 +217,9 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
(MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
continue;
PhysRefs.insert(Reg);
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ if (MO.isDef())
+ PhysDefs.push_back(Reg);
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
PhysRefs.insert(*Alias);
}
@@ -215,25 +227,56 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
}
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned,8> &PhysRefs) const {
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &NonLocal) const {
// For now conservatively returns false if the common subexpression is
- // not in the same basic block as the given instruction.
- MachineBasicBlock *MBB = MI->getParent();
- if (CSMI->getParent() != MBB)
- return false;
+ // not in the same basic block as the given instruction. The only exception
+ // is if the common subexpression is in the sole predecessor block.
+ const MachineBasicBlock *MBB = MI->getParent();
+ const MachineBasicBlock *CSMBB = CSMI->getParent();
+
+ bool CrossMBB = false;
+ if (CSMBB != MBB) {
+ if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
+ return false;
+
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
+ if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i]))
+ // Avoid extending live range of physical registers if they are
+        // allocatable or reserved.
+ return false;
+ }
+ CrossMBB = true;
+ }
MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
MachineBasicBlock::const_iterator E = MI;
+ MachineBasicBlock::const_iterator EE = CSMBB->end();
unsigned LookAheadLeft = LookAheadLimit;
while (LookAheadLeft) {
// Skip over dbg_value's.
- while (I != E && I->isDebugValue())
+ while (I != E && I != EE && I->isDebugValue())
++I;
+ if (I == EE) {
+ assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
+ (void)CrossMBB;
+ CrossMBB = false;
+ NonLocal = true;
+ I = MBB->begin();
+ EE = MBB->end();
+ continue;
+ }
+
if (I == E)
return true;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = I->getOperand(i);
+ // RegMasks go on instructions like calls that clobber lots of physregs.
+ // Don't attempt to CSE across such an instruction.
+ if (MO.isRegMask())
+ return false;
if (!MO.isReg() || !MO.isDef())
continue;
unsigned MOReg = MO.getReg();
@@ -260,12 +303,11 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
return false;
// Ignore stuff that we obviously can't move.
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() ||
+ if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
MI->hasUnmodeledSideEffects())
return false;
- if (MCID.mayLoad()) {
+ if (MI->mayLoad()) {
// Okay, this instruction does a load. As a refinement, we allow the target
// to decide whether the loaded value is actually a constant. If so, we can
// actually use it as a load.
@@ -287,7 +329,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
// an immediate predecessor. We don't want to increase register pressure and
// end up causing other computation to be spilled.
- if (MI->getDesc().isAsCheapAsAMove()) {
+ if (MI->isAsCheapAsAMove()) {
MachineBasicBlock *CSBB = CSMI->getParent();
MachineBasicBlock *BB = MI->getParent();
if (CSBB != BB && !CSBB->isSuccessor(BB))
@@ -376,7 +418,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Commute commutable instructions.
bool Commuted = false;
- if (!FoundCSE && MI->getDesc().isCommutable()) {
+ if (!FoundCSE && MI->isCommutable()) {
MachineInstr *NewMI = TII->commuteInstruction(MI);
if (NewMI) {
Commuted = true;
@@ -394,16 +436,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// If the instruction defines physical registers and the values *may* be
// used, then it's not safe to replace it with a common subexpression.
// It's also not safe if the instruction uses physical registers.
+ bool CrossMBBPhysDef = false;
SmallSet<unsigned,8> PhysRefs;
- if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) {
+ SmallVector<unsigned, 2> PhysDefs;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) {
FoundCSE = false;
- // ... Unless the CS is local and it also defines the physical register
- // which is not clobbered in between and the physical register uses
- // were not clobbered.
+ // ... Unless the CS is local or is in the sole predecessor block
+ // and it also defines the physical register which is not clobbered
+ // in between and the physical register uses were not clobbered.
unsigned CSVN = VNT.lookup(MI);
MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefsReach(CSMI, MI, PhysRefs))
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
FoundCSE = true;
}
@@ -458,6 +502,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
MRI->clearKillFlags(CSEPairs[i].second);
}
+
+ if (CrossMBBPhysDef) {
+ // Add physical register defs now coming in from a predecessor to MBB
+ // livein list.
+ while (!PhysDefs.empty()) {
+ unsigned LiveIn = PhysDefs.pop_back_val();
+ if (!MBB->isLiveIn(LiveIn))
+ MBB->addLiveIn(LiveIn);
+ }
+ ++NumCrossBBCSEs;
+ }
+
MI->eraseFromParent();
++NumCSEs;
if (!PhysRefs.empty())
@@ -542,5 +598,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<MachineDominatorTree>();
+ AllocatableRegs = TRI->getAllocatableSet(MF);
+ ReservedRegs = TRI->getReservedRegs(MF);
return PerformCSE(DT->getRootNode());
}
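The MachineCSE changes above turn on two new checks: a register-mask operand (added by calls) is treated as a def of every physical register it clobbers, and a CSE across blocks is only attempted when the candidate's block has the earlier expression's block as its sole predecessor. A minimal sketch of those two predicates follows; the helper names are made up, and the sketch ignores the alias sets and the look-ahead limit the pass also applies:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Sketch: does this operand define or clobber physical register Reg?
// A register mask counts as a def of everything it clobbers.
static bool defsPhysReg(const MachineOperand &MO, unsigned Reg,
                        const TargetRegisterInfo *TRI) {
  if (MO.isRegMask())
    return MO.clobbersPhysReg(Reg);
  return MO.isReg() && MO.isDef() && MO.getReg() &&
         TRI->regsOverlap(MO.getReg(), Reg);
}

// Sketch: the cross-MBB case only fires for a single-predecessor block whose
// unique predecessor contains the earlier, identical expression.
static bool mayCSEAcrossBlocks(const MachineBasicBlock *MBB,
                               const MachineBasicBlock *CSMBB) {
  return MBB != CSMBB && MBB->pred_size() == 1 && *MBB->pred_begin() == CSMBB;
}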
diff --git a/lib/CodeGen/MachineCodeEmitter.cpp b/lib/CodeGen/MachineCodeEmitter.cpp
new file mode 100644
index 000000000000..81b49784c052
--- /dev/null
+++ b/lib/CodeGen/MachineCodeEmitter.cpp
@@ -0,0 +1,14 @@
+//===-- llvm/CodeGen/MachineCodeEmitter.cpp - Code emission -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+
+using namespace llvm;
+
+void MachineCodeEmitter::anchor() { }
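The new MachineCodeEmitter.cpp exists only to define the class's anchor() method out of line, which pins the vtable to this one translation unit instead of having every including file emit its own weak copy. The same idiom in reduced form, where Widget is a made-up class rather than an LLVM type:

// Widget.h
struct Widget {
  virtual void anchor();                  // declared but never called
  virtual int kind() const { return 0; }
};

// Widget.cpp - the only object file that carries Widget's vtable
void Widget::anchor() {}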
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
new file mode 100644
index 000000000000..9730eaacf6e4
--- /dev/null
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -0,0 +1,340 @@
+//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level copy propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-cp"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeletes, "Number of dead copies deleted");
+
+namespace {
+ class MachineCopyPropagation : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ BitVector ReservedRegs;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineCopyPropagation() : MachineFunctionPass(ID) {
+ initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ typedef SmallVector<unsigned, 4> DestList;
+ typedef DenseMap<unsigned, DestList> SourceMap;
+
+ void SourceNoLongerAvailable(unsigned Reg,
+ SourceMap &SrcMap,
+ DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
+ bool CopyPropagateBlock(MachineBasicBlock &MBB);
+ };
+}
+char MachineCopyPropagation::ID = 0;
+char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
+
+INITIALIZE_PASS(MachineCopyPropagation, "machine-cp",
+ "Machine Copy Propagation Pass", false, false)
+
+void
+MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
+ SourceMap &SrcMap,
+ DenseMap<unsigned, MachineInstr*> &AvailCopyMap) {
+ SourceMap::iterator SI = SrcMap.find(Reg);
+ if (SI != SrcMap.end()) {
+ const DestList& Defs = SI->second;
+ for (DestList::const_iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ unsigned MappedDef = *I;
+ // Source of copy is no longer available for propagation.
+ if (AvailCopyMap.erase(MappedDef)) {
+ for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR)
+ AvailCopyMap.erase(*SR);
+ }
+ }
+ }
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ SI = SrcMap.find(*AS);
+ if (SI != SrcMap.end()) {
+ const DestList& Defs = SI->second;
+ for (DestList::const_iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ unsigned MappedDef = *I;
+ if (AvailCopyMap.erase(MappedDef)) {
+ for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR)
+ AvailCopyMap.erase(*SR);
+ }
+ }
+ }
+ }
+}
+
+static bool NoInterveningSideEffect(const MachineInstr *CopyMI,
+ const MachineInstr *MI) {
+ const MachineBasicBlock *MBB = CopyMI->getParent();
+ if (MI->getParent() != MBB)
+ return false;
+ MachineBasicBlock::const_iterator I = CopyMI;
+ MachineBasicBlock::const_iterator E = MBB->end();
+ MachineBasicBlock::const_iterator E2 = MI;
+
+ ++I;
+ while (I != E && I != E2) {
+ if (I->hasUnmodeledSideEffects() || I->isCall() ||
+ I->isTerminator())
+ return false;
+ ++I;
+ }
+ return true;
+}
+
+/// isNopCopy - Return true if the specified copy is really a nop. That is
+/// if the source of the copy is the same as the definition of the copy that
+/// supplied the source. If the source of the copy is a sub-register then it
+/// must check that the sub-indices match. e.g.
+/// ecx = mov eax
+/// al = mov cl
+/// But not
+/// ecx = mov eax
+/// al = mov ch
+static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src,
+ const TargetRegisterInfo *TRI) {
+ unsigned SrcSrc = CopyMI->getOperand(1).getReg();
+ if (Def == SrcSrc)
+ return true;
+ if (TRI->isSubRegister(SrcSrc, Def)) {
+ unsigned SrcDef = CopyMI->getOperand(0).getReg();
+ unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def);
+ if (!SubIdx)
+ return false;
+ return SubIdx == TRI->getSubRegIndex(SrcDef, Src);
+ }
+
+ return false;
+}
+
+bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
+ SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
+ DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map
+ DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map
+ SourceMap SrcMap; // Src -> Def map
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (MI->isCopy()) {
+ unsigned Def = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Def) ||
+ TargetRegisterInfo::isVirtualRegister(Src))
+ report_fatal_error("MachineCopyPropagation should be run after"
+ " register allocation!");
+
+ DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
+ if (CI != AvailCopyMap.end()) {
+ MachineInstr *CopyMI = CI->second;
+ if (!ReservedRegs.test(Def) &&
+ (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
+ isNopCopy(CopyMI, Def, Src, TRI)) {
+ // The two copies cancel out and the source of the first copy
+ // hasn't been overridden, eliminate the second one. e.g.
+ // %ECX<def> = COPY %EAX<kill>
+ // ... nothing clobbered EAX.
+ // %EAX<def> = COPY %ECX
+ // =>
+ // %ECX<def> = COPY %EAX
+ //
+ // Also avoid eliminating a copy from reserved registers unless the
+ // definition is proven not clobbered. e.g.
+ // %RSP<def> = COPY %RAX
+ // CALL
+ // %RAX<def> = COPY %RSP
+
+ // Clear any kills of Def between CopyMI and MI. This extends the
+ // live range.
+ for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
+ I->clearRegisterKills(Def, TRI);
+
+ MI->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ continue;
+ }
+ }
+
+      // If Src is defined by a previous copy, that copy cannot be eliminated.
+ CI = CopyMap.find(Src);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ for (const uint16_t *AS = TRI->getAliasSet(Src); *AS; ++AS) {
+ CI = CopyMap.find(*AS);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ }
+
+ // Copy is now a candidate for deletion.
+ MaybeDeadCopies.insert(MI);
+
+ // If 'Src' is previously source of another copy, then this earlier copy's
+ // source is no longer available. e.g.
+ // %xmm9<def> = copy %xmm2
+ // ...
+ // %xmm2<def> = copy %xmm0
+ // ...
+ // %xmm2<def> = copy %xmm9
+ SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);
+
+ // Remember Def is defined by the copy.
+ // ... Make sure to clear the def maps of aliases first.
+ for (const uint16_t *AS = TRI->getAliasSet(Def); *AS; ++AS) {
+ CopyMap.erase(*AS);
+ AvailCopyMap.erase(*AS);
+ }
+ CopyMap[Def] = MI;
+ AvailCopyMap[Def] = MI;
+ for (const uint16_t *SR = TRI->getSubRegisters(Def); *SR; ++SR) {
+ CopyMap[*SR] = MI;
+ AvailCopyMap[*SR] = MI;
+ }
+
+ // Remember source that's copied to Def. Once it's clobbered, then
+ // it's no longer available for copy propagation.
+ if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) ==
+ SrcMap[Src].end()) {
+ SrcMap[Src].push_back(Def);
+ }
+
+ continue;
+ }
+
+ // Not a copy.
+ SmallVector<unsigned, 2> Defs;
+ int RegMaskOpNum = -1;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ RegMaskOpNum = i;
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ report_fatal_error("MachineCopyPropagation should be run after"
+ " register allocation!");
+
+ if (MO.isDef()) {
+ Defs.push_back(Reg);
+ continue;
+ }
+
+ // If 'Reg' is defined by a copy, the copy is no longer a candidate
+ // for elimination.
+ DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ CI = CopyMap.find(*AS);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ }
+ }
+
+ // The instruction has a register mask operand which means that it clobbers
+ // a large set of registers. It is possible to use the register mask to
+ // prune the available copies, but treat it like a basic block boundary for
+ // now.
+ if (RegMaskOpNum >= 0) {
+ // Erase any MaybeDeadCopies whose destination register is clobbered.
+ const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum);
+ for (SmallSetVector<MachineInstr*, 8>::iterator
+ DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
+ DI != DE; ++DI) {
+ unsigned Reg = (*DI)->getOperand(0).getReg();
+ if (ReservedRegs.test(Reg) || !MaskMO.clobbersPhysReg(Reg))
+ continue;
+ (*DI)->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ }
+
+ // Clear all data structures as if we were beginning a new basic block.
+ MaybeDeadCopies.clear();
+ AvailCopyMap.clear();
+ CopyMap.clear();
+ SrcMap.clear();
+ continue;
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+
+ // No longer defined by a copy.
+ CopyMap.erase(Reg);
+ AvailCopyMap.erase(Reg);
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ CopyMap.erase(*AS);
+ AvailCopyMap.erase(*AS);
+ }
+
+ // If 'Reg' is previously source of a copy, it is no longer available for
+ // copy propagation.
+ SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
+ }
+ }
+
+ // If MBB doesn't have successors, delete the copies whose defs are not used.
+  // If MBB does have successors, then conservatively assume the defs are
+  // live-out since we don't want to trust live-in lists.
+ if (MBB.succ_empty()) {
+ for (SmallSetVector<MachineInstr*, 8>::iterator
+ DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
+ DI != DE; ++DI) {
+ if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
+ (*DI)->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+
+ TRI = MF.getTarget().getRegisterInfo();
+ ReservedRegs = TRI->getReservedRegs(MF);
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= CopyPropagateBlock(*I);
+
+ return Changed;
+}
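The heart of CopyPropagateBlock is a forward scan that remembers which copies are still available and deletes a copy that merely restores a value. A stripped-down sketch of that bookkeeping, using bare unsigned register ids and ignoring sub-registers, aliases, reserved registers, and register masks; findNopCopies and Copy are illustrative names, not part of the pass:

#include <cstddef>
#include <map>
#include <vector>

struct Copy { unsigned Def, Src; };

// For a straight-line block made only of copies, mark the copies that are
// provably nops: the destination already holds the value being copied.
std::vector<bool> findNopCopies(const std::vector<Copy> &Block) {
  std::map<unsigned, unsigned> Avail;              // defined reg -> its source
  std::vector<bool> IsNop(Block.size(), false);
  for (size_t i = 0; i != Block.size(); ++i) {
    unsigned Def = Block[i].Def, Src = Block[i].Src;
    std::map<unsigned, unsigned>::iterator It = Avail.find(Src);
    if (It != Avail.end() && It->second == Def) {
      IsNop[i] = true;                             // B = A right after A = B.
      continue;                                    // Nothing was clobbered.
    }
    // Def is clobbered: any available copy that read Def is now stale.
    for (std::map<unsigned, unsigned>::iterator J = Avail.begin();
         J != Avail.end();)
      if (J->second == Def)
        Avail.erase(J++);
      else
        ++J;
    Avail[Def] = Src;                              // Remember the new copy.
  }
  return IsNop;
}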
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 20066a067b8f..d8c2f6a2eaef 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -13,12 +13,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Config/config.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -28,6 +25,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
@@ -197,9 +195,10 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
MachineMemOperand *
MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
uint64_t s, unsigned base_alignment,
- const MDNode *TBAAInfo) {
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
- TBAAInfo);
+ TBAAInfo, Ranges);
}
MachineMemOperand *
@@ -286,7 +285,13 @@ void MachineFunction::dump() const {
}
void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
- OS << "# Machine code for function " << Fn->getName() << ":\n";
+ OS << "# Machine code for function " << Fn->getName() << ": ";
+ if (RegInfo) {
+ OS << (RegInfo->isSSA() ? "SSA" : "Post SSA");
+ if (!RegInfo->tracksLiveness())
+ OS << ", not tracking liveness";
+ }
+ OS << '\n';
// Print Frame Information
FrameInfo->print(*this, OS);
@@ -335,7 +340,7 @@ namespace llvm {
DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const MachineFunction *F) {
- return "CFG for '" + F->getFunction()->getNameStr() + "' function";
+ return "CFG for '" + F->getFunction()->getName().str() + "' function";
}
std::string getNodeLabel(const MachineBasicBlock *Node,
@@ -368,7 +373,7 @@ namespace llvm {
void MachineFunction::viewCFG() const
{
#ifndef NDEBUG
- ViewGraph(this, "mf" + getFunction()->getNameStr());
+ ViewGraph(this, "mf" + getFunction()->getName());
#else
errs() << "MachineFunction::viewCFG is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
@@ -378,7 +383,7 @@ void MachineFunction::viewCFG() const
void MachineFunction::viewCFGOnly() const
{
#ifndef NDEBUG
- ViewGraph(this, "mf" + getFunction()->getNameStr(), true);
+ ViewGraph(this, "mf" + getFunction()->getName(), true);
#else
errs() << "MachineFunction::viewCFGOnly is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
@@ -464,7 +469,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
if (!isCalleeSavedInfoValid())
return BV;
- for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
+ for (const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
BV.set(*CSR);
// The entry MBB always has all CSRs pristine.
@@ -532,6 +537,8 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
switch (getEntryKind()) {
case MachineJumpTableInfo::EK_BlockAddress:
return TD.getPointerSize();
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ return 8;
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
case MachineJumpTableInfo::EK_LabelDifference32:
case MachineJumpTableInfo::EK_Custom32:
@@ -539,8 +546,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
case MachineJumpTableInfo::EK_Inline:
return 0;
}
- assert(0 && "Unknown jump table encoding!");
- return ~0;
+ llvm_unreachable("Unknown jump table encoding!");
}
/// getEntryAlignment - Return the alignment of each entry in the jump table.
@@ -551,6 +557,8 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
switch (getEntryKind()) {
case MachineJumpTableInfo::EK_BlockAddress:
return TD.getPointerABIAlignment();
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ return TD.getABIIntegerTypeAlignment(64);
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
case MachineJumpTableInfo::EK_LabelDifference32:
case MachineJumpTableInfo::EK_Custom32:
@@ -558,8 +566,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
case MachineJumpTableInfo::EK_Inline:
return 1;
}
- assert(0 && "Unknown jump table encoding!");
- return ~0;
+ llvm_unreachable("Unknown jump table encoding!");
}
/// createJumpTableIndex - Create a new jump table entry in the jump table info.
@@ -619,6 +626,8 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); }
// MachineConstantPool implementation
//===----------------------------------------------------------------------===//
+void MachineConstantPoolValue::anchor() { }
+
Type *MachineConstantPoolEntry::getType() const {
if (isMachineConstantPoolEntry())
return Val.MachineCPVal->getType();
@@ -653,35 +662,37 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
// reject them.
if (A->getType() == B->getType()) return false;
+ // We can't handle structs or arrays.
+ if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) ||
+ isa<StructType>(B->getType()) || isa<ArrayType>(B->getType()))
+ return false;
+
// For now, only support constants with the same size.
- if (TD->getTypeStoreSize(A->getType()) != TD->getTypeStoreSize(B->getType()))
+ uint64_t StoreSize = TD->getTypeStoreSize(A->getType());
+ if (StoreSize != TD->getTypeStoreSize(B->getType()) ||
+ StoreSize > 128)
return false;
- // If a floating-point value and an integer value have the same encoding,
- // they can share a constant-pool entry.
- if (const ConstantFP *AFP = dyn_cast<ConstantFP>(A))
- if (const ConstantInt *BI = dyn_cast<ConstantInt>(B))
- return AFP->getValueAPF().bitcastToAPInt() == BI->getValue();
- if (const ConstantFP *BFP = dyn_cast<ConstantFP>(B))
- if (const ConstantInt *AI = dyn_cast<ConstantInt>(A))
- return BFP->getValueAPF().bitcastToAPInt() == AI->getValue();
-
- // Two vectors can share an entry if each pair of corresponding
- // elements could.
- if (const ConstantVector *AV = dyn_cast<ConstantVector>(A))
- if (const ConstantVector *BV = dyn_cast<ConstantVector>(B)) {
- if (AV->getType()->getNumElements() != BV->getType()->getNumElements())
- return false;
- for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i)
- if (!CanShareConstantPoolEntry(AV->getOperand(i),
- BV->getOperand(i), TD))
- return false;
- return true;
- }
-
- // TODO: Handle other cases.
-
- return false;
+ Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8);
+
+ // Try constant folding a bitcast of both instructions to an integer. If we
+ // get two identical ConstantInt's, then we are good to share them. We use
+ // the constant folding APIs to do this so that we get the benefit of
+ // TargetData.
+ if (isa<PointerType>(A->getType()))
+ A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
+ const_cast<Constant*>(A), TD);
+ else if (A->getType() != IntTy)
+ A = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
+ const_cast<Constant*>(A), TD);
+ if (isa<PointerType>(B->getType()))
+ B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
+ const_cast<Constant*>(B), TD);
+ else if (B->getType() != IntTy)
+ B = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
+ const_cast<Constant*>(B), TD);
+
+ return A == B;
}
/// getConstantPoolIndex - Create a new entry in the constant pool or return
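The rewritten CanShareConstantPoolEntry decides sharing by constant-folding both constants to an integer of the store size and comparing the results, so any two constants with the same in-memory bytes can occupy one pool slot. A standalone illustration of the underlying idea, not the LLVM API, assuming IEEE-754 single-precision float:

#include <cassert>
#include <cstring>
#include <stdint.h>

int main() {
  float F = 1.0f;                  // an FP constant
  uint32_t I = 0x3F800000u;        // an i32 constant with the same bit pattern
  uint32_t FBits;
  std::memcpy(&FBits, &F, sizeof FBits);
  assert(FBits == I);              // identical bytes -> one pool entry suffices
  return 0;
}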
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 054c750c9f2b..35591e1649d3 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -19,9 +19,8 @@ using namespace llvm;
char MachineFunctionAnalysis::ID = 0;
-MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
- CodeGenOpt::Level OL) :
- FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) {
+MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm) :
+ FunctionPass(ID), TM(tm), MF(0) {
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index a240667f7d6a..e553a0463a2a 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -40,6 +40,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -178,6 +179,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
IsKill = isKill;
IsDead = isDead;
IsUndef = isUndef;
+ IsInternalRead = false;
IsEarlyClobber = false;
IsDebug = isDebug;
SubReg = 0;
@@ -191,7 +193,6 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return false;
switch (getType()) {
- default: llvm_unreachable("Unrecognized operand type");
case MachineOperand::MO_Register:
return getReg() == Other.getReg() && isDef() == Other.isDef() &&
getSubReg() == Other.getSubReg();
@@ -216,11 +217,14 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
getOffset() == Other.getOffset();
case MachineOperand::MO_BlockAddress:
return getBlockAddress() == Other.getBlockAddress();
+ case MO_RegisterMask:
+ return getRegMask() == Other.getRegMask();
case MachineOperand::MO_MCSymbol:
return getMCSymbol() == Other.getMCSymbol();
case MachineOperand::MO_Metadata:
return getMetadata() == Other.getMetadata();
}
+ llvm_unreachable("Invalid machine operand type");
}
/// print - Print the specified machine operand.
@@ -240,7 +244,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << PrintReg(getReg(), TRI, getSubReg());
if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
- isEarlyClobber()) {
+ isInternalRead() || isEarlyClobber()) {
OS << '<';
bool NeedComma = false;
if (isDef()) {
@@ -256,14 +260,26 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
NeedComma = true;
}
- if (isKill() || isDead() || isUndef()) {
+ if (isKill() || isDead() || isUndef() || isInternalRead()) {
if (NeedComma) OS << ',';
- if (isKill()) OS << "kill";
- if (isDead()) OS << "dead";
+ NeedComma = false;
+ if (isKill()) {
+ OS << "kill";
+ NeedComma = true;
+ }
+ if (isDead()) {
+ OS << "dead";
+ NeedComma = true;
+ }
if (isUndef()) {
- if (isKill() || isDead())
- OS << ',';
+ if (NeedComma) OS << ',';
OS << "undef";
+ NeedComma = true;
+ }
+ if (isInternalRead()) {
+ if (NeedComma) OS << ',';
+ OS << "internal";
+ NeedComma = true;
}
}
OS << '>';
@@ -311,6 +327,9 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
OS << '>';
break;
+ case MachineOperand::MO_RegisterMask:
+ OS << "<regmask>";
+ break;
case MachineOperand::MO_Metadata:
OS << '<';
WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
@@ -319,8 +338,6 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
case MachineOperand::MO_MCSymbol:
OS << "<MCSym=" << *getMCSymbol() << '>';
break;
- default:
- llvm_unreachable("Unrecognized operand type");
}
if (unsigned TF = getTargetFlags())
@@ -364,10 +381,11 @@ MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
uint64_t s, unsigned int a,
- const MDNode *TBAAInfo)
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges)
: PtrInfo(ptrinfo), Size(s),
Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
- TBAAInfo(TBAAInfo) {
+ TBAAInfo(TBAAInfo), Ranges(Ranges) {
assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) &&
"invalid pointer value");
assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
@@ -465,7 +483,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
/// MCID NULL and no operands.
MachineInstr::MachineInstr()
: MCID(0), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0),
+ NumMemRefs(0), MemRefs(0),
Parent(0) {
// Make sure that we get added to a machine basicblock
LeakDetector::addGarbageObject(this);
@@ -473,10 +491,10 @@ MachineInstr::MachineInstr()
void MachineInstr::addImplicitDefUseOperands() {
if (MCID->ImplicitDefs)
- for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+ for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
if (MCID->ImplicitUses)
- for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses)
+ for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses)
addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
}
@@ -485,7 +503,7 @@ void MachineInstr::addImplicitDefUseOperands() {
/// the MCInstrDesc.
MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0) {
+ NumMemRefs(0), MemRefs(0), Parent(0) {
unsigned NumImplicitOps = 0;
if (!NoImp)
NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -500,7 +518,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
bool NoImp)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+ NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
unsigned NumImplicitOps = 0;
if (!NoImp)
NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -516,7 +534,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
/// basic block.
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0) {
+ NumMemRefs(0), MemRefs(0), Parent(0) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
unsigned NumImplicitOps =
MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -532,7 +550,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
const MCInstrDesc &tid)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+ NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
unsigned NumImplicitOps =
MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -547,7 +565,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
: MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0),
- MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
+ NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
Parent(0), debugLoc(MI.getDebugLoc()) {
Operands.reserve(MI.getNumOperands());
@@ -722,17 +740,33 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
void MachineInstr::addMemOperand(MachineFunction &MF,
MachineMemOperand *MO) {
mmo_iterator OldMemRefs = MemRefs;
- mmo_iterator OldMemRefsEnd = MemRefsEnd;
+ uint16_t OldNumMemRefs = NumMemRefs;
- size_t NewNum = (MemRefsEnd - MemRefs) + 1;
+ uint16_t NewNum = NumMemRefs + 1;
mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
- mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum;
- std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs);
+ std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs);
NewMemRefs[NewNum - 1] = MO;
MemRefs = NewMemRefs;
- MemRefsEnd = NewMemRefsEnd;
+ NumMemRefs = NewNum;
+}
+
+bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
+ const MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::const_instr_iterator MII = *this; ++MII;
+ while (MII != MBB->end() && MII->isInsideBundle()) {
+ if (MII->getDesc().getFlags() & Mask) {
+ if (Type == AnyInBundle)
+ return true;
+ } else {
+ if (Type == AllInBundle)
+ return false;
+ }
+ ++MII;
+ }
+
+ return Type == AllInBundle;
}
bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
@@ -743,6 +777,19 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
Other->getNumOperands() != getNumOperands())
return false;
+ if (isBundle()) {
+ // Both instructions are bundles, compare MIs inside the bundle.
+ MachineBasicBlock::const_instr_iterator I1 = *this;
+ MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator I2 = *Other;
+ MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end();
+ while (++I1 != E1 && I1->isInsideBundle()) {
+ ++I2;
+ if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check))
+ return false;
+ }
+ }
+
// Check operands to make sure they match.
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
@@ -789,6 +836,18 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
/// block, and returns it, but does not delete it.
MachineInstr *MachineInstr::removeFromParent() {
assert(getParent() && "Not embedded in a basic block!");
+
+ // If it's a bundle then remove the MIs inside the bundle as well.
+ if (isBundle()) {
+ MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::instr_iterator MII = *this; ++MII;
+ MachineBasicBlock::instr_iterator E = MBB->instr_end();
+ while (MII != E && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII;
+ ++MII;
+ MBB->remove(MI);
+ }
+ }
getParent()->remove(this);
return this;
}
@@ -798,6 +857,17 @@ MachineInstr *MachineInstr::removeFromParent() {
/// block, and deletes it.
void MachineInstr::eraseFromParent() {
assert(getParent() && "Not embedded in a basic block!");
+ // If it's a bundle then remove the MIs inside the bundle as well.
+ if (isBundle()) {
+ MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::instr_iterator MII = *this; ++MII;
+ MachineBasicBlock::instr_iterator E = MBB->instr_end();
+ while (MII != E && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII;
+ ++MII;
+ MBB->erase(MI);
+ }
+ }
getParent()->erase(this);
}
@@ -817,6 +887,16 @@ unsigned MachineInstr::getNumExplicitOperands() const {
return NumOperands;
}
+/// isBundled - Return true if this instruction is part of a bundle. This is
+/// true if either it or its following instruction is marked "InsideBundle".
+bool MachineInstr::isBundled() const {
+ if (isInsideBundle())
+ return true;
+ MachineBasicBlock::const_instr_iterator nextMI = this;
+ ++nextMI;
+ return nextMI != Parent->instr_end() && nextMI->isInsideBundle();
+}
+
bool MachineInstr::isStackAligningInlineAsm() const {
if (isInlineAsm()) {
unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
@@ -887,6 +967,20 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
return NULL;
}
+/// getBundleSize - Return the number of instructions inside the MI bundle.
+unsigned MachineInstr::getBundleSize() const {
+ assert(isBundle() && "Expecting a bundle");
+
+ MachineBasicBlock::const_instr_iterator I = *this;
+ unsigned Size = 0;
+ while ((++I)->isInsideBundle()) {
+ ++Size;
+ }
+ assert(Size > 1 && "Malformed bundle");
+
+ return Size;
+}
+
/// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
/// the specific register or -1 if it is not found. It further tightens
/// the search criteria to a use that kills the register if isKill is true.
@@ -948,6 +1042,10 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
+ // Accept regmask operands when Overlap is set.
+ // Ignore them when looking for a specific def operand (Overlap == false).
+ if (isPhys && Overlap && MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ return i;
if (!MO.isReg() || !MO.isDef())
continue;
unsigned MOReg = MO.getReg();
@@ -1118,6 +1216,8 @@ void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
/// copyPredicates - Copies predicate operand(s) from MI.
void MachineInstr::copyPredicates(const MachineInstr *MI) {
+ assert(!isBundle() && "MachineInstr::copyPredicates() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isPredicable())
return;
@@ -1159,13 +1259,13 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
AliasAnalysis *AA,
bool &SawStore) const {
// Ignore stuff that we obviously can't move.
- if (MCID->mayStore() || MCID->isCall()) {
+ if (mayStore() || isCall()) {
SawStore = true;
return false;
}
if (isLabel() || isDebugValue() ||
- MCID->isTerminator() || hasUnmodeledSideEffects())
+ isTerminator() || hasUnmodeledSideEffects())
return false;
// See if this instruction does a load. If so, we have to guarantee that the
@@ -1173,7 +1273,7 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
  // destination. The check for isInvariantLoad gives the target the chance to
// classify the load as always returning a constant, e.g. a constant pool
// load.
- if (MCID->mayLoad() && !isInvariantLoad(AA))
+ if (mayLoad() && !isInvariantLoad(AA))
// Otherwise, this is a real load. If there is a store between the load and
// end of block, or if the load is volatile, we can't move it.
return !SawStore && !hasVolatileMemoryRef();
@@ -1213,9 +1313,9 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
/// have no volatile memory references.
bool MachineInstr::hasVolatileMemoryRef() const {
// An instruction known never to access memory won't have a volatile access.
- if (!MCID->mayStore() &&
- !MCID->mayLoad() &&
- !MCID->isCall() &&
+ if (!mayStore() &&
+ !mayLoad() &&
+ !isCall() &&
!hasUnmodeledSideEffects())
return false;
@@ -1239,7 +1339,7 @@ bool MachineInstr::hasVolatileMemoryRef() const {
/// *all* loads the instruction does are invariant (if it does multiple loads).
bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
// If the instruction doesn't load at all, it isn't an invariant load.
- if (!MCID->mayLoad())
+ if (!mayLoad())
return false;
// If the instruction has lost its memoperands, conservatively assume that
@@ -1253,6 +1353,7 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
E = memoperands_end(); I != E; ++I) {
if ((*I)->isVolatile()) return false;
if ((*I)->isStore()) return false;
+ if ((*I)->isInvariant()) return true;
if (const Value *V = (*I)->getValue()) {
// A load from a constant PseudoSourceValue is invariant.
@@ -1291,7 +1392,7 @@ unsigned MachineInstr::isConstantValuePHI() const {
}
bool MachineInstr::hasUnmodeledSideEffects() const {
- if (getDesc().hasUnmodeledSideEffects())
+ if (hasProperty(MCID::UnmodeledSideEffects))
return true;
if (isInlineAsm()) {
unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
@@ -1384,7 +1485,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << " = ";
// Print the opcode name.
- OS << getDesc().getName();
+ if (TM && TM->getInstrInfo())
+ OS << TM->getInstrInfo()->getName(getOpcode());
+ else
+ OS << "UNKNOWN";
// Print the rest of the operands.
bool OmittedAnyCallClobbers = false;
@@ -1419,14 +1523,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
// call instructions much less noisy on targets where calls clobber lots
// of registers. Don't rely on MO.isDead() because we may be called before
// LiveVariables is run, or we may be looking at a non-allocatable reg.
- if (MF && getDesc().isCall() &&
+ if (MF && isCall() &&
MO.isReg() && MO.isImplicit() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
const MachineRegisterInfo &MRI = MF->getRegInfo();
if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
bool HasAliasLive = false;
- for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg);
+ for (const uint16_t *Alias = TM->getRegisterInfo()->getAliasSet(Reg);
unsigned AliasReg = *Alias; ++Alias)
if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) {
HasAliasLive = true;
@@ -1617,6 +1721,20 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
return Found;
}
+void MachineInstr::clearRegisterKills(unsigned Reg,
+ const TargetRegisterInfo *RegInfo) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ RegInfo = 0;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned OpReg = MO.getReg();
+ if (OpReg == Reg || (RegInfo && RegInfo->isSuperRegister(Reg, OpReg)))
+ MO.setIsKill(false);
+ }
+}
+
bool MachineInstr::addRegisterDead(unsigned IncomingReg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound) {
@@ -1689,16 +1807,21 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg,
true /*IsImp*/));
}
-void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs,
+void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
const TargetRegisterInfo &TRI) {
+ bool HasRegMask = false;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
MachineOperand &MO = getOperand(i);
+ if (MO.isRegMask()) {
+ HasRegMask = true;
+ continue;
+ }
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
bool Dead = true;
- for (SmallVectorImpl<unsigned>::const_iterator I = UsedRegs.begin(),
- E = UsedRegs.end(); I != E; ++I)
+ for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
+ I != E; ++I)
if (TRI.regsOverlap(*I, Reg)) {
Dead = false;
break;
@@ -1706,53 +1829,66 @@ void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRe
// If there are no uses, including partial uses, the def is dead.
if (Dead) MO.setIsDead();
}
+
+ // This is a call with a register mask operand.
+ // Mask clobbers are always dead, so add defs for the non-dead defines.
+ if (HasRegMask)
+ for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
+ I != E; ++I)
+ addRegisterDefined(*I, &TRI);
}
unsigned
MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
- unsigned Hash = MI->getOpcode() * 37;
+ // Build up a buffer of hash code components.
+ //
+ // FIXME: This is a total hack. We should have a hash_value overload for
+ // MachineOperand, but currently that doesn't work because there are many
+ // different ideas of "equality" and thus different sets of information that
+ // contribute to the hash code. This one happens to want to take a specific
+ // subset. And it's still not clear that this routine uses the *correct*
+ // subset of information when computing the hash code. The goal is to use the
+ // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to
+ // test for equality when passed the 'IgnoreVRegDefs' filter flag. It would
+ // be very useful to factor the selection of relevant inputs out of the two
+ // functions and into a common routine, but it's not clear how that can be
+ // done.
+ SmallVector<size_t, 8> HashComponents;
+ HashComponents.reserve(MI->getNumOperands() + 1);
+ HashComponents.push_back(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- uint64_t Key = (uint64_t)MO.getType() << 32;
switch (MO.getType()) {
default: break;
case MachineOperand::MO_Register:
if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue; // Skip virtual register defs.
- Key |= MO.getReg();
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getReg()));
break;
case MachineOperand::MO_Immediate:
- Key |= MO.getImm();
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getImm()));
break;
case MachineOperand::MO_FrameIndex:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_JumpTableIndex:
- Key |= MO.getIndex();
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex()));
break;
case MachineOperand::MO_MachineBasicBlock:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB()));
break;
case MachineOperand::MO_GlobalAddress:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal()));
break;
case MachineOperand::MO_BlockAddress:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
+ HashComponents.push_back(hash_combine(MO.getType(),
+ MO.getBlockAddress()));
break;
case MachineOperand::MO_MCSymbol:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol());
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol()));
break;
}
- Key += ~(Key << 32);
- Key ^= (Key >> 22);
- Key += ~(Key << 13);
- Key ^= (Key >> 8);
- Key += (Key << 3);
- Key ^= (Key >> 15);
- Key += ~(Key << 27);
- Key ^= (Key >> 31);
- Hash = (unsigned)Key + Hash * 37;
- }
- return Hash;
+ }
+ return hash_combine_range(HashComponents.begin(), HashComponents.end());
}
void MachineInstr::emitError(StringRef Msg) const {
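The MachineInstrExpressionTrait change above replaces the hand-rolled 64-bit mixing with llvm/ADT/Hashing.h: each operand contributes one hash_combine component, and the components are folded with hash_combine_range. A reduced sketch of the same pattern; hashInstrKey and the (kind, value) operand encoding are made up for illustration:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include <stdint.h>
#include <utility>
using namespace llvm;

// One component per operand, combined over the whole range, mirroring the new
// MachineInstrExpressionTrait::getHashValue.
size_t hashInstrKey(unsigned Opcode,
                    ArrayRef<std::pair<unsigned, int64_t> > Operands) {
  SmallVector<size_t, 8> Components;
  Components.reserve(Operands.size() + 1);
  Components.push_back(Opcode);
  for (unsigned i = 0, e = Operands.size(); i != e; ++i)
    Components.push_back(hash_combine(Operands[i].first, Operands[i].second));
  return hash_combine_range(Components.begin(), Components.end());
}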
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
new file mode 100644
index 000000000000..73489a7160bf
--- /dev/null
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -0,0 +1,278 @@
+//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+namespace {
+ class UnpackMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ UnpackMachineBundles() : MachineFunctionPass(ID) {
+ initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ };
+} // end anonymous namespace
+
+char UnpackMachineBundles::ID = 0;
+char &llvm::UnpackMachineBundlesID = UnpackMachineBundles::ID;
+INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles",
+ "Unpack machine instruction bundles", false, false)
+
+bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(),
+ MIE = MBB->instr_end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // Remove BUNDLE instruction and the InsideBundle flags from bundled
+ // instructions.
+ if (MI->isBundle()) {
+ while (++MII != MIE && MII->isInsideBundle()) {
+ MII->setIsInsideBundle(false);
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (MO.isReg() && MO.isInternalRead())
+ MO.setIsInternalRead(false);
+ }
+ }
+ MI->eraseFromParent();
+
+ Changed = true;
+ continue;
+ }
+
+ ++MII;
+ }
+ }
+
+ return Changed;
+}
+
+
+namespace {
+ class FinalizeMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ FinalizeMachineBundles() : MachineFunctionPass(ID) {
+ initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ };
+} // end anonymous namespace
+
+char FinalizeMachineBundles::ID = 0;
+char &llvm::FinalizeMachineBundlesID = FinalizeMachineBundles::ID;
+INITIALIZE_PASS(FinalizeMachineBundles, "finalize-mi-bundles",
+ "Finalize machine instruction bundles", false, false)
+
+bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ return llvm::finalizeBundles(MF);
+}
+
+
+/// finalizeBundle - Finalize a machine instruction bundle which includes
+/// a sequence of instructions starting from FirstMI to LastMI (exclusive).
+/// This routine adds a BUNDLE instruction to represent the bundle; it adds
+/// IsInternalRead markers to MachineOperands which are defined inside the
+/// bundle, and it copies externally visible defs and uses to the BUNDLE
+/// instruction.
+void llvm::finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI) {
+ assert(FirstMI != LastMI && "Empty bundle?");
+
+ const TargetMachine &TM = MBB.getParent()->getTarget();
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(),
+ TII->get(TargetOpcode::BUNDLE));
+
+ SmallVector<unsigned, 8> LocalDefs;
+ SmallSet<unsigned, 8> LocalDefSet;
+ SmallSet<unsigned, 8> DeadDefSet;
+ SmallSet<unsigned, 8> KilledDefSet;
+ SmallVector<unsigned, 8> ExternUses;
+ SmallSet<unsigned, 8> ExternUseSet;
+ SmallSet<unsigned, 8> KilledUseSet;
+ SmallSet<unsigned, 8> UndefUseSet;
+ SmallVector<MachineOperand*, 4> Defs;
+ for (; FirstMI != LastMI; ++FirstMI) {
+ for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = FirstMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(&MO);
+ continue;
+ }
+
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (LocalDefSet.count(Reg)) {
+ MO.setIsInternalRead();
+ if (MO.isKill())
+ // Internal def is now killed.
+ KilledDefSet.insert(Reg);
+ } else {
+ if (ExternUseSet.insert(Reg)) {
+ ExternUses.push_back(Reg);
+ if (MO.isUndef())
+ UndefUseSet.insert(Reg);
+ }
+ if (MO.isKill())
+ // External def is now killed.
+ KilledUseSet.insert(Reg);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ MachineOperand &MO = *Defs[i];
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (LocalDefSet.insert(Reg)) {
+ LocalDefs.push_back(Reg);
+ if (MO.isDead()) {
+ DeadDefSet.insert(Reg);
+ }
+ } else {
+ // Re-defined inside the bundle, it's no longer killed.
+ KilledDefSet.erase(Reg);
+ if (!MO.isDead())
+ // Previously defined but dead.
+ DeadDefSet.erase(Reg);
+ }
+
+ if (!MO.isDead()) {
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (LocalDefSet.insert(SubReg))
+ LocalDefs.push_back(SubReg);
+ }
+ }
+ }
+
+ FirstMI->setIsInsideBundle();
+ Defs.clear();
+ }
+
+ SmallSet<unsigned, 8> Added;
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ if (Added.insert(Reg)) {
+ // If it's not live beyond end of the bundle, mark it dead.
+ bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg);
+ MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) |
+ getImplRegState(true));
+ }
+ }
+
+ for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) {
+ unsigned Reg = ExternUses[i];
+ bool isKill = KilledUseSet.count(Reg);
+ bool isUndef = UndefUseSet.count(Reg);
+ MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
+ getImplRegState(true));
+ }
+}
+
+/// finalizeBundle - Same functionality as the previous finalizeBundle except
+/// the last instruction in the bundle is not provided as an input. This is
+/// used in cases where bundles are pre-determined by marking instructions
+/// with 'InsideBundle' marker. It returns the MBB instruction iterator that
+/// points to the end of the bundle.
+MachineBasicBlock::instr_iterator
+llvm::finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI) {
+ MachineBasicBlock::instr_iterator E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator LastMI = llvm::next(FirstMI);
+ while (LastMI != E && LastMI->isInsideBundle())
+ ++LastMI;
+ finalizeBundle(MBB, FirstMI, LastMI);
+ return LastMI;
+}
+
+/// finalizeBundles - Finalize instruction bundles in the specified
+/// MachineFunction. Return true if any bundles are finalized.
+bool llvm::finalizeBundles(MachineFunction &MF) {
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock &MBB = *I;
+
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin();
+ assert(!MII->isInsideBundle() &&
+ "First instr cannot be inside bundle before finalization!");
+
+ MachineBasicBlock::instr_iterator MIE = MBB.instr_end();
+ if (MII == MIE)
+ continue;
+ for (++MII; MII != MIE; ) {
+ if (!MII->isInsideBundle())
+ ++MII;
+ else {
+ MII = finalizeBundle(MBB, llvm::prior(MII));
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineOperand iterator
+//===----------------------------------------------------------------------===//
+
+MachineOperandIteratorBase::RegInfo
+MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
+ SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) {
+ RegInfo RI = { false, false, false };
+ for(; isValid(); ++*this) {
+ MachineOperand &MO = deref();
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ // Remember each (MI, OpNo) that refers to Reg.
+ if (Ops)
+ Ops->push_back(std::make_pair(MO.getParent(), getOperandNo()));
+
+ // Both defs and uses can read virtual registers.
+ if (MO.readsReg()) {
+ RI.Reads = true;
+ if (MO.isDef())
+ RI.Tied = true;
+ }
+
+ // Only defs can write.
+ if (MO.isDef())
+ RI.Writes = true;
+ else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo()))
+ RI.Tied = true;
+ }
+ return RI;
+}
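finalizeBundle is the entry point a target pass calls after laying down the instructions that should issue together: it inserts the BUNDLE header, marks internal reads, and copies the externally visible defs and uses onto the header. A hypothetical caller, assuming MI and NextMI are adjacent instructions already placed in MBB; bundlePair is a made-up helper, not an LLVM API:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
using namespace llvm;

// Bundle MI together with the instruction that immediately follows it.
static void bundlePair(MachineBasicBlock &MBB, MachineInstr *MI,
                       MachineInstr *NextMI) {
  MachineBasicBlock::instr_iterator First = *MI;   // bundle starts at MI
  MachineBasicBlock::instr_iterator Last = *NextMI;
  ++Last;                                          // LastMI is exclusive
  finalizeBundle(MBB, First, Last);                // emits the BUNDLE header
}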
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index a1f80d5282e0..8c562cc4454a 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -45,7 +45,7 @@ using namespace llvm;
static cl::opt<bool>
AvoidSpeculation("avoid-speculation",
cl::desc("MachineLICM should avoid speculation"),
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
@@ -60,8 +60,6 @@ STATISTIC(NumPostRAHoisted,
namespace {
class MachineLICM : public MachineFunctionPass {
- bool PreRegAlloc;
-
const TargetMachine *TM;
const TargetInstrInfo *TII;
const TargetLowering *TLI;
@@ -69,6 +67,7 @@ namespace {
const MachineFrameInfo *MFI;
MachineRegisterInfo *MRI;
const InstrItineraryData *InstrItins;
+ bool PreRegAlloc;
// Various analyses that we use...
AliasAnalysis *AA; // Alias analysis info.
@@ -81,7 +80,13 @@ namespace {
MachineLoop *CurLoop; // The current loop we are working on.
MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
- BitVector AllocatableSet;
+ // Exit blocks for CurLoop.
+ SmallVector<MachineBasicBlock*, 8> ExitBlocks;
+
+ bool isExitBlock(const MachineBasicBlock *MBB) const {
+ return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
+ ExitBlocks.end();
+ }
// Track 'estimated' register pressure.
SmallSet<unsigned, 32> RegSeen;
@@ -122,8 +127,6 @@ namespace {
virtual bool runOnMachineFunction(MachineFunction &MF);
- const char *getPassName() const { return "Machine Instruction LICM"; }
-
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineDominatorTree>();
@@ -165,7 +168,9 @@ namespace {
/// ProcessMI - Examine the instruction for a potential LICM candidate. Also
/// gather register def and frame object update information.
- void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs,
+ void ProcessMI(MachineInstr *MI,
+ BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers,
SmallSet<int, 32> &StoredFIs,
SmallVector<CandidateInfo, 32> &Candidates);
@@ -182,12 +187,12 @@ namespace {
/// invariant. I.e., all virtual register operands are defined outside of
/// the loop, physical registers aren't accessed (explicitly or implicitly),
/// and the instruction is hoistable.
- ///
+ ///
bool IsLoopInvariantInst(MachineInstr &I);
- /// HasAnyPHIUse - Return true if the specified register is used by any
- /// phi node.
- bool HasAnyPHIUse(unsigned Reg) const;
+ /// HasLoopPHIUse - Return true if the specified instruction is used by any
+ /// phi node in the current loop.
+ bool HasLoopPHIUse(const MachineInstr *MI) const;
/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
/// and a use in the current loop, return true if the target considered
@@ -200,7 +205,7 @@ namespace {
/// CanCauseHighRegPressure - Visit BBs from header to current BB,
/// check if hoisting an instruction of the given cost matrix can cause high
/// register pressure.
- bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost);
+ bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap);
/// UpdateBackTraceRegPressure - Traverse the back trace from header to
/// the current block and update their register pressures to reflect the
@@ -215,13 +220,25 @@ namespace {
/// If not then a load from this mbb may not be safe to hoist.
bool IsGuaranteedToExecute(MachineBasicBlock *BB);
- /// HoistRegion - Walk the specified region of the CFG (defined by all
- /// blocks dominated by the specified block, and that are in the current
- /// loop) in depth first order w.r.t the DominatorTree. This allows us to
- /// visit definitions before uses, allowing us to hoist a loop body in one
- /// pass without iteration.
+ void EnterScope(MachineBasicBlock *MBB);
+
+ void ExitScope(MachineBasicBlock *MBB);
+
+ /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given
+ /// dominator tree node if its a leaf or all of its children are done. Walk
+ /// up the dominator tree to destroy ancestors which are now done.
+ void ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
+
+ /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
+ /// blocks dominated by the specified header block, and that are in the
+ /// current loop) in depth first order w.r.t the DominatorTree. This allows
+ /// us to visit definitions before uses, allowing us to hoist a loop body in
+ /// one pass without iteration.
///
- void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false);
+ void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
+ void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
/// getRegisterClassIDAndCost - For a given MI, register, and the operand
/// index, return the ID and cost of its representative register class by
@@ -278,6 +295,7 @@ namespace {
} // end anonymous namespace
char MachineLICM::ID = 0;
+char &llvm::MachineLICMID = MachineLICM::ID;
INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
"Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
@@ -286,10 +304,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MachineLICM, "machinelicm",
"Machine Loop Invariant Code Motion", false, false)
-FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
- return new MachineLICM(PreRegAlloc);
-}
-
/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
/// loop that has a unique predecessor.
static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
@@ -305,12 +319,6 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
}
bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
- if (PreRegAlloc)
- DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
- else
- DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
- DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
-
Changed = FirstInLoop = false;
TM = &MF.getTarget();
TII = TM->getInstrInfo();
@@ -319,7 +327,14 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
MFI = MF.getFrameInfo();
MRI = &MF.getRegInfo();
InstrItins = TM->getInstrItineraryData();
- AllocatableSet = TRI->getAllocatableSet(MF);
+
+ PreRegAlloc = MRI->isSSA();
+
+ if (PreRegAlloc)
+ DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
+ else
+ DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+ DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
if (PreRegAlloc) {
// Estimate register pressure during pre-regalloc pass.
@@ -341,6 +356,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
while (!Worklist.empty()) {
CurLoop = Worklist.pop_back_val();
CurPreheader = 0;
+ ExitBlocks.clear();
// If this is done before regalloc, only visit outer-most preheader-sporting
// loops.
@@ -349,6 +365,8 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ CurLoop->getExitBlocks(ExitBlocks);
+
if (!PreRegAlloc)
HoistRegionPostRA();
else {
@@ -356,7 +374,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
// being hoisted.
MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
FirstInLoop = true;
- HoistRegion(N, true);
+ HoistOutOfLoop(N);
CSEMap.clear();
}
}
@@ -383,7 +401,8 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
/// ProcessMI - Examine the instruction for a potential LICM candidate. Also
/// gather register def and frame object update information.
void MachineLICM::ProcessMI(MachineInstr *MI,
- unsigned *PhysRegDefs,
+ BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers,
SmallSet<int, 32> &StoredFIs,
SmallVector<CandidateInfo, 32> &Candidates) {
bool RuledOut = false;
@@ -402,6 +421,13 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
continue;
}
+ // We can't hoist an instruction defining a physreg that is clobbered in
+ // the loop.
+ if (MO.isRegMask()) {
+ PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
+ continue;
+ }
+
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -411,7 +437,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
"Not expecting virtual register!");
if (!MO.isDef()) {
- if (Reg && PhysRegDefs[Reg])
+ if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
// If it's using a non-loop-invariant register, then it's obviously not
// safe to hoist.
HasNonInvariantUse = true;
@@ -419,9 +445,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
}
if (MO.isImplicit()) {
- ++PhysRegDefs[Reg];
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- ++PhysRegDefs[*AS];
+ for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
+ PhysRegClobbers.set(*AS);
if (!MO.isDead())
// Non-dead implicit def? This cannot be hoisted.
RuledOut = true;
@@ -438,14 +463,17 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
Def = Reg;
// If we have already seen another instruction that defines the same
- // register, then this is not safe.
- if (++PhysRegDefs[Reg] > 1)
- // MI defined register is seen defined by another instruction in
- // the loop, it cannot be a LICM candidate.
- RuledOut = true;
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- if (++PhysRegDefs[*AS] > 1)
+ // register, then this is not safe. Two defs are indicated by setting a
+ // PhysRegClobbers bit.
+ for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) {
+ if (PhysRegDefs.test(*AS))
+ PhysRegClobbers.set(*AS);
+ if (PhysRegClobbers.test(*AS))
+ // MI defined register is seen defined by another instruction in
+ // the loop, it cannot be a LICM candidate.
RuledOut = true;
+ PhysRegDefs.set(*AS);
+ }
}
// Only consider reloads for now and remats which do not have register
@@ -461,9 +489,13 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
/// invariants out to the preheader.
void MachineLICM::HoistRegionPostRA() {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
unsigned NumRegs = TRI->getNumRegs();
- unsigned *PhysRegDefs = new unsigned[NumRegs];
- std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0);
+ BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
+ BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
SmallVector<CandidateInfo, 32> Candidates;
SmallSet<int, 32> StoredFIs;
@@ -485,16 +517,31 @@ void MachineLICM::HoistRegionPostRA() {
for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
E = BB->livein_end(); I != E; ++I) {
unsigned Reg = *I;
- ++PhysRegDefs[Reg];
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- ++PhysRegDefs[*AS];
+ for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
+ PhysRegDefs.set(*AS);
}
SpeculationState = SpeculateUnknown;
for (MachineBasicBlock::iterator
MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
MachineInstr *MI = &*MII;
- ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates);
+ ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
+ }
+ }
+
+ // Gather the registers read / clobbered by the terminator.
+ BitVector TermRegs(NumRegs);
+ MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
+ if (TI != Preheader->end()) {
+ for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = TI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
+ TermRegs.set(*AS);
}
}
@@ -503,19 +550,25 @@ void MachineLICM::HoistRegionPostRA() {
// instruction in the loop.
// 2. If the candidate is a load from stack slot (always true for now),
// check if the slot is stored anywhere in the loop.
+ // 3. Make sure the candidate's def does not clobber a register read by
+ // the terminator, and is not itself clobbered by the terminator.
for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
if (Candidates[i].FI != INT_MIN &&
StoredFIs.count(Candidates[i].FI))
continue;
- if (PhysRegDefs[Candidates[i].Def] == 1) {
+ unsigned Def = Candidates[i].Def;
+ if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
bool Safe = true;
MachineInstr *MI = Candidates[i].MI;
for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
const MachineOperand &MO = MI->getOperand(j);
if (!MO.isReg() || MO.isDef() || !MO.getReg())
continue;
- if (PhysRegDefs[MO.getReg()]) {
+ unsigned Reg = MO.getReg();
+ if (PhysRegDefs.test(Reg) ||
+ PhysRegClobbers.test(Reg)) {
// If it's using a non-loop-invariant register, then it's obviously
// not safe to hoist.
Safe = false;
@@ -526,8 +579,6 @@ void MachineLICM::HoistRegionPostRA() {
HoistPostRA(MI, Candidates[i].Def);
}
}
-
- delete[] PhysRegDefs;
}
/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
@@ -556,26 +607,17 @@ void MachineLICM::AddToLiveIns(unsigned Reg) {
/// dirty work.
void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader) return;
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
- DEBUG({
- dbgs() << "Hoisting " << *MI;
- if (Preheader->getBasicBlock())
- dbgs() << " to MachineBasicBlock "
- << Preheader->getName();
- if (MI->getParent()->getBasicBlock())
- dbgs() << " from MachineBasicBlock "
- << MI->getParent()->getName();
- dbgs() << "\n";
- });
+ DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
+ << MI->getParent()->getNumber() << ": " << *MI);
// Splice the instruction to the preheader.
MachineBasicBlock *MBB = MI->getParent();
Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
- // Add register to livein list to all the BBs in the current loop since a
+ // Add register to livein list to all the BBs in the current loop since a
// loop invariant must be kept live throughout the whole loop. This is
// important to ensure later passes do not scavenge the def register.
AddToLiveIns(Def);
@@ -589,7 +631,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
if (SpeculationState != SpeculateUnknown)
return SpeculationState == SpeculateFalse;
-
+
if (BB != CurLoop->getHeader()) {
// Check loop exiting blocks.
SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
@@ -605,57 +647,126 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
return true;
}
-/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
-/// dominated by the specified block, and that are in the current loop) in depth
-/// first order w.r.t the DominatorTree. This allows us to visit definitions
-/// before uses, allowing us to hoist a loop body in one pass without iteration.
-///
-void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
- assert(N != 0 && "Null dominator tree node?");
- MachineBasicBlock *BB = N->getBlock();
+void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
- // If the header of the loop containing this basic block is a landing pad,
- // then don't try to hoist instructions out of this loop.
- const MachineLoop *ML = MLI->getLoopFor(BB);
- if (ML && ML->getHeader()->isLandingPad()) return;
+ // Remember livein register pressure.
+ BackTrace.push_back(RegPressure);
+}
- // If this subregion is not in the top level loop at all, exit.
- if (!CurLoop->contains(BB)) return;
+void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+ BackTrace.pop_back();
+}
- MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader)
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if it is a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
+void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ if (OpenChildren[Node])
return;
- if (IsHeader) {
+ // Pop scope.
+ ExitScope(Node->getBlock());
+
+ // Now traverse upwards to pop ancestors whose children are all done.
+ while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+ unsigned Left = --OpenChildren[Parent];
+ if (Left != 0)
+ break;
+ ExitScope(Parent->getBlock());
+ Node = Parent;
+ }
+}
+
+/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
+/// blocks dominated by the specified header block, and that are in the
+/// current loop) in depth first order w.r.t the DominatorTree. This allows
+/// us to visit definitions before uses, allowing us to hoist a loop body in
+/// one pass without iteration.
+///
+void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
+ SmallVector<MachineDomTreeNode*, 32> Scopes;
+ SmallVector<MachineDomTreeNode*, 8> WorkList;
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
+ DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+ // Perform a DFS walk to determine the order of visit.
+ WorkList.push_back(HeaderN);
+ do {
+ MachineDomTreeNode *Node = WorkList.pop_back_val();
+ assert(Node != 0 && "Null dominator tree node?");
+ MachineBasicBlock *BB = Node->getBlock();
+
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isLandingPad())
+ continue;
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB))
+ continue;
+
+ Scopes.push_back(Node);
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ unsigned NumChildren = Children.size();
+
+ // Don't hoist things out of a large switch statement. This often causes
+ // code to be hoisted that wasn't going to be executed, and increases
+ // register pressure in a situation where it's likely to matter.
+ if (BB->succ_size() >= 25)
+ NumChildren = 0;
+
+ OpenChildren[Node] = NumChildren;
+ // Add children in reverse order so that the next node popped off the
+ // worklist is this node's first child. This means we ultimately traverse
+ // the DOM tree in exactly the same order as if we'd recursed.
+ for (int i = (int)NumChildren-1; i >= 0; --i) {
+ MachineDomTreeNode *Child = Children[i];
+ ParentMap[Child] = Node;
+ WorkList.push_back(Child);
+ }
+ } while (!WorkList.empty());
+
+ if (Scopes.size() != 0) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
// Compute registers which are livein into the loop headers.
RegSeen.clear();
BackTrace.clear();
InitRegPressure(Preheader);
}
- // Remember livein register pressure.
- BackTrace.push_back(RegPressure);
+ // Now perform LICM.
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = Scopes[i];
+ MachineBasicBlock *MBB = Node->getBlock();
- SpeculationState = SpeculateUnknown;
- for (MachineBasicBlock::iterator
- MII = BB->begin(), E = BB->end(); MII != E; ) {
- MachineBasicBlock::iterator NextMII = MII; ++NextMII;
- MachineInstr *MI = &*MII;
- if (!Hoist(MI, Preheader))
- UpdateRegPressure(MI);
- MII = NextMII;
- }
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ continue;
- // Don't hoist things out of a large switch statement. This often causes
- // code to be hoisted that wasn't going to be executed, and increases
- // register pressure in a situation where it's likely to matter.
- if (BB->succ_size() < 25) {
- const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
- for (unsigned I = 0, E = Children.size(); I != E; ++I)
- HoistRegion(Children[I]);
- }
+ EnterScope(MBB);
- BackTrace.pop_back();
+ // Process the block
+ SpeculationState = SpeculateUnknown;
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ) {
+ MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+ MachineInstr *MI = &*MII;
+ if (!Hoist(MI, Preheader))
+ UpdateRegPressure(MI);
+ MII = NextMII;
+ }
+
+ // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+ ExitScopeIfDone(Node, OpenChildren, ParentMap);
+ }
}
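The reverse-order worklist push in HoistOutOfLoop above is the standard trick for reproducing recursive preorder with an explicit stack. A minimal, self-contained sketch of just that traversal (plain C++, illustrative only; the Node type is hypothetical):

#include <cstdio>
#include <vector>

struct Node {
  int Id;
  std::vector<Node*> Children;
};

// Pushing children right-to-left makes the leftmost child the next node
// popped, so the visit order matches a recursive preorder walk.
static void preorder(Node *Root) {
  std::vector<Node*> Work(1, Root);
  while (!Work.empty()) {
    Node *N = Work.back();
    Work.pop_back();
    std::printf("%d ", N->Id);                         // visit the node
    for (int i = (int)N->Children.size() - 1; i >= 0; --i)
      Work.push_back(N->Children[i]);
  }
}

int main() {
  Node A = {1}, B = {2}, C = {3}, Root = {0};
  Root.Children.push_back(&A);
  Root.Children.push_back(&B);
  Root.Children.push_back(&C);
  preorder(&Root);                                     // prints: 0 1 2 3
  std::printf("\n");
  return 0;
}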
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
@@ -670,7 +781,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
unsigned &RCId, unsigned &RCCost) const {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
EVT VT = *RC->vt_begin();
- if (VT == MVT::untyped) {
+ if (VT == MVT::Untyped) {
RCId = RC->getID();
RCCost = 1;
} else {
@@ -678,7 +789,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
RCCost = TLI->getRepRegClassCostFor(VT);
}
}
-
+
/// InitRegPressure - Find all virtual register references that are liveout of
/// the preheader to initialize the starting "register pressure". Note this
/// does not count live through (livein but not used) registers.
@@ -762,6 +873,21 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
}
}
+/// isLoadFromGOTOrConstantPool - Return true if this machine instruction
+/// loads from the global offset table or the constant pool.
+static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
+ assert (MI.mayLoad() && "Expected MI that loads!");
+ for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
+ E = MI.memoperands_end(); I != E; ++I) {
+ if (const Value *V = (*I)->getValue()) {
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+ if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool())
+ return true;
+ }
+ }
+ return false;
+}
+
/// IsLICMCandidate - Returns true if the instruction may be a suitable
/// candidate for LICM. e.g. If the instruction is a call, then it's obviously
/// not safe to hoist it.
@@ -773,9 +899,12 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
// If it is load then check if it is guaranteed to execute by making sure that
// it dominates all exiting blocks. If it doesn't, then there is a path out of
- // the loop which does not execute this load, so we can't hoist it.
+ // the loop which does not execute this load, so we can't hoist it. Loads
+ // from the GOT or constant pool are always safe to speculate, but other
+ // loads from constant memory (for example, an indexed load from a jump
+ // table) are not, so those still require the guarantee of execution.
// Stores and side effects are already checked by isSafeToMove.
- if (I.getDesc().mayLoad() && !IsGuaranteedToExecute(I.getParent()))
+ if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
+ !IsGuaranteedToExecute(I.getParent()))
return false;
return true;
@@ -785,7 +914,7 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
/// invariant. I.e., all virtual register operands are defined outside of the
/// loop, physical registers aren't accessed explicitly, and there are no side
/// effects that aren't captured by the operands or other flags.
-///
+///
bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
if (!IsLICMCandidate(I))
return false;
@@ -806,18 +935,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI->def_empty(Reg))
- return false;
- if (AllocatableSet.test(Reg))
+ if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
return false;
- // Check for a def among the register's aliases too.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- if (!MRI->def_empty(AliasReg))
- return false;
- if (AllocatableSet.test(AliasReg))
- return false;
- }
// Otherwise it's safe to move.
continue;
} else if (!MO.isDead()) {
@@ -847,22 +966,40 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
}
-/// HasAnyPHIUse - Return true if the specified register is used by any
-/// phi node.
-bool MachineLICM::HasAnyPHIUse(unsigned Reg) const {
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
- UE = MRI->use_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- if (UseMI->isPHI())
- return true;
- // Look pass copies as well.
- if (UseMI->isCopy()) {
- unsigned Def = UseMI->getOperand(0).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Def) &&
- HasAnyPHIUse(Def))
- return true;
+/// HasLoopPHIUse - Return true if the specified instruction is used by a
+/// phi node and hoisting it could cause a copy to be inserted.
+bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
+ SmallVector<const MachineInstr*, 8> Work(1, MI);
+ do {
+ MI = Work.pop_back_val();
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ // A PHI may cause a copy to be inserted.
+ if (UseMI->isPHI()) {
+ // A PHI inside the loop causes a copy because the live range of Reg is
+ // extended across the PHI.
+ if (CurLoop->contains(UseMI))
+ return true;
+ // A PHI in an exit block can cause a copy to be inserted if the PHI
+ // has multiple predecessors in the loop with different values.
+ // For now, approximate by rejecting all exit blocks.
+ if (isExitBlock(UseMI->getParent()))
+ return true;
+ continue;
+ }
+ // Look past copies as well.
+ if (UseMI->isCopy() && CurLoop->contains(UseMI))
+ Work.push_back(UseMI);
+ }
}
- }
+ } while (!Work.empty());
return false;
}
@@ -903,7 +1040,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
/// the operand latency between its def and a use is one or less.
bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
- if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike())
+ if (MI.isAsCheapAsAMove() || MI.isCopyLike())
return true;
if (!InstrItins || InstrItins->isEmpty())
return false;
@@ -930,16 +1067,25 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
/// CanCauseHighRegPressure - Visit BBs from header to current BB, check
/// if hoisting an instruction of the given cost matrix can cause high
/// register pressure.
-bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) {
+bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost,
+ bool CheapInstr) {
for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
CI != CE; ++CI) {
- if (CI->second <= 0)
+ if (CI->second <= 0)
continue;
unsigned RCId = CI->first;
+ unsigned Limit = RegLimit[RCId];
+ int Cost = CI->second;
+
+ // Don't hoist cheap instructions if they would increase register pressure,
+ // even if we're under the limit.
+ if (CheapInstr)
+ return true;
+
for (unsigned i = BackTrace.size(); i != 0; --i) {
SmallVector<unsigned, 8> &RP = BackTrace[i-1];
- if (RP[RCId] + CI->second >= RegLimit[RCId])
+ if (RP[RCId] + Cost >= Limit)
return true;
}
}
@@ -999,87 +1145,95 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
if (MI.isImplicitDef())
return true;
- // If the instruction is cheap, only hoist if it is re-materilizable. LICM
- // will increase register pressure. It's probably not worth it if the
- // instruction is cheap.
- // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
- // these tend to help performance in low register pressure situation. The
- // trade off is it may cause spill in high pressure situation. It will end up
- // adding a store in the loop preheader. But the reload is no more expensive.
- // The side benefit is these loads are frequently CSE'ed.
- if (IsCheapInstruction(MI)) {
- if (!TII->isTriviallyReMaterializable(&MI, AA))
- return false;
- } else {
- // Estimate register pressure to determine whether to LICM the instruction.
- // In low register pressure situation, we can be more aggressive about
- // hoisting. Also, favors hoisting long latency instructions even in
- // moderately high pressure situation.
- // FIXME: If there are long latency loop-invariant instructions inside the
- // loop at this point, why didn't the optimizer's LICM hoist them?
- DenseMap<unsigned, int> Cost;
- for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.isImplicit())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
+ // Besides removing computation from the loop, hoisting an instruction has
+ // these effects:
+ //
+ // - The value defined by the instruction becomes live across the entire
+ // loop. This increases register pressure in the loop.
+ //
+ // - If the value is used by a PHI in the loop, a copy will be required for
+ // lowering the PHI after extending the live range.
+ //
+ // - When hoisting the last use of a value in the loop, that value no longer
+ // needs to be live in the loop. This lowers register pressure in the loop.
+
+ bool CheapInstr = IsCheapInstruction(MI);
+ bool CreatesCopy = HasLoopPHIUse(&MI);
+
+ // Don't hoist a cheap instruction if it would create a copy in the loop.
+ if (CheapInstr && CreatesCopy) {
+ DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
+ return false;
+ }
- unsigned RCId, RCCost;
- getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
- if (MO.isDef()) {
- if (HasHighOperandLatency(MI, i, Reg)) {
- ++NumHighLatency;
- return true;
- }
+ // Rematerializable instructions should always be hoisted since the register
+ // allocator can just pull them down again when needed.
+ if (TII->isTriviallyReMaterializable(&MI, AA))
+ return true;
+
+ // Estimate register pressure to determine whether to LICM the instruction.
+ // In a low register pressure situation, we can be more aggressive about
+ // hoisting. Also, favor hoisting long latency instructions even in
+ // moderately high pressure situations.
+ // Cheap instructions will only be hoisted if they don't increase register
+ // pressure at all.
+ // FIXME: If there are long latency loop-invariant instructions inside the
+ // loop at this point, why didn't the optimizer's LICM hoist them?
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
- DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
- if (CI != Cost.end())
- CI->second += RCCost;
- else
- Cost.insert(std::make_pair(RCId, RCCost));
- } else if (isOperandKill(MO, MRI)) {
- // Is a virtual register use is a kill, hoisting it out of the loop
- // may actually reduce register pressure or be register pressure
- // neutral.
- DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
- if (CI != Cost.end())
- CI->second -= RCCost;
- else
- Cost.insert(std::make_pair(RCId, -RCCost));
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
+ if (MO.isDef()) {
+ if (HasHighOperandLatency(MI, i, Reg)) {
+ DEBUG(dbgs() << "Hoist High Latency: " << MI);
+ ++NumHighLatency;
+ return true;
}
+ Cost[RCId] += RCCost;
+ } else if (isOperandKill(MO, MRI)) {
+ // If a virtual register use is a kill, hoisting it out of the loop
+ // may actually reduce register pressure or be register pressure
+ // neutral.
+ Cost[RCId] -= RCCost;
}
+ }
- // Visit BBs from header to current BB, if hoisting this doesn't cause
- // high register pressure, then it's safe to proceed.
- if (!CanCauseHighRegPressure(Cost)) {
- ++NumLowRP;
- return true;
- }
+ // Visit BBs from header to current BB, if hoisting this doesn't cause
+ // high register pressure, then it's safe to proceed.
+ if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
+ DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
+ ++NumLowRP;
+ return true;
+ }
- // Do not "speculate" in high register pressure situation. If an
- // instruction is not guaranteed to be executed in the loop, it's best to be
- // conservative.
- if (AvoidSpeculation &&
- (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI)))
- return false;
+ // Don't risk increasing register pressure if it would create copies.
+ if (CreatesCopy) {
+ DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
+ return false;
+ }
- // High register pressure situation, only hoist if the instruction is going to
- // be remat'ed.
- if (!TII->isTriviallyReMaterializable(&MI, AA) &&
- !MI.isInvariantLoad(AA))
- return false;
+ // Do not "speculate" in high register pressure situation. If an
+ // instruction is not guaranteed to be executed in the loop, it's best to be
+ // conservative.
+ if (AvoidSpeculation &&
+ (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
+ DEBUG(dbgs() << "Won't speculate: " << MI);
+ return false;
}
- // If result(s) of this instruction is used by PHIs outside of the loop, then
- // don't hoist it if the instruction because it will introduce an extra copy.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.isDef())
- continue;
- if (HasAnyPHIUse(MO.getReg()))
- return false;
+ // High register pressure situation, only hoist if the instruction is going
+ // to be remat'ed.
+ if (!TII->isTriviallyReMaterializable(&MI, AA) &&
+ !MI.isInvariantLoad(AA)) {
+ DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
+ return false;
}
return true;
@@ -1087,7 +1241,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
// Don't unfold simple loads.
- if (MI->getDesc().canFoldAsLoad())
+ if (MI->canFoldAsLoad())
return 0;
// If not, we may be able to unfold a load and hoist that.
@@ -1123,8 +1277,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
assert(NewMIs.size() == 2 &&
"Unfolded a load into multiple instructions!");
MachineBasicBlock *MBB = MI->getParent();
- MBB->insert(MI, NewMIs[0]);
- MBB->insert(MI, NewMIs[1]);
+ MachineBasicBlock::iterator Pos = MI;
+ MBB->insert(Pos, NewMIs[0]);
+ MBB->insert(Pos, NewMIs[1]);
// If unfolding produced a load that wasn't loop-invariant or profitable to
// hoist, discard the new instructions and bail.
if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
@@ -1180,6 +1335,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
// Replace virtual registers defined by MI by their counterparts defined
// by Dup.
+ SmallVector<unsigned, 2> Defs;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
@@ -1190,11 +1346,33 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
"Instructions with different phys regs are not identical!");
if (MO.isReg() && MO.isDef() &&
- !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
- MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
- MRI->clearKillFlags(Dup->getOperand(i).getReg());
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ Defs.push_back(i);
+ }
+
+ SmallVector<const TargetRegisterClass*, 2> OrigRCs;
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Idx = Defs[i];
+ unsigned Reg = MI->getOperand(Idx).getReg();
+ unsigned DupReg = Dup->getOperand(Idx).getReg();
+ OrigRCs.push_back(MRI->getRegClass(DupReg));
+
+ if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
+ // Constraining failed; restore the register classes already changed.
+ for (unsigned j = 0; j != i; ++j)
+ MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
+ return false;
}
}
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Idx = Defs[i];
+ unsigned Reg = MI->getOperand(Idx).getReg();
+ unsigned DupReg = Dup->getOperand(Idx).getReg();
+ MRI->replaceRegWith(Reg, DupReg);
+ MRI->clearKillFlags(DupReg);
+ }
+
MI->eraseFromParent();
++NumCSEed;
return true;
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 80c4854238af..ea98b23c6d57 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -257,7 +257,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
: ImmutablePass(ID), Context(MAI, MRI, MOFI),
ObjFileMMI(0), CompactUnwindEncoding(0), CurCallSite(0), CallsEHReturn(0),
CallsUnwindInit(0), DbgInfoAvailable(false),
- CallsExternalVAFunctionWithFloatingPointArguments(false) {
+ UsesVAFloatArgument(false) {
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
// Always emit some info, by default "no personality" info.
Personalities.push_back(NULL);
@@ -268,9 +268,9 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
MachineModuleInfo::MachineModuleInfo()
: ImmutablePass(ID),
Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) {
- assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
- "should always be explicitly constructed by LLVMTargetMachine");
- abort();
+ llvm_unreachable("This MachineModuleInfo constructor should never be called, "
+ "MMI should always be explicitly constructed by "
+ "LLVMTargetMachine");
}
MachineModuleInfo::~MachineModuleInfo() {
@@ -503,8 +503,7 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
/// indexes.
void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym,
ArrayRef<unsigned> Sites) {
- for (unsigned I = 0, E = Sites.size(); I != E; ++I)
- LPadToCallSiteMap[Sym].push_back(Sites[I]);
+ LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
}
/// getTypeIDFor - Return the type id for the specified typeinfo. This is
@@ -541,8 +540,7 @@ try_next:;
// Add the new filter.
int FilterID = -(1 + FilterIds.size());
FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
- for (unsigned I = 0, N = TyIds.size(); I != N; ++I)
- FilterIds.push_back(TyIds[I]);
+ FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
FilterEnds.push_back(FilterIds.size());
FilterIds.push_back(0); // terminator
return FilterID;
@@ -561,13 +559,13 @@ unsigned MachineModuleInfo::getPersonalityIndex() const {
const Function* Personality = NULL;
// Scan landing pads. If there is at least one non-NULL personality - use it.
- for (unsigned i = 0; i != LandingPads.size(); ++i)
+ for (unsigned i = 0, e = LandingPads.size(); i != e; ++i)
if (LandingPads[i].Personality) {
Personality = LandingPads[i].Personality;
break;
}
- for (unsigned i = 0; i < Personalities.size(); ++i) {
+ for (unsigned i = 0, e = Personalities.size(); i < e; ++i) {
if (Personalities[i] == Personality)
return i;
}
diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp
index 9f4ef1287803..58e067bcb9b2 100644
--- a/lib/CodeGen/MachinePassRegistry.cpp
+++ b/lib/CodeGen/MachinePassRegistry.cpp
@@ -16,6 +16,7 @@
using namespace llvm;
+void MachinePassRegistryListener::anchor() { }
/// Add - Adds a function pass to the registration list.
///
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 266ebf64a3fc..7ea151713a6d 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -18,11 +18,12 @@
using namespace llvm;
MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
- : TRI(&TRI), IsSSA(true) {
+ : TRI(&TRI), IsSSA(true), TracksLiveness(true) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
UsedPhysRegs.resize(TRI.getNumRegs());
-
+ UsedPhysRegMask.resize(TRI.getNumRegs());
+
// Create the physreg use/def lists.
PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()];
memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs());
@@ -30,9 +31,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
MachineRegisterInfo::~MachineRegisterInfo() {
#ifndef NDEBUG
- for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
- assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
- "Vreg use list non-empty still?");
+ clearVirtRegs();
for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
assert(!PhysRegUseDefLists[i] &&
"PhysRegUseDefLists has entries after all instructions are deleted");
@@ -76,12 +75,14 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) {
// Accumulate constraints from all uses.
for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E;
++I) {
- // TRI doesn't have accurate enough information to model this yet.
- if (I.getOperand().getSubReg())
- return false;
const TargetRegisterClass *OpRC =
I->getRegClassConstraint(I.getOperandNo(), TII, TRI);
- if (OpRC)
+ if (unsigned SubIdx = I.getOperand().getSubReg()) {
+ if (OpRC)
+ NewRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, SubIdx);
+ else
+ NewRC = TRI->getSubClassWithSubReg(NewRC, SubIdx);
+ } else if (OpRC)
NewRC = TRI->getCommonSubClass(NewRC, OpRC);
if (!NewRC || NewRC == OldRC)
return false;
@@ -115,6 +116,16 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
return Reg;
}
+/// clearVirtRegs - Remove all virtual registers (after physreg assignment).
+void MachineRegisterInfo::clearVirtRegs() {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+ assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
+ "Vreg use list non-empty still?");
+#endif
+ VRegInfo.clear();
+}
+
/// HandleVRegListReallocation - We just added a virtual register to the
/// VRegInfo info list and it reallocated. Update the use/def lists info
/// pointers.
@@ -150,9 +161,8 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
/// form, so there should only be one definition.
MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
// Since we are in SSA form, we can use the first definition.
- if (!def_empty(Reg))
- return &*def_begin(Reg);
- return 0;
+ def_iterator I = def_begin(Reg);
+ return !I.atEnd() ? &*I : 0;
}
bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
@@ -242,18 +252,31 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
}
}
-void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) {
- for (int i = UsedPhysRegs.find_first(); i >= 0;
- i = UsedPhysRegs.find_next(i))
- for (const unsigned *SS = TRI.getSubRegisters(i);
- unsigned SubReg = *SS; ++SS)
- if (SubReg > unsigned(i))
- UsedPhysRegs.set(SubReg);
-}
-
#ifndef NDEBUG
void MachineRegisterInfo::dumpUses(unsigned Reg) const {
for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
I.getOperand().getParent()->dump();
}
#endif
+
+void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
+ ReservedRegs = TRI->getReservedRegs(MF);
+}
+
+bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
+ const MachineFunction &MF) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+
+ // Check if any overlapping register is modified.
+ for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R)
+ if (!def_empty(*R))
+ return false;
+
+ // Check if any overlapping register is allocatable so it may be used later.
+ if (AllocatableRegs.empty())
+ AllocatableRegs = TRI->getAllocatableSet(MF);
+ for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R)
+ if (AllocatableRegs.test(*R))
+ return false;
+ return true;
+}
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 84d6df25397c..070a55704dc5 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -81,7 +81,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
if (BB->empty())
return 0;
- MachineBasicBlock::iterator I = BB->front();
+ MachineBasicBlock::iterator I = BB->begin();
if (!I->isPHI())
return 0;
@@ -182,7 +182,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
return DupPHI;
// Otherwise, we do need a PHI: insert one now.
- MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
Loc, VRC, MRI, TII);
@@ -214,7 +214,6 @@ MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI,
}
llvm_unreachable("MachineOperand::getParent() failure?");
- return 0;
}
/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
@@ -311,7 +310,7 @@ public:
/// Add it into the specified block and return the register.
static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
MachineSSAUpdater *Updater) {
- MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
Updater->VRC, Updater->MRI,
Updater->TII);
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
new file mode 100644
index 000000000000..1d3241b8cc6b
--- /dev/null
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -0,0 +1,614 @@
+//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+
+#include <queue>
+
+using namespace llvm;
+
+static cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
+ cl::desc("Force top-down list scheduling"));
+static cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
+ cl::desc("Force bottom-up list scheduling"));
+
+#ifndef NDEBUG
+static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show MISched dags after they are processed"));
+
+static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
+ cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
+#else
+static bool ViewMISchedDAGs = false;
+#endif // NDEBUG
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Scheduling Pass and Registry
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// MachineScheduler runs after coalescing and before register allocation.
+class MachineScheduler : public MachineSchedContext,
+ public MachineFunctionPass {
+public:
+ MachineScheduler();
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory() {}
+
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ virtual void print(raw_ostream &O, const Module* = 0) const;
+
+ static char ID; // Class identification, replacement for typeinfo
+};
+} // namespace
+
+char MachineScheduler::ID = 0;
+
+char &llvm::MachineSchedulerID = MachineScheduler::ID;
+
+INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
+ "Machine Instruction Scheduler", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(MachineScheduler, "misched",
+ "Machine Instruction Scheduler", false, false)
+
+MachineScheduler::MachineScheduler()
+: MachineFunctionPass(ID) {
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+}
+
+void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachinePassRegistry MachineSchedRegistry::Registry;
+
+/// A dummy default scheduler factory indicates whether the scheduler
+/// is overridden on the command line.
+static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
+ return 0;
+}
+
+/// MachineSchedOpt allows command line selection of the scheduler.
+static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
+ RegisterPassParser<MachineSchedRegistry> >
+MachineSchedOpt("misched",
+ cl::init(&useDefaultMachineSched), cl::Hidden,
+ cl::desc("Machine instruction scheduler to use"));
+
+static MachineSchedRegistry
+DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
+ useDefaultMachineSched);
+
+/// Forward declare the standard machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
+
+/// Top-level MachineScheduler pass driver.
+///
+/// Visit blocks in function order. Divide each block into scheduling regions
+/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
+/// consistent with the DAG builder, which traverses the interior of the
+/// scheduling regions bottom-up.
+///
+/// This design avoids exposing scheduling boundaries to the DAG builder,
+/// simplifying the DAG builder's support for "special" target instructions.
+/// At the same time the design allows target schedulers to operate across
+/// scheduling boundaries, for example to bundle the boundary instructions
+/// without reordering them. This creates complexity, because the target
+/// scheduler must update the RegionBegin and RegionEnd positions cached by
+/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
+/// design would be to split blocks at scheduling boundaries, but LLVM has a
+/// general bias against block splitting purely for implementation simplicity.
+bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ // Initialize the context of the pass.
+ MF = &mf;
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ PassConfig = &getAnalysis<TargetPassConfig>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ LIS = &getAnalysis<LiveIntervals>();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ // Select the scheduler, or set the default.
+ MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
+ if (Ctor == useDefaultMachineSched) {
+ // Get the default scheduler set by the target.
+ Ctor = MachineSchedRegistry::getDefault();
+ if (!Ctor) {
+ Ctor = createConvergingSched;
+ MachineSchedRegistry::setDefault(Ctor);
+ }
+ }
+ // Instantiate the selected scheduler.
+ OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
+
+ // Visit all machine basic blocks.
+ for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
+ MBB != MBBEnd; ++MBB) {
+
+ Scheduler->startBlock(MBB);
+
+ // Break the block into scheduling regions [I, RegionEnd), and schedule each
+ // region as soon as it is discovered. RegionEnd points to the scheduling
+ // boundary at the bottom of the region. The DAG does not include RegionEnd,
+ // but the region does (i.e. the next RegionEnd is above the previous
+ // RegionBegin). If the current block has no terminator then RegionEnd ==
+ // MBB->end() for the bottom region.
+ //
+ // The Scheduler may insert instructions during either schedule() or
+ // exitRegion(), even for empty regions. So the local iterators 'I' and
+ // 'RegionEnd' are invalid across these calls.
+ unsigned RemainingCount = MBB->size();
+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
+ // Avoid decrementing RegionEnd for blocks with no terminator.
+ if (RegionEnd != MBB->end()
+ || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
+ --RegionEnd;
+ // Count the boundary instruction.
+ --RemainingCount;
+ }
+
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ MachineBasicBlock::iterator I = RegionEnd;
+ for(;I != MBB->begin(); --I, --RemainingCount) {
+ if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
+ break;
+ }
+ // Notify the scheduler of the region, even if we may skip scheduling
+ // it. Perhaps it still needs to be bundled.
+ Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount);
+
+ // Skip empty scheduling regions (0 or 1 schedulable instructions).
+ if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
+ // Close the current region. Bundle the terminator if needed.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler->exitRegion();
+ continue;
+ }
+ DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName()
+ << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
+ if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+ else dbgs() << "End";
+ dbgs() << " Remaining: " << RemainingCount << "\n");
+
+ // Schedule a region: possibly reorder instructions.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler->schedule();
+
+ // Close the current region.
+ Scheduler->exitRegion();
+
+ // Scheduling has invalidated the current iterator 'I'. Ask the
+ // scheduler for the top of its scheduled region.
+ RegionEnd = Scheduler->begin();
+ }
+ assert(RemainingCount == 0 && "Instruction count mismatch!");
+ Scheduler->finishBlock();
+ }
+ Scheduler->finalizeSchedule();
+ DEBUG(LIS->print(dbgs()));
+ return true;
+}
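The bottom-up region discovery above is easier to see in isolation. Below is a minimal, self-contained sketch of the same idea over a plain std::list (illustrative only, not part of the patch): regions are the maximal runs of non-boundary elements, and they are discovered and visited from the bottom of the block upward, mirroring the [I, RegionEnd) loop.

#include <cstdio>
#include <list>

// Stand-in for TargetInstrInfo::isSchedulingBoundary: zeroes act as the
// boundary elements (calls, labels, terminators in the real pass).
static bool isBoundary(int V) { return V == 0; }

static void walkRegions(std::list<int> &Block) {
  typedef std::list<int>::iterator Iter;
  Iter RegionEnd = Block.end();
  while (RegionEnd != Block.begin()) {
    // Scan upward from RegionEnd to the nearest boundary; [I, RegionEnd) is
    // one region.
    Iter I = RegionEnd;
    while (I != Block.begin()) {
      Iter Prev = I;
      --Prev;
      if (isBoundary(*Prev))
        break;
      I = Prev;
    }
    std::printf("region:");
    for (Iter P = I; P != RegionEnd; ++P)
      std::printf(" %d", *P);
    std::printf("\n");
    // The next region ends just above this region's boundary.
    RegionEnd = I;
    if (RegionEnd != Block.begin())
      --RegionEnd;                   // step over the boundary element itself
  }
}

int main() {
  int Vals[] = { 1, 2, 0, 3, 4, 5, 0, 6 };
  std::list<int> Block(Vals, Vals + 8);
  walkRegions(Block);   // prints "region: 6", then "region: 3 4 5",
                        // then "region: 1 2" -- bottom-up, like the pass
  return 0;
}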
+
+void MachineScheduler::print(raw_ostream &O, const Module* m) const {
+ // unimplemented
+}
+
+//===----------------------------------------------------------------------===//
+// MachineSchedStrategy - Interface to a machine scheduling algorithm.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class ScheduleDAGMI;
+
+/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected
+/// scheduling algorithm.
+///
+/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it
+/// in ScheduleDAGInstrs.h
+class MachineSchedStrategy {
+public:
+ virtual ~MachineSchedStrategy() {}
+
+ /// Initialize the strategy after building the DAG for a new region.
+ virtual void initialize(ScheduleDAGMI *DAG) = 0;
+
+ /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
+ /// schedule the node at the top of the unscheduled region. Otherwise it will
+ /// be scheduled at the bottom.
+ virtual SUnit *pickNode(bool &IsTopNode) = 0;
+
+ /// When all predecessor dependencies have been resolved, free this node for
+ /// top-down scheduling.
+ virtual void releaseTopNode(SUnit *SU) = 0;
+ /// When all successor dependencies have been resolved, free this node for
+ /// bottom-up scheduling.
+ virtual void releaseBottomNode(SUnit *SU) = 0;
+};
+} // namespace
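To make the strategy contract above concrete, here is a minimal sketch of an implementation that schedules nodes strictly top-down, in whatever order they become ready. It is illustrative only and not part of this patch; the class name SourceOrderStrategy is hypothetical.

// Hypothetical strategy: always extend the top of the scheduled zone, picking
// the most recently released ready node.
class SourceOrderStrategy : public MachineSchedStrategy {
  std::vector<SUnit*> TopReady;
public:
  virtual void initialize(ScheduleDAGMI *DAG) {
    TopReady.clear();
  }

  virtual SUnit *pickNode(bool &IsTopNode) {
    IsTopNode = true;                // always schedule at the top.
    if (TopReady.empty())
      return NULL;                   // nothing left: the region is done.
    SUnit *SU = TopReady.back();
    TopReady.pop_back();
    return SU;
  }

  virtual void releaseTopNode(SUnit *SU) {
    TopReady.push_back(SU);          // all of SU's predecessors are scheduled.
  }

  virtual void releaseBottomNode(SUnit *SU) {
    // Bottom-up availability is irrelevant to a purely top-down strategy.
  }
};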
+
+//===----------------------------------------------------------------------===//
+// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
+// preservation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules
+/// machine instructions while updating LiveIntervals.
+class ScheduleDAGMI : public ScheduleDAGInstrs {
+ AliasAnalysis *AA;
+ MachineSchedStrategy *SchedImpl;
+
+ /// The top of the unscheduled zone.
+ MachineBasicBlock::iterator CurrentTop;
+
+ /// The bottom of the unscheduled zone.
+ MachineBasicBlock::iterator CurrentBottom;
+
+ /// The number of instructions scheduled so far. Used to cut off the
+ /// scheduler at the point determined by misched-cutoff.
+ unsigned NumInstrsScheduled;
+public:
+ ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
+ ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
+ AA(C->AA), SchedImpl(S), CurrentTop(), CurrentBottom(),
+ NumInstrsScheduled(0) {}
+
+ ~ScheduleDAGMI() {
+ delete SchedImpl;
+ }
+
+ MachineBasicBlock::iterator top() const { return CurrentTop; }
+ MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
+
+ /// Implement ScheduleDAGInstrs interface.
+ void schedule();
+
+protected:
+ void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
+ bool checkSchedLimit();
+
+ void releaseSucc(SUnit *SU, SDep *SuccEdge);
+ void releaseSuccessors(SUnit *SU);
+ void releasePred(SUnit *SU, SDep *PredEdge);
+ void releasePredecessors(SUnit *SU);
+};
+} // namespace
+
+/// releaseSucc - Decrement the NumPredsLeft count of a successor. When
+/// NumPredsLeft reaches zero, release the successor node.
+void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ SchedImpl->releaseTopNode(SuccSU);
+}
+
+/// releaseSuccessors - Call releaseSucc on each of SU's successors.
+void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ releaseSucc(SU, &*I);
+ }
+}
+
+/// releasePred - Decrement the NumSuccsLeft count of a predecessor. When
+/// NumSuccsLeft reaches zero, release the predecessor node.
+void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
+ SchedImpl->releaseBottomNode(PredSU);
+}
+
+/// releasePredecessors - Call releasePred on each of SU's predecessors.
+void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ releasePred(SU, &*I);
+ }
+}
+
+void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
+ MachineBasicBlock::iterator InsertPos) {
+ // Fix RegionBegin if the first instruction moves down.
+ if (&*RegionBegin == MI)
+ RegionBegin = llvm::next(RegionBegin);
+ BB->splice(InsertPos, BB, MI);
+ LIS->handleMove(MI);
+ // Fix RegionBegin if another instruction moves above the first instruction.
+ if (RegionBegin == InsertPos)
+ RegionBegin = MI;
+}
+
+bool ScheduleDAGMI::checkSchedLimit() {
+#ifndef NDEBUG
+ if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
+ CurrentTop = CurrentBottom;
+ return false;
+ }
+ ++NumInstrsScheduled;
+#endif
+ return true;
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region.
+void ScheduleDAGMI::schedule() {
+ buildSchedGraph(AA);
+
+ DEBUG(dbgs() << "********** MI Scheduling **********\n");
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ if (ViewMISchedDAGs) viewGraph();
+
+ SchedImpl->initialize(this);
+
+ // Release edges from the special Entry node or to the special Exit node.
+ releaseSuccessors(&EntrySU);
+ releasePredecessors(&ExitSU);
+
+ // Release all DAG roots for scheduling.
+ for (std::vector<SUnit>::iterator I = SUnits.begin(), E = SUnits.end();
+ I != E; ++I) {
+ // A SUnit is ready for top-down scheduling if it has no predecessors.
+ if (I->Preds.empty())
+ SchedImpl->releaseTopNode(&(*I));
+ // A SUnit is ready for bottom-up scheduling if it has no successors.
+ if (I->Succs.empty())
+ SchedImpl->releaseBottomNode(&(*I));
+ }
+
+ CurrentTop = RegionBegin;
+ CurrentBottom = RegionEnd;
+ bool IsTopNode = false;
+ while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+ << " Scheduling Instruction:\n"; SU->dump(this));
+ if (!checkSchedLimit())
+ break;
+
+ // Move the instruction to its new location in the instruction stream.
+ MachineInstr *MI = SU->getInstr();
+
+ if (IsTopNode) {
+ assert(SU->isTopReady() && "node still has unscheduled dependencies");
+ if (&*CurrentTop == MI)
+ ++CurrentTop;
+ else
+ moveInstruction(MI, CurrentTop);
+ // Release dependent instructions for scheduling.
+ releaseSuccessors(SU);
+ }
+ else {
+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+ if (&*llvm::prior(CurrentBottom) == MI)
+ --CurrentBottom;
+ else {
+ if (&*CurrentTop == MI)
+ CurrentTop = llvm::next(CurrentTop);
+ moveInstruction(MI, CurrentBottom);
+ CurrentBottom = MI;
+ }
+ // Release dependent instructions for scheduling.
+ releasePredecessors(SU);
+ }
+ SU->isScheduled = true;
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+}
+
+//===----------------------------------------------------------------------===//
+// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
+/// the schedule.
+class ConvergingScheduler : public MachineSchedStrategy {
+ ScheduleDAGMI *DAG;
+
+ unsigned NumTopReady;
+ unsigned NumBottomReady;
+
+public:
+ virtual void initialize(ScheduleDAGMI *dag) {
+ DAG = dag;
+ // Reset the ready counts for this region; releaseTopNode and
+ // releaseBottomNode bump them as roots and leaves are released.
+ NumTopReady = 0;
+ NumBottomReady = 0;
+
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ }
+
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom())
+ return NULL;
+
+ // As an initial placeholder heuristic, schedule in the direction that has
+ // the fewest choices.
+ SUnit *SU;
+ if (ForceTopDown || (!ForceBottomUp && NumTopReady <= NumBottomReady)) {
+ SU = DAG->getSUnit(DAG->top());
+ IsTopNode = true;
+ }
+ else {
+ SU = DAG->getSUnit(llvm::prior(DAG->bottom()));
+ IsTopNode = false;
+ }
+ if (SU->isTopReady()) {
+ assert(NumTopReady > 0 && "bad ready count");
+ --NumTopReady;
+ }
+ if (SU->isBottomReady()) {
+ assert(NumBottomReady > 0 && "bad ready count");
+ --NumBottomReady;
+ }
+ return SU;
+ }
+
+ virtual void releaseTopNode(SUnit *SU) {
+ ++NumTopReady;
+ }
+ virtual void releaseBottomNode(SUnit *SU) {
+ ++NumBottomReady;
+ }
+};
+} // namespace
+
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ return new ScheduleDAGMI(C, new ConvergingScheduler());
+}
+static MachineSchedRegistry
+ConvergingSchedRegistry("converge", "Standard converging scheduler.",
+ createConvergingSched);
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Shuffler for Correctness Testing
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+namespace {
+/// Apply a less-than relation on the node order, which corresponds to the
+/// instruction order prior to scheduling. IsReverse implements greater-than.
+template<bool IsReverse>
+struct SUnitOrder {
+ bool operator()(SUnit *A, SUnit *B) const {
+ if (IsReverse)
+ return A->NodeNum > B->NodeNum;
+ else
+ return A->NodeNum < B->NodeNum;
+ }
+};
+
+/// Reorder instructions as much as possible.
+class InstructionShuffler : public MachineSchedStrategy {
+ bool IsAlternating;
+ bool IsTopDown;
+
+ // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
+ // gives nodes with a higher number higher priority, so the latest
+ // instructions are scheduled first.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
+ TopQ;
+ // When scheduling bottom-up, use greater-than as the queue priority.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
+ BottomQ;
+public:
+ InstructionShuffler(bool alternate, bool topdown)
+ : IsAlternating(alternate), IsTopDown(topdown) {}
+
+ virtual void initialize(ScheduleDAGMI *) {
+ TopQ.clear();
+ BottomQ.clear();
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ SUnit *SU;
+ if (IsTopDown) {
+ do {
+ if (TopQ.empty()) return NULL;
+ SU = TopQ.top();
+ TopQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = true;
+ }
+ else {
+ do {
+ if (BottomQ.empty()) return NULL;
+ SU = BottomQ.top();
+ BottomQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = false;
+ }
+ if (IsAlternating)
+ IsTopDown = !IsTopDown;
+ return SU;
+ }
+
+ virtual void releaseTopNode(SUnit *SU) {
+ TopQ.push(SU);
+ }
+ virtual void releaseBottomNode(SUnit *SU) {
+ BottomQ.push(SU);
+ }
+};
+} // namespace
+
+static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
+ bool Alternate = !ForceTopDown && !ForceBottomUp;
+ bool TopDown = !ForceBottomUp;
+ assert((TopDown || !ForceTopDown) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
+}
+static MachineSchedRegistry ShufflerRegistry(
+ "shuffle", "Shuffle machine instructions alternating directions",
+ createInstructionShuffler);
+#endif // !NDEBUG
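
The driver above is easier to follow in isolation: a node becomes ready at the top once every predecessor has been scheduled top-down, ready at the bottom once every successor has been scheduled bottom-up, and the placeholder ConvergingScheduler heuristic simply picks from whichever end has fewer ready nodes. The sketch below (plain C++ with hypothetical names, not LLVM's API) mirrors that bookkeeping on a toy DAG; the resulting order is the top-down prefix followed by the reversed bottom-up suffix, just as CurrentTop and CurrentBottom converge in schedule().

// Standalone sketch of the two-ended ready-list bookkeeping used above:
// scheduling a node top-down releases its successors, scheduling bottom-up
// releases its predecessors, and the picker takes the end with fewer choices.
#include <cassert>
#include <cstdio>
#include <set>
#include <vector>

struct Node {
  std::vector<int> Succs, Preds;
  int PredsLeft = 0, SuccsLeft = 0;
  bool Scheduled = false;
};

int main() {
  // A small diamond DAG: 0 -> {1,2} -> 3.
  std::vector<Node> G(4);
  auto addEdge = [&](int A, int B) {
    G[A].Succs.push_back(B);
    G[B].Preds.push_back(A);
  };
  addEdge(0, 1); addEdge(0, 2); addEdge(1, 3); addEdge(2, 3);
  for (Node &N : G) {
    N.PredsLeft = (int)N.Preds.size();
    N.SuccsLeft = (int)N.Succs.size();
  }

  std::set<int> TopReady, BottomReady;
  for (int I = 0; I != (int)G.size(); ++I) {
    if (G[I].Preds.empty()) TopReady.insert(I);    // DAG roots.
    if (G[I].Succs.empty()) BottomReady.insert(I); // DAG leaves.
  }

  std::vector<int> TopOrder, BottomOrder;
  auto Remaining = G.size();
  while (Remaining) {
    // Placeholder heuristic: schedule in the direction with fewer choices.
    bool FromTop = !TopReady.empty() &&
                   (BottomReady.empty() ||
                    TopReady.size() <= BottomReady.size());
    std::set<int> &Q = FromTop ? TopReady : BottomReady;
    assert(!Q.empty() && "no ready node left; the graph is not a DAG");
    int Id = *Q.begin();
    TopReady.erase(Id);
    BottomReady.erase(Id); // A node may have been ready at both ends.
    G[Id].Scheduled = true;
    --Remaining;
    if (FromTop) {
      TopOrder.push_back(Id);
      for (int S : G[Id].Succs)             // Mirrors releaseSuccessors().
        if (--G[S].PredsLeft == 0 && !G[S].Scheduled)
          TopReady.insert(S);
    } else {
      BottomOrder.push_back(Id);
      for (int P : G[Id].Preds)             // Mirrors releasePredecessors().
        if (--G[P].SuccsLeft == 0 && !G[P].Scheduled)
          BottomReady.insert(P);
    }
  }

  // The result is the top-down prefix followed by the reversed bottom-up
  // suffix, exactly as the unscheduled zone shrinks to nothing above.
  printf("order:");
  for (int Id : TopOrder) printf(" %d", Id);
  for (auto I = BottomOrder.rbegin(), E = BottomOrder.rend(); I != E; ++I)
    printf(" %d", *I);
  printf("\n");
  return 0;
}
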
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 29cfb49953b9..1ce546b578ad 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -32,7 +32,7 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-static cl::opt<bool>
+static cl::opt<bool>
SplitEdges("machine-sink-split",
cl::desc("Split critical edges during machine sinking"),
cl::init(true), cl::Hidden);
@@ -90,12 +90,19 @@ namespace {
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB,
bool &BreakPHIEdge, bool &LocalUse) const;
+ MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB,
+ bool &BreakPHIEdge);
+ bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo);
+
bool PerformTrivialForwardCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB);
};
} // end anonymous namespace
char MachineSinking::ID = 0;
+char &llvm::MachineSinkingID = MachineSinking::ID;
INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
"Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -104,8 +111,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MachineSinking, "machine-sink",
"Machine code sinking", false, false)
-FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
-
bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB) {
if (!MI->isCopy())
@@ -147,14 +152,10 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Only makes sense for vregs");
+ // Ignore debug uses because debug info doesn't affect the code.
if (MRI->use_nodbg_empty(Reg))
return true;
- // Ignoring debug uses is necessary so debug info doesn't affect the code.
- // This may leave a referencing dbg_value in the original block, before
- // the definition of the vreg. Dwarf generator handles this although the
- // user might not get the right info at runtime.
-
// BreakPHIEdge is true if all the uses are in the successor MBB being sunken
// into and they are all PHI nodes. In this case, machine-sink must break
// the critical edge first. e.g.
@@ -291,7 +292,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
if (!CEBCandidates.insert(std::make_pair(From, To)))
return true;
- if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove())
+ if (!MI->isCopy() && !MI->isAsCheapAsAMove())
return true;
// MI is cheap, we probably don't want to break the critical edge for it.
@@ -382,9 +383,9 @@ static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
}
-/// collectDebgValues - Scan instructions following MI and collect any
+/// collectDebugValues - Scan instructions following MI and collect any
/// matching DBG_VALUEs.
-static void collectDebugValues(MachineInstr *MI,
+static void collectDebugValues(MachineInstr *MI,
SmallVector<MachineInstr *, 2> & DbgValues) {
DbgValues.clear();
if (!MI->getOperand(0).isReg())
@@ -401,35 +402,76 @@ static void collectDebugValues(MachineInstr *MI,
}
}
-/// SinkInstruction - Determine whether it is safe to sink the specified machine
-/// instruction out of its current block into a successor.
-bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
- // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
- // be close to the source to make it easier to coalesce.
- if (AvoidsSinking(MI, MRI))
+/// isPostDominatedBy - Return true if A is post-dominated by B.
+static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) {
+
+ // FIXME - Use real post dominator.
+ if (A->succ_size() != 2)
+ return false;
+ MachineBasicBlock::succ_iterator I = A->succ_begin();
+ if (B == *I)
+ ++I;
+ MachineBasicBlock *OtherSuccBlock = *I;
+ if (OtherSuccBlock->succ_size() != 1 ||
+ *(OtherSuccBlock->succ_begin()) != B)
return false;
- // Check if it's safe to move the instruction.
- if (!MI->isSafeToMove(TII, AA, SawStore))
+ return true;
+}
+
+/// isProfitableToSinkTo - Return true if it is profitable to sink MI to SuccToSinkTo.
+bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo) {
+ assert (MI && "Invalid MachineInstr!");
+ assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
+
+ if (MBB == SuccToSinkTo)
return false;
- // FIXME: This should include support for sinking instructions within the
- // block they are currently in to shorten the live ranges. We often get
- // instructions sunk into the top of a large block, but it would be better to
- // also sink them down before their first use in the block. This xform has to
- // be careful not to *increase* register pressure though, e.g. sinking
- // "x = y + z" down if it kills y and z would increase the live ranges of y
- // and z and only shrink the live range of x.
+ // It is profitable if SuccToSinkTo does not post-dominate the current block.
+ if (!isPostDominatedBy(MBB, SuccToSinkTo))
+ return true;
+
+ // Check whether the only uses of Reg in the post-dominated block are PHIs.
+ bool NonPHIUse = false;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseBlock == SuccToSinkTo && !UseInst->isPHI())
+ NonPHIUse = true;
+ }
+ if (!NonPHIUse)
+ return true;
+
+ // Even when SuccToSinkTo post-dominates the current block, sinking may still
+ // be profitable if MI can be sunk further into another block in a later round.
+ bool BreakPHIEdge = false;
+ // FIXME - If finding the successor is compile-time expensive, cache the results.
+ if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge))
+ return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2);
+
+ // If SuccToSinkTo is the final destination and it post-dominates the current
+ // block, then it is not profitable to sink MI into SuccToSinkTo.
+ return false;
+}
+
+/// FindSuccToSinkTo - Find a successor to sink this instruction to.
+MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool &BreakPHIEdge) {
+
+ assert (MI && "Invalid MachineInstr!");
+ assert (MBB && "Invalid MachineBasicBlock!");
// Loop over all the operands of the specified instruction. If there is
// anything we can't handle, bail out.
- MachineBasicBlock *ParentBlock = MI->getParent();
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
MachineBasicBlock *SuccToSinkTo = 0;
-
- bool BreakPHIEdge = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue; // Ignore non-register operands.
@@ -442,24 +484,11 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI->def_empty(Reg))
- return false;
-
- if (AllocatableSet.test(Reg))
- return false;
-
- // Check for a def among the register's aliases too.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- if (!MRI->def_empty(AliasReg))
- return false;
-
- if (AllocatableSet.test(AliasReg))
- return false;
- }
+ if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ return NULL;
} else if (!MO.isDead()) {
// A def that isn't dead. We can't move it.
- return false;
+ return NULL;
}
} else {
// Virtual register uses are always safe to sink.
@@ -467,7 +496,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If it's not safe to move defs of the register class, then abort.
if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
- return false;
+ return NULL;
// FIXME: This picks a successor to sink into based on having one
// successor that dominates all the uses. However, there are cases where
@@ -488,48 +517,79 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If a previous operand picked a block to sink to, then this operand
// must be sinkable to the same block.
bool LocalUse = false;
- if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock,
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
BreakPHIEdge, LocalUse))
- return false;
+ return NULL;
continue;
}
// Otherwise, we should look at all the successors and decide which one
// we should sink to.
- for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
- E = ParentBlock->succ_end(); SI != E; ++SI) {
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBlock = *SI;
bool LocalUse = false;
- if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock,
+ if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
BreakPHIEdge, LocalUse)) {
- SuccToSinkTo = *SI;
+ SuccToSinkTo = SuccBlock;
break;
}
if (LocalUse)
// Def is used locally, it's never safe to move this def.
- return false;
+ return NULL;
}
// If we couldn't find a block to sink to, ignore this instruction.
if (SuccToSinkTo == 0)
- return false;
+ return NULL;
+ else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo))
+ return NULL;
}
}
- // If there are no outputs, it must have side-effects.
- if (SuccToSinkTo == 0)
- return false;
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (MBB == SuccToSinkTo)
+ return NULL;
// It's not safe to sink instructions to EH landing pad. Control flow into
// landing pad is implicitly defined.
- if (SuccToSinkTo->isLandingPad())
+ if (SuccToSinkTo && SuccToSinkTo->isLandingPad())
+ return NULL;
+
+ return SuccToSinkTo;
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
+ // be close to the source to make it easier to coalesce.
+ if (AvoidsSinking(MI, MRI))
return false;
- // It is not possible to sink an instruction into its own block. This can
- // happen with loops.
- if (MI->getParent() == SuccToSinkTo)
+ // Check if it's safe to move the instruction.
+ if (!MI->isSafeToMove(TII, AA, SawStore))
return false;
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ bool BreakPHIEdge = false;
+ MachineBasicBlock *ParentBlock = MI->getParent();
+ MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge);
+
+ // If there are no outputs, it must have side-effects.
+ if (SuccToSinkTo == 0)
+ return false;
+
// If the instruction to move defines a dead physical register which is live
// when leaving the basic block, don't move it because it could turn into a
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 26847d39e7ad..74ba94d1fcc0 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -69,14 +70,17 @@ namespace {
unsigned foundErrors;
typedef SmallVector<unsigned, 16> RegVector;
+ typedef SmallVector<const uint32_t*, 4> RegMaskVector;
typedef DenseSet<unsigned> RegSet;
typedef DenseMap<unsigned, const MachineInstr*> RegMap;
const MachineInstr *FirstTerminator;
BitVector regsReserved;
+ BitVector regsAllocatable;
RegSet regsLive;
RegVector regsDefined, regsDead, regsKilled;
+ RegMaskVector regMasks;
RegSet regsLiveInButUnused;
SlotIndex lastIndex;
@@ -85,7 +89,7 @@ namespace {
void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
RV.push_back(Reg);
if (TargetRegisterInfo::isPhysicalRegister(Reg))
- for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
+ for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++)
RV.push_back(*R);
}
@@ -175,6 +179,10 @@ namespace {
return Reg < regsReserved.size() && regsReserved.test(Reg);
}
+ bool isAllocatable(unsigned Reg) {
+ return Reg < regsAllocatable.size() && regsAllocatable.test(Reg);
+ }
+
// Analysis information if available
LiveVariables *LiveVars;
LiveIntervals *LiveInts;
@@ -194,6 +202,7 @@ namespace {
void report(const char *msg, const MachineInstr *MI);
void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+ void checkLiveness(const MachineOperand *MO, unsigned MONum);
void markReachable(const MachineBasicBlock *MBB);
void calcRegsPassed();
void checkPHIOps(const MachineBasicBlock *MBB);
@@ -279,13 +288,17 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
MFI!=MFE; ++MFI) {
visitMachineBasicBlockBefore(MFI);
- for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
- MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
+ MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
if (MBBI->getParent() != MFI) {
report("Bad instruction parent pointer", MFI);
*OS << "Instruction: " << *MBBI;
continue;
}
+ // Skip BUNDLE instructions for now. FIXME: We should add code to verify
+ // the BUNDLEs themselves.
+ if (MBBI->isBundle())
+ continue;
visitMachineInstrBefore(MBBI);
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
visitMachineOperand(&MBBI->getOperand(I), I);
@@ -305,6 +318,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
regsDefined.clear();
regsDead.clear();
regsKilled.clear();
+ regMasks.clear();
regsLiveInButUnused.clear();
MBBInfoMap.clear();
@@ -320,7 +334,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
MF->print(*OS, Indexes);
}
*OS << "*** Bad machine code: " << msg << " ***\n"
- << "- function: " << MF->getFunction()->getNameStr() << "\n";
+ << "- function: " << MF->getFunction()->getName() << "\n";
}
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
@@ -370,12 +384,15 @@ void MachineVerifier::visitMachineFunctionBefore() {
// A sub-register of a reserved register is also reserved
for (int Reg = regsReserved.find_first(); Reg>=0;
Reg = regsReserved.find_next(Reg)) {
- for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
+ for (const uint16_t *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
// FIXME: This should probably be:
// assert(regsReserved.test(*Sub) && "Non-reserved sub-register");
regsReserved.set(*Sub);
}
}
+
+ regsAllocatable = TRI->getAllocatableSet(*MF);
+
markReachable(&MF->front());
}
@@ -393,6 +410,20 @@ void
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
FirstTerminator = 0;
+ if (MRI->isSSA()) {
+ // If this block has allocatable physical registers live-in, check that
+ // it is an entry block or landing pad.
+ for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
+ LE = MBB->livein_end();
+ LI != LE; ++LI) {
+ unsigned reg = *LI;
+ if (isAllocatable(reg) && !MBB->isLandingPad() &&
+ MBB != MBB->getParent()->begin()) {
+ report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB);
+ }
+ }
+ }
+
// Count the number of landing pad successors.
SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
@@ -435,7 +466,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
- if (!MBB->empty() && MBB->back().getDesc().isBarrier() &&
+ if (!MBB->empty() && MBB->back().isBarrier() &&
!TII->isPredicated(&MBB->back())) {
report("MBB exits via unconditional fall-through but ends with a "
"barrier instruction!", MBB);
@@ -456,10 +487,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via unconditional branch but doesn't contain "
"any instructions!", MBB);
- } else if (!MBB->back().getDesc().isBarrier()) {
+ } else if (!MBB->back().isBarrier()) {
report("MBB exits via unconditional branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via unconditional branch but the branch isn't a "
"terminator instruction!", MBB);
}
@@ -479,10 +510,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/fall-through but doesn't "
"contain any instructions!", MBB);
- } else if (MBB->back().getDesc().isBarrier()) {
+ } else if (MBB->back().isBarrier()) {
report("MBB exits via conditional branch/fall-through but ends with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via conditional branch/fall-through but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -499,10 +530,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/branch but doesn't "
"contain any instructions!", MBB);
- } else if (!MBB->back().getDesc().isBarrier()) {
+ } else if (!MBB->back().isBarrier()) {
report("MBB exits via conditional branch/branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via conditional branch/branch but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -523,7 +554,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
continue;
}
regsLive.insert(*I);
- for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++)
+ for (const uint16_t *R = TRI->getSubRegisters(*I); *R; R++)
regsLive.insert(*R);
}
regsLiveInButUnused = regsLive;
@@ -533,7 +564,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
BitVector PR = MFI->getPristineRegs(MBB);
for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
regsLive.insert(I);
- for (const unsigned *R = TRI->getSubRegisters(I); *R; R++)
+ for (const uint16_t *R = TRI->getSubRegisters(I); *R; R++)
regsLive.insert(*R);
}
@@ -555,19 +586,22 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// Check the MachineMemOperands for basic consistency.
for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
E = MI->memoperands_end(); I != E; ++I) {
- if ((*I)->isLoad() && !MCID.mayLoad())
+ if ((*I)->isLoad() && !MI->mayLoad())
report("Missing mayLoad flag", MI);
- if ((*I)->isStore() && !MCID.mayStore())
+ if ((*I)->isStore() && !MI->mayStore())
report("Missing mayStore flag", MI);
}
// Debug values must not have a slot index.
- // Other instructions must have one.
+ // Other instructions must have one, unless they are inside a bundle.
if (LiveInts) {
bool mapped = !LiveInts->isNotInMIMap(MI);
if (MI->isDebugValue()) {
if (mapped)
report("Debug instruction has a slot index", MI);
+ } else if (MI->isInsideBundle()) {
+ if (mapped)
+ report("Instruction inside bundle has a slot index", MI);
} else {
if (!mapped)
report("Missing slot index", MI);
@@ -575,7 +609,9 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
// Ensure non-terminators don't follow terminators.
- if (MCID.isTerminator()) {
+ // Ignore predicated terminators formed by if conversion.
+ // FIXME: If conversion shouldn't need to violate this rule.
+ if (MI->isTerminator() && !TII->isPredicated(MI)) {
if (!FirstTerminator)
FirstTerminator = MI;
} else if (FirstTerminator) {
@@ -606,7 +642,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Don't check if it's the last operand in a variadic instruction. See,
// e.g., LDM_RET in the arm back end.
if (MO->isReg() &&
- !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) {
+ !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) {
if (MO->isDef() && !MCOI.isOptionalDef())
report("Explicit operand marked as def", MO, MONum);
if (MO->isImplicit())
@@ -614,7 +650,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
} else {
// ARM adds %reg0 operands to indicate predicates. We'll allow that.
- if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg())
+ if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg())
report("Extra explicit operand on non-variadic instruction", MO, MONum);
}
@@ -623,112 +659,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const unsigned Reg = MO->getReg();
if (!Reg)
return;
+ if (MRI->tracksLiveness() && !MI->isDebugValue())
+ checkLiveness(MO, MONum);
- // Check Live Variables.
- if (MI->isDebugValue()) {
- // Liveness checks are not valid for debug values.
- } else if (MO->isUse() && !MO->isUndef()) {
- regsLiveInButUnused.erase(Reg);
-
- bool isKill = false;
- unsigned defIdx;
- if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
- // A two-addr use counts as a kill if use and def are the same.
- unsigned DefReg = MI->getOperand(defIdx).getReg();
- if (Reg == DefReg)
- isKill = true;
- else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- report("Two-address instruction operands must be identical",
- MO, MONum);
- }
- } else
- isKill = MO->isKill();
-
- if (isKill)
- addRegWithSubRegs(regsKilled, Reg);
-
- // Check that LiveVars knows this kill.
- if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
- MO->isKill()) {
- LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
- if (std::find(VI.Kills.begin(),
- VI.Kills.end(), MI) == VI.Kills.end())
- report("Kill missing from LiveVariables", MO, MONum);
- }
-
- // Check LiveInts liveness and kill.
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
- LiveInts && !LiveInts->isNotInMIMap(MI)) {
- SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- if (!LI.liveAt(UseIdx)) {
- report("No live range at use", MO, MONum);
- *OS << UseIdx << " is not live in " << LI << '\n';
- }
- // Check for extra kill flags.
- // Note that we allow missing kill flags for now.
- if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) {
- report("Live range continues after kill flag", MO, MONum);
- *OS << "Live range: " << LI << '\n';
- }
- } else {
- report("Virtual register has no Live interval", MO, MONum);
- }
- }
-
- // Use of a dead register.
- if (!regsLive.count(Reg)) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- // Reserved registers may be used even when 'dead'.
- if (!isReserved(Reg))
- report("Using an undefined physical register", MO, MONum);
- } else {
- BBInfo &MInfo = MBBInfoMap[MI->getParent()];
- // We don't know which virtual registers are live in, so only complain
- // if vreg was killed in this MBB. Otherwise keep track of vregs that
- // must be live in. PHI instructions are handled separately.
- if (MInfo.regsKilled.count(Reg))
- report("Using a killed virtual register", MO, MONum);
- else if (!MI->isPHI())
- MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
- }
- }
- } else if (MO->isDef()) {
- // Register defined.
- // TODO: verify that earlyclobber ops are not used.
- if (MO->isDead())
- addRegWithSubRegs(regsDead, Reg);
- else
- addRegWithSubRegs(regsDefined, Reg);
-
- // Verify SSA form.
- if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
- llvm::next(MRI->def_begin(Reg)) != MRI->def_end())
- report("Multiple virtual register defs in SSA form", MO, MONum);
-
- // Check LiveInts for a live range, but only for virtual registers.
- if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
- !LiveInts->isNotInMIMap(MI)) {
- SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex();
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
- assert(VNI && "NULL valno is not allowed");
- if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
- report("Inconsistent valno->def", MO, MONum);
- *OS << "Valno " << VNI->id << " is not defined at "
- << DefIdx << " in " << LI << '\n';
- }
- } else {
- report("No live range at def", MO, MONum);
- *OS << DefIdx << " is not live in " << LI << '\n';
- }
- } else {
- report("Virtual register has no Live interval", MO, MONum);
- }
- }
- }
// Check register classes.
if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
@@ -790,6 +723,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
break;
}
+ case MachineOperand::MO_RegisterMask:
+ regMasks.push_back(MO->getRegMask());
+ break;
+
case MachineOperand::MO_MachineBasicBlock:
if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent()))
report("PHI operand is not in the CFG", MO, MONum);
@@ -800,11 +737,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
LiveInts && !LiveInts->isNotInMIMap(MI)) {
LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
SlotIndex Idx = LiveInts->getInstructionIndex(MI);
- if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) {
+ if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) {
report("Instruction loads from dead spill slot", MO, MONum);
*OS << "Live stack: " << LI << '\n';
}
- if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) {
+ if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) {
report("Instruction stores to dead spill slot", MO, MONum);
*OS << "Live stack: " << LI << '\n';
}
@@ -816,10 +753,127 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
}
+void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const unsigned Reg = MO->getReg();
+
+ // Both use and def operands can read a register.
+ if (MO->readsReg()) {
+ regsLiveInButUnused.erase(Reg);
+
+ bool isKill = false;
+ unsigned defIdx;
+ if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
+ // A two-addr use counts as a kill if use and def are the same.
+ unsigned DefReg = MI->getOperand(defIdx).getReg();
+ if (Reg == DefReg)
+ isKill = true;
+ else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ report("Two-address instruction operands must be identical", MO, MONum);
+ }
+ } else
+ isKill = MO->isKill();
+
+ if (isKill)
+ addRegWithSubRegs(regsKilled, Reg);
+
+ // Check that LiveVars knows this kill.
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MO->isKill()) {
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end())
+ report("Kill missing from LiveVariables", MO, MONum);
+ }
+
+ // Check LiveInts liveness and kill.
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true);
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (!LI.liveAt(UseIdx)) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << LI << '\n';
+ }
+ // Check for extra kill flags.
+ // Note that we allow missing kill flags for now.
+ if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+
+ // Use of a dead register.
+ if (!regsLive.count(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Reserved registers may be used even when 'dead'.
+ if (!isReserved(Reg))
+ report("Using an undefined physical register", MO, MONum);
+ } else {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ // We don't know which virtual registers are live in, so only complain
+ // if vreg was killed in this MBB. Otherwise keep track of vregs that
+ // must be live in. PHI instructions are handled separately.
+ if (MInfo.regsKilled.count(Reg))
+ report("Using a killed virtual register", MO, MONum);
+ else if (!MI->isPHI())
+ MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+ }
+ }
+ }
+
+ if (MO->isDef()) {
+ // Register defined.
+ // TODO: verify that earlyclobber ops are not used.
+ if (MO->isDead())
+ addRegWithSubRegs(regsDead, Reg);
+ else
+ addRegWithSubRegs(regsDefined, Reg);
+
+ // Verify SSA form.
+ if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ llvm::next(MRI->def_begin(Reg)) != MRI->def_end())
+ report("Multiple virtual register defs in SSA form", MO, MONum);
+
+ // Check LiveInts for a live range, but only for virtual registers.
+ if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot();
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
+ assert(VNI && "NULL valno is not allowed");
+ if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
+ report("Inconsistent valno->def", MO, MONum);
+ *OS << "Valno " << VNI->id << " is not defined at "
+ << DefIdx << " in " << LI << '\n';
+ }
+ } else {
+ report("No live range at def", MO, MONum);
+ *OS << DefIdx << " is not live in " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+ }
+}
+
void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
BBInfo &MInfo = MBBInfoMap[MI->getParent()];
set_union(MInfo.regsKilled, regsKilled);
set_subtract(regsLive, regsKilled); regsKilled.clear();
+ // Kill any masked registers.
+ while (!regMasks.empty()) {
+ const uint32_t *Mask = regMasks.pop_back_val();
+ for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I)
+ if (TargetRegisterInfo::isPhysicalRegister(*I) &&
+ MachineOperand::clobbersPhysReg(Mask, *I))
+ regsDead.push_back(*I);
+ }
set_subtract(regsLive, regsDead); regsDead.clear();
set_union(regsLive, regsDefined); regsDefined.clear();
@@ -855,7 +909,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
void MachineVerifier::calcRegsPassed() {
// First push live-out regs to successors' vregsPassed. Remember the MBBs that
// have any vregsPassed.
- DenseSet<const MachineBasicBlock*> todo;
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
MFI != MFE; ++MFI) {
const MachineBasicBlock &MBB(*MFI);
@@ -892,7 +946,7 @@ void MachineVerifier::calcRegsPassed() {
// similar to calcRegsPassed, only backwards.
void MachineVerifier::calcRegsRequired() {
// First push live-in regs to predecessors' vregsRequired.
- DenseSet<const MachineBasicBlock*> todo;
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
MFI != MFE; ++MFI) {
const MachineBasicBlock &MBB(*MFI);
@@ -925,9 +979,10 @@ void MachineVerifier::calcRegsRequired() {
// Check PHI instructions at the beginning of MBB. It is assumed that
// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+ SmallPtrSet<const MachineBasicBlock*, 8> seen;
for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
BBI != BBE && BBI->isPHI(); ++BBI) {
- DenseSet<const MachineBasicBlock*> seen;
+ seen.clear();
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
unsigned Reg = BBI->getOperand(i).getReg();
@@ -968,8 +1023,17 @@ void MachineVerifier::visitMachineFunctionAfter() {
}
// Now check liveness info if available
- if (LiveVars || LiveInts)
- calcRegsRequired();
+ calcRegsRequired();
+
+ if (MRI->isSSA() && !MF->empty()) {
+ BBInfo &MInfo = MBBInfoMap[&MF->front()];
+ for (RegSet::iterator
+ I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
+ ++I)
+ report("Virtual register def doesn't dominate all uses.",
+ MRI->getVRegDef(*I));
+ }
+
if (LiveVars)
verifyLiveVariables();
if (LiveInts)
@@ -1065,33 +1129,43 @@ void MachineVerifier::verifyLiveIntervals() {
report("No instruction at def index", MF);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " in " << LI << '\n';
- } else if (!MI->modifiesRegister(LI.reg, TRI)) {
- report("Defining instruction does not modify register", MI);
- *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ continue;
}
+ bool hasDef = false;
bool isEarlyClobber = false;
- if (MI) {
- for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() &&
- MOI->isEarlyClobber()) {
- isEarlyClobber = true;
- break;
- }
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ if (MOI->getReg() != LI.reg)
+ continue;
+ } else {
+ if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
+ !TRI->regsOverlap(LI.reg, MOI->getReg()))
+ continue;
}
+ hasDef = true;
+ if (MOI->isEarlyClobber())
+ isEarlyClobber = true;
+ }
+
+ if (!hasDef) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
}
// Early clobber defs begin at USE slots, but other defs must begin at
// DEF slots.
if (isEarlyClobber) {
- if (!VNI->def.isUse()) {
- report("Early clobber def must be at a USE slot", MF);
+ if (!VNI->def.isEarlyClobber()) {
+ report("Early clobber def must be at an early-clobber slot", MF);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " in " << LI << '\n';
}
- } else if (!VNI->def.isDef()) {
- report("Non-PHI, non-early clobber def must be at a DEF slot", MF);
+ } else if (!VNI->def.isRegister()) {
+ report("Non-PHI, non-early clobber def must be at a register slot",
+ MF);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " in " << LI << '\n';
}
@@ -1137,32 +1211,76 @@ void MachineVerifier::verifyLiveIntervals() {
*OS << " in " << LI << '\n';
continue;
}
- if (I->end != LiveInts->getMBBEndIdx(EndMBB)) {
- // The live segment is ending inside EndMBB
- const MachineInstr *MI =
- LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
- if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB);
+
+ // No more checks for live-out segments.
+ if (I->end == LiveInts->getMBBEndIdx(EndMBB))
+ continue;
+
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB);
I->print(*OS);
*OS << " in " << LI << '\n' << "Basic block starts at "
- << MBBStartIdx << '\n';
- } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) &&
- !MI->readsVirtualRegister(LI.reg)) {
- // A live range can end with either a redefinition, a kill flag on a
- // use, or a dead flag on a def.
- // FIXME: Should we check for each of these?
- bool hasDeadDef = false;
- for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && MOI->isDead()) {
- hasDeadDef = true;
- break;
- }
- }
+ << MBBStartIdx << '\n';
+ continue;
+ }
+
+ // The block slot must refer to a basic block boundary.
+ if (I->end.isBlock()) {
+ report("Live segment ends at B slot of an instruction", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+
+ if (I->end.isDead()) {
+ // Segment ends on the dead slot.
+ // That means there must be a dead def.
+ if (!SlotIndex::isSameInstr(I->start, I->end)) {
+ report("Live segment ending at dead slot spans instructions", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ }
+
+ // A live segment can only end at an early-clobber slot if it is being
+ // redefined by an early-clobber def.
+ if (I->end.isEarlyClobber()) {
+ if (I+1 == E || (I+1)->start != I->end) {
+ report("Live segment ending at early clobber slot must be "
+ "redefined by an EC def in the same instruction", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ }
+
+ // The following checks only apply to virtual registers. Physreg liveness
+ // is too weird to check.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ // A live range can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ bool hasRead = false;
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || MOI->getReg() != LI.reg)
+ continue;
+ if (MOI->readsReg())
+ hasRead = true;
+ if (MOI->isDef() && MOI->isDead())
+ hasDeadDef = true;
+ }
+ if (I->end.isDead()) {
if (!hasDeadDef) {
- report("Instruction killing live segment neither defines nor reads "
- "register", MI);
+ report("Instruction doesn't have a dead def operand", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ } else {
+ if (!hasRead) {
+ report("Instruction ending live range doesn't read the register",
+ MI);
I->print(*OS);
*OS << " in " << LI << '\n';
}
@@ -1192,8 +1310,8 @@ void MachineVerifier::verifyLiveIntervals() {
// Check that VNI is live-out of all predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
PE = MFI->pred_end(); PI != PE; ++PI) {
- SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot();
- const VNInfo *PVNI = LI.getVNInfoAt(PEnd);
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
+ const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI))
continue;
@@ -1201,7 +1319,7 @@ void MachineVerifier::verifyLiveIntervals() {
if (!PVNI) {
report("Register not marked live out of predecessor", *PI);
*OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at "
+ << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
<< PEnd << " in " << LI << '\n';
continue;
}
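
The new regMasks handling in visitMachineInstrAfter() treats a register-mask operand as a packed bit vector and kills every live physical register the mask clobbers. Assuming LLVM's convention that a set bit means the register is preserved by the operation carrying the mask (so a clear bit means clobbered), the test reduces to one word lookup and one bit test, as in this small standalone sketch with made-up register numbers:

// Toy model of the MachineOperand::clobbersPhysReg() test used above.
#include <cstdint>
#include <cstdio>
#include <vector>

static bool clobbersPhysReg(const uint32_t *Mask, unsigned PhysReg) {
  // Bit PhysReg of the packed mask: word PhysReg / 32, bit PhysReg % 32.
  return !(Mask[PhysReg / 32] & (1u << (PhysReg % 32)));
}

int main() {
  // A toy target with 64 physical registers; a call preserves regs 16..31.
  std::vector<uint32_t> Mask(2, 0);
  for (unsigned Reg = 16; Reg != 32; ++Reg)
    Mask[Reg / 32] |= 1u << (Reg % 32);

  // Any register live across the mask that is not preserved must be killed,
  // which is what the verifier's regMasks loop models.
  std::vector<unsigned> Live = {3, 17, 40};
  for (unsigned Reg : Live)
    printf("r%u %s\n", Reg,
           clobbersPhysReg(Mask.data(), Reg) ? "clobbered" : "preserved");
  return 0;
}
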
diff --git a/lib/CodeGen/ObjectCodeEmitter.cpp b/lib/CodeGen/ObjectCodeEmitter.cpp
deleted file mode 100644
index cf05275d7a31..000000000000
--- a/lib/CodeGen/ObjectCodeEmitter.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-//===-- llvm/CodeGen/ObjectCodeEmitter.cpp -------------------- -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/BinaryObject.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/CodeGen/ObjectCodeEmitter.h"
-
-//===----------------------------------------------------------------------===//
-// ObjectCodeEmitter Implementation
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-
-ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {}
-ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {}
-ObjectCodeEmitter::~ObjectCodeEmitter() {}
-
-/// setBinaryObject - set the BinaryObject we are writting to
-void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; }
-
-/// emitByte - This callback is invoked when a byte needs to be
-/// written to the data stream, without buffer overflow testing.
-void ObjectCodeEmitter::emitByte(uint8_t B) {
- BO->emitByte(B);
-}
-
-/// emitWordLE - This callback is invoked when a 32-bit word needs to be
-/// written to the data stream in little-endian format.
-void ObjectCodeEmitter::emitWordLE(uint32_t W) {
- BO->emitWordLE(W);
-}
-
-/// emitWordBE - This callback is invoked when a 32-bit word needs to be
-/// written to the data stream in big-endian format.
-void ObjectCodeEmitter::emitWordBE(uint32_t W) {
- BO->emitWordBE(W);
-}
-
-/// emitDWordLE - This callback is invoked when a 64-bit word needs to be
-/// written to the data stream in little-endian format.
-void ObjectCodeEmitter::emitDWordLE(uint64_t W) {
- BO->emitDWordLE(W);
-}
-
-/// emitDWordBE - This callback is invoked when a 64-bit word needs to be
-/// written to the data stream in big-endian format.
-void ObjectCodeEmitter::emitDWordBE(uint64_t W) {
- BO->emitDWordBE(W);
-}
-
-/// emitAlignment - Align 'BO' to the necessary alignment boundary.
-void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */,
- uint8_t fill /* 0 */) {
- BO->emitAlignment(Alignment, fill);
-}
-
-/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
-/// written to the data stream.
-void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) {
- BO->emitULEB128Bytes(Value);
-}
-
-/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
-/// written to the data stream.
-void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) {
- BO->emitSLEB128Bytes(Value);
-}
-
-/// emitString - This callback is invoked when a String needs to be
-/// written to the data stream.
-void ObjectCodeEmitter::emitString(const std::string &String) {
- BO->emitString(String);
-}
-
-/// getCurrentPCValue - This returns the address that the next emitted byte
-/// will be output to.
-uintptr_t ObjectCodeEmitter::getCurrentPCValue() const {
- return BO->getCurrentPCOffset();
-}
-
-/// getCurrentPCOffset - Return the offset from the start of the emitted
-/// buffer that we are currently writing to.
-uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const {
- return BO->getCurrentPCOffset();
-}
-
-/// addRelocation - Whenever a relocatable address is needed, it should be
-/// noted with this interface.
-void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) {
- BO->addRelocation(relocation);
-}
-
-/// StartMachineBasicBlock - This should be called by the target when a new
-/// basic block is about to be emitted. This way the MCE knows where the
-/// start of the block is, and can implement getMachineBasicBlockAddress.
-void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) {
- if (MBBLocations.size() <= (unsigned)MBB->getNumber())
- MBBLocations.resize((MBB->getNumber()+1)*2);
- MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
-}
-
-/// getMachineBasicBlockAddress - Return the address of the specified
-/// MachineBasicBlock, only usable after the label for the MBB has been
-/// emitted.
-uintptr_t
-ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
- assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
- MBBLocations[MBB->getNumber()] && "MBB not emitted!");
- return MBBLocations[MBB->getNumber()];
-}
-
-/// getJumpTableEntryAddress - Return the address of the jump table with index
-/// 'Index' in the function that last called initJumpTableInfo.
-uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const {
- assert(JTLocations.size() > Index && "JT not emitted!");
- return JTLocations[Index];
-}
-
-/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in
-/// the constant pool that was last emitted with the emitConstantPool method.
-uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const {
- assert(CPLocations.size() > Index && "CP not emitted!");
- return CPLocations[Index];
-}
-
-/// getConstantPoolEntrySection - Return the section of the 'Index' entry in
-/// the constant pool that was last emitted with the emitConstantPool method.
-uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const {
- assert(CPSections.size() > Index && "CP not emitted!");
- return CPSections[Index];
-}
-
-} // end namespace llvm
-
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index c05be130ec61..6da313e632af 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -56,11 +56,10 @@ namespace {
}
char OptimizePHIs::ID = 0;
+char &llvm::OptimizePHIsID = OptimizePHIs::ID;
INITIALIZE_PASS(OptimizePHIs, "opt-phis",
"Optimize machine instruction PHIs", false, false)
-FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
-
bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
MRI = &Fn.getRegInfo();
TII = Fn.getTarget().getInstrInfo();
@@ -165,7 +164,11 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
InstrSet PHIsInCycle;
if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) &&
SingleValReg != 0) {
- MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg);
+ unsigned OldReg = MI->getOperand(0).getReg();
+ if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
+ continue;
+
+ MRI->replaceRegWith(OldReg, SingleValReg);
MI->eraseFromParent();
++NumPHICycles;
Changed = true;
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 6994aa58fbd5..0ed4c34bb105 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -92,11 +92,15 @@ STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
STATISTIC(NumReused, "Number of reused lowered phis");
char PHIElimination::ID = 0;
-INITIALIZE_PASS(PHIElimination, "phi-node-elimination",
- "Eliminate PHI nodes for register allocation", false, false)
-
char& llvm::PHIEliminationID = PHIElimination::ID;
+INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation", false, false)
+
void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveVariables>();
AU.addPreserved<MachineDominatorTree>();
@@ -241,7 +245,6 @@ void PHIElimination::LowerAtomicPHINode(
LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
// Increment use count of the newly created virtual register.
- VI.NumUses++;
LV->setPHIJoin(IncomingReg);
// When we are reusing the incoming register, it may already have been
@@ -410,7 +413,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
return false; // Quick exit for basic blocks without PHIs.
bool Changed = false;
- for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
+ for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
BBI != BBE && BBI->isPHI(); ++BBI) {
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
unsigned Reg = BBI->getOperand(i).getReg();
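
The `char& llvm::PHIEliminationID = PHIElimination::ID;` line follows the same pattern as MachineSinkingID and OptimizePHIsID above: a pass is identified by the address of a static char, and exporting a reference to it lets other code name the pass without seeing the class definition. A standalone sketch of that idiom with made-up pass names:

// Toy illustration of identifying a pass by the address of its static ID.
#include <cstdio>

namespace toy {
struct PassA { static char ID; };
struct PassB { static char ID; };
char PassA::ID = 0;
char PassB::ID = 0;

// Public handles, analogous to llvm::PHIEliminationID / llvm::MachineSinkingID.
char &PassAID = PassA::ID;
char &PassBID = PassB::ID;
} // namespace toy

// A registry keyed by the address of the ID, not by its (meaningless) value.
static const char *nameFor(const void *ID) {
  if (ID == &toy::PassAID) return "PassA";
  if (ID == &toy::PassBID) return "PassB";
  return "<unknown>";
}

int main() {
  printf("%s\n", nameFor(&toy::PassAID));   // PassA
  printf("%s\n", nameFor(&toy::PassB::ID)); // PassB: same address as PassBID
  return 0;
}
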
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 315aedddb9ef..53d1fcf7377a 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -12,62 +12,617 @@
//
//===---------------------------------------------------------------------===//
-#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
+static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
+ cl::desc("Disable Post Regalloc"));
+static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
+ cl::desc("Disable branch folding"));
+static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
+ cl::desc("Disable tail duplication"));
+static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
+ cl::desc("Disable pre-register allocation tail duplication"));
+static cl::opt<bool> EnableBlockPlacement("enable-block-placement",
+ cl::Hidden, cl::desc("Enable probability-driven block placement"));
+static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats",
+ cl::Hidden, cl::desc("Collect probability-driven block placement stats"));
+static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
+ cl::desc("Disable code placement"));
+static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
+ cl::desc("Disable Stack Slot Coloring"));
+static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
+ cl::desc("Disable Machine Dead Code Elimination"));
+static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
+ cl::desc("Disable Machine Common Subexpression Elimination"));
+static cl::opt<cl::boolOrDefault>
+OptimizeRegAlloc("optimize-regalloc", cl::Hidden,
+ cl::desc("Enable optimized register allocation compilation path."));
+static cl::opt<cl::boolOrDefault>
+EnableMachineSched("enable-misched", cl::Hidden,
+ cl::desc("Enable the machine instruction scheduling pass."));
+static cl::opt<bool> EnableStrongPHIElim("strong-phi-elim", cl::Hidden,
+ cl::desc("Use strong PHI elimination."));
+static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
+ cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
+ cl::desc("Disable Machine Sinking"));
+static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
+ cl::desc("Disable Loop Strength Reduction Pass"));
+static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
+ cl::desc("Disable Codegen Prepare"));
+static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden,
+ cl::desc("Disable Copy Propagation pass"));
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+ cl::desc("Dump garbage collector data"));
+static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+
+/// Allow standard passes to be disabled by command line options. This supports
+/// simple binary flags that either suppress the pass or do nothing.
+/// i.e. -disable-mypass=false has no effect.
+/// These should be converted to boolOrDefault in order to use applyOverride.
+static AnalysisID applyDisable(AnalysisID ID, bool Override) {
+ if (Override)
+ return &NoPassID;
+ return ID;
+}
+
+/// Allow Pass selection to be overridden by command line options. This supports
+/// flags with ternary conditions. TargetID is passed through by default. The
+/// pass is suppressed when the option is false. When the option is true, the
+/// StandardID is selected if the target provides no default.
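+///
+/// For example, with the -enable-misched flag defined above: leaving the flag
+/// unset keeps whatever pass the target configured (possibly none, since the
+/// constructor in this file substitutes NoPassID for MachineSchedulerID by
+/// default), -enable-misched forces the standard MachineScheduler on anyway,
+/// and -enable-misched=false suppresses the pass entirely.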
+static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
+ AnalysisID StandardID) {
+ switch (Override) {
+ case cl::BOU_UNSET:
+ return TargetID;
+ case cl::BOU_TRUE:
+ if (TargetID != &NoPassID)
+ return TargetID;
+ if (StandardID == &NoPassID)
+ report_fatal_error("Target cannot enable pass");
+ return StandardID;
+ case cl::BOU_FALSE:
+ return &NoPassID;
+ }
+ llvm_unreachable("Invalid command line option state");
+}
+
+/// Allow standard passes to be disabled by the command line, regardless of who
+/// is adding the pass.
+///
+/// StandardID is the pass identified in the standard pass pipeline and provided
+/// to addPass(). It may be a target-specific ID in the case that the target
+/// directly adds its own pass, but in that case we harmlessly fall through.
+///
+/// TargetID is the pass that the target has configured to override StandardID.
+///
+/// StandardID may be a pseudo ID. In that case TargetID is the name of the real
+/// pass to run. This allows multiple options to control a single pass depending
+/// on where in the pipeline that pass is added.
+static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
+ if (StandardID == &PostRASchedulerID)
+ return applyDisable(TargetID, DisablePostRA);
+
+ if (StandardID == &BranchFolderPassID)
+ return applyDisable(TargetID, DisableBranchFold);
+
+ if (StandardID == &TailDuplicateID)
+ return applyDisable(TargetID, DisableTailDuplicate);
+
+ if (StandardID == &TargetPassConfig::EarlyTailDuplicateID)
+ return applyDisable(TargetID, DisableEarlyTailDup);
+
+ if (StandardID == &MachineBlockPlacementID)
+ return applyDisable(TargetID, DisableCodePlace);
+
+ if (StandardID == &CodePlacementOptID)
+ return applyDisable(TargetID, DisableCodePlace);
+
+ if (StandardID == &StackSlotColoringID)
+ return applyDisable(TargetID, DisableSSC);
+
+ if (StandardID == &DeadMachineInstructionElimID)
+ return applyDisable(TargetID, DisableMachineDCE);
+
+ if (StandardID == &MachineLICMID)
+ return applyDisable(TargetID, DisableMachineLICM);
+
+ if (StandardID == &MachineCSEID)
+ return applyDisable(TargetID, DisableMachineCSE);
+
+ if (StandardID == &MachineSchedulerID)
+ return applyOverride(TargetID, EnableMachineSched, StandardID);
+
+ if (StandardID == &TargetPassConfig::PostRAMachineLICMID)
+ return applyDisable(TargetID, DisablePostRAMachineLICM);
+
+ if (StandardID == &MachineSinkingID)
+ return applyDisable(TargetID, DisableMachineSink);
+
+ if (StandardID == &MachineCopyPropagationID)
+ return applyDisable(TargetID, DisableCopyProp);
+
+ return TargetID;
+}
+
//===---------------------------------------------------------------------===//
+/// TargetPassConfig
+//===---------------------------------------------------------------------===//
+
+INITIALIZE_PASS(TargetPassConfig, "targetpassconfig",
+ "Target Pass Configuration", false, false)
+char TargetPassConfig::ID = 0;
+
+static char NoPassIDAnchor = 0;
+char &llvm::NoPassID = NoPassIDAnchor;
+
+// Pseudo Pass IDs.
+char TargetPassConfig::EarlyTailDuplicateID = 0;
+char TargetPassConfig::PostRAMachineLICMID = 0;
+
+namespace llvm {
+class PassConfigImpl {
+public:
+ // List of passes explicitly substituted by this target. Normally this is
+ // empty, but it is a convenient way to suppress or replace specific passes
+ // that are part of a standard pass pipeline without overridding the entire
+ // pipeline. This mechanism allows target options to inherit a standard pass's
+ // user interface. For example, a target may disable a standard pass by
+ // default by substituting NoPass, and the user may still enable that standard
+ // pass with an explicit command line option.
+ DenseMap<AnalysisID,AnalysisID> TargetPasses;
+};
+} // namespace llvm
+
+// Out of line virtual method.
+TargetPassConfig::~TargetPassConfig() {
+ delete Impl;
+}
+
+// Out of line constructor provides default values for pass options and
+// registers all common codegen passes.
+TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
+ : ImmutablePass(ID), TM(tm), PM(pm), Impl(0), Initialized(false),
+ DisableVerify(false),
+ EnableTailMerge(true) {
+
+ Impl = new PassConfigImpl();
+
+ // Register all target independent codegen passes to activate their PassIDs,
+ // including this pass itself.
+ initializeCodeGen(*PassRegistry::getPassRegistry());
+
+ // Substitute Pseudo Pass IDs for real ones.
+ substitutePass(EarlyTailDuplicateID, TailDuplicateID);
+ substitutePass(PostRAMachineLICMID, MachineLICMID);
+
+ // Temporarily disable experimental passes.
+ substitutePass(MachineSchedulerID, NoPassID);
+}
+
+/// createPassConfig - Create a pass configuration object to be used by
+/// addPassToEmitX methods for generating a pipeline of CodeGen passes.
+///
+/// Targets may override this to extend TargetPassConfig.
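+///
+/// An illustrative sketch, using hypothetical target names (not a real
+/// in-tree target), of how a target could hook in:
+///
+///   class MyTargetPassConfig : public TargetPassConfig {
+///   public:
+///     MyTargetPassConfig(MyTargetMachine *TM, PassManagerBase &PM)
+///       : TargetPassConfig(TM, PM) {}
+///     virtual bool addPreRegAlloc() {
+///       PM.add(createMyTargetPreRAFixupPass()); // hypothetical target pass
+///       return true;
+///     }
+///   };
+///
+///   TargetPassConfig *MyTargetMachine::createPassConfig(PassManagerBase &PM) {
+///     return new MyTargetPassConfig(this, PM);
+///   }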
+TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new TargetPassConfig(this, PM);
+}
+
+TargetPassConfig::TargetPassConfig()
+ : ImmutablePass(ID), PM(*(PassManagerBase*)0) {
+ llvm_unreachable("TargetPassConfig should not be constructed on-the-fly");
+}
+
+// Helper to verify the analysis is really immutable.
+void TargetPassConfig::setOpt(bool &Opt, bool Val) {
+ assert(!Initialized && "PassConfig is immutable");
+ Opt = Val;
+}
+
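+/// substitutePass - Have TargetID run whenever the standard pipeline requests
+/// StandardID. As a purely illustrative example, a hypothetical target could
+/// install its own LICM-like pass from its pass config constructor:
+///
+///   substitutePass(MachineLICMID, MyMachineLICMID); // MyMachineLICMID is
+///                                                   // hypothetical.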
+void TargetPassConfig::substitutePass(char &StandardID, char &TargetID) {
+ Impl->TargetPasses[&StandardID] = &TargetID;
+}
+
+AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
+ DenseMap<AnalysisID, AnalysisID>::const_iterator
+ I = Impl->TargetPasses.find(ID);
+ if (I == Impl->TargetPasses.end())
+ return ID;
+ return I->second;
+}
+
+/// Add a CodeGen pass at this point in the pipeline after checking for target
+/// and command line overrides.
+AnalysisID TargetPassConfig::addPass(char &ID) {
+ assert(!Initialized && "PassConfig is immutable");
+
+ AnalysisID TargetID = getPassSubstitution(&ID);
+ AnalysisID FinalID = overridePass(&ID, TargetID);
+ if (FinalID == &NoPassID)
+ return FinalID;
+
+ Pass *P = Pass::createPass(FinalID);
+ if (!P)
+ llvm_unreachable("Pass ID not registered");
+ PM.add(P);
+ return FinalID;
+}
+
+void TargetPassConfig::printAndVerify(const char *Banner) const {
+ if (TM->shouldPrintMachineCode())
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+ if (VerifyMachineCode)
+ PM.add(createMachineVerifierPass(Banner));
+}
+
+/// Add common target configurable passes that perform LLVM IR to IR transforms
+/// following machine independent optimization.
+void TargetPassConfig::addIRPasses() {
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createBasicAliasAnalysisPass());
+
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ if (PrintLSR)
+ PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ }
+
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+}
+
+/// Add common passes that perform LLVM IR to IR transforms in preparation for
+/// instruction selection.
+void TargetPassConfig::addISelPrepare() {
+ if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ PM.add(createStackProtectorPass(getTargetLowering()));
+
+ addPreISel();
+
+ if (PrintISelInput)
+ PM.add(createPrintFunctionPass("\n\n"
+ "*** Final LLVM Code input to ISel ***\n",
+ &dbgs()));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+}
+
+/// Add the complete set of target-independent postISel code generator passes.
///
-/// RegisterRegAlloc class - Track the registration of register allocators.
+/// This can be read as the standard order of major LLVM CodeGen stages. Stages
+/// with nontrivial configuration or multiple passes are broken out below in
+/// add%Stage routines.
///
+/// Any TargetPassConfig::addXX routine may be overridden by the Target. The
+/// addPre/Post methods with empty header implementations allow injecting
+/// target-specific fixups just before or after major stages. Additionally,
+/// targets have the flexibility to change pass order within a stage by
+/// overriding default implementation of add%Stage routines below. Each
+/// technique has maintainability tradeoffs because alternate pass orders are
+/// not well supported. addPre/Post works better if the target pass is easily
+/// tied to a common pass. But if it has subtle dependencies on multiple passes,
+/// the target should override the stage instead.
+///
+/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection
+/// before/after any target-independent pass. But it's currently overkill.
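+///
+/// As a trimmed, purely illustrative sketch of the stage-override approach, a
+/// hypothetical target could reorder part of the SSA optimization stage:
+///
+///   void MyTargetPassConfig::addMachineSSAOptimization() {
+///     addPass(OptimizePHIsID);
+///     addPass(MachineCSEID);     // this target prefers CSE before LICM
+///     addPass(MachineLICMID);
+///     addPass(MachineSinkingID);
+///   }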
+void TargetPassConfig::addMachinePasses() {
+ // Print the instruction selected machine code...
+ printAndVerify("After Instruction Selection");
+
+ // Expand pseudo-instructions emitted by ISel.
+ addPass(ExpandISelPseudosID);
+
+ // Add passes that optimize machine instructions in SSA form.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addMachineSSAOptimization();
+ }
+ else {
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(LocalStackSlotAllocationID);
+ }
+
+ // Run pre-ra passes.
+ if (addPreRegAlloc())
+ printAndVerify("After PreRegAlloc passes");
+
+ // Run register allocation and passes that are tightly coupled with it,
+ // including phi elimination and scheduling.
+ if (getOptimizeRegAlloc())
+ addOptimizedRegAlloc(createRegAllocPass(true));
+ else
+ addFastRegAlloc(createRegAllocPass(false));
+
+ // Run post-ra passes.
+ if (addPostRegAlloc())
+ printAndVerify("After PostRegAlloc passes");
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ addPass(PrologEpilogCodeInserterID);
+ printAndVerify("After PrologEpilogCodeInserter");
+
+ /// Add passes that optimize machine instructions after register allocation.
+ if (getOptLevel() != CodeGenOpt::None)
+ addMachineLateOptimization();
+
+ // Expand pseudo instructions before second scheduling pass.
+ addPass(ExpandPostRAPseudosID);
+ printAndVerify("After ExpandPostRAPseudos");
+
+ // Run pre-sched2 passes.
+ if (addPreSched2())
+ printAndVerify("After PreSched2 passes");
+
+ // Second pass scheduler.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(PostRASchedulerID);
+ printAndVerify("After PostRAScheduler");
+ }
+
+ // GC
+ addPass(GCMachineCodeAnalysisID);
+ if (PrintGCInfo)
+ PM.add(createGCInfoPrinter(dbgs()));
+
+ // Basic block placement.
+ if (getOptLevel() != CodeGenOpt::None)
+ addBlockPlacement();
+
+ if (addPreEmitPass())
+ printAndVerify("After PreEmit passes");
+}
+
+/// Add passes that optimize machine instructions in SSA form.
+void TargetPassConfig::addMachineSSAOptimization() {
+ // Pre-ra tail duplication.
+ if (addPass(EarlyTailDuplicateID) != &NoPassID)
+ printAndVerify("After Pre-RegAlloc TailDuplicate");
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ addPass(OptimizePHIsID);
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(LocalStackSlotAllocationID);
+
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ addPass(DeadMachineInstructionElimID);
+ printAndVerify("After codegen DCE pass");
+
+ addPass(MachineLICMID);
+ addPass(MachineCSEID);
+ addPass(MachineSinkingID);
+ printAndVerify("After Machine LICM, CSE and Sinking passes");
+
+ addPass(PeepholeOptimizerID);
+ printAndVerify("After codegen peephole optimization pass");
+}
+
+//===---------------------------------------------------------------------===//
+/// Register Allocation Pass Configuration
//===---------------------------------------------------------------------===//
+
+bool TargetPassConfig::getOptimizeRegAlloc() const {
+ switch (OptimizeRegAlloc) {
+ case cl::BOU_UNSET: return getOptLevel() != CodeGenOpt::None;
+ case cl::BOU_TRUE: return true;
+ case cl::BOU_FALSE: return false;
+ }
+ llvm_unreachable("Invalid optimize-regalloc state");
+}
+
+/// RegisterRegAlloc's global Registry tracks allocator registration.
MachinePassRegistry RegisterRegAlloc::Registry;
-static FunctionPass *createDefaultRegisterAllocator() { return 0; }
+/// A dummy default pass factory indicates whether the register allocator is
+/// overridden on the command line.
+static FunctionPass *useDefaultRegisterAllocator() { return 0; }
static RegisterRegAlloc
defaultRegAlloc("default",
"pick register allocator based on -O option",
- createDefaultRegisterAllocator);
+ useDefaultRegisterAllocator);
-//===---------------------------------------------------------------------===//
-///
-/// RegAlloc command line options.
-///
-//===---------------------------------------------------------------------===//
+/// -regalloc=... command line option.
static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
RegisterPassParser<RegisterRegAlloc> >
RegAlloc("regalloc",
- cl::init(&createDefaultRegisterAllocator),
+ cl::init(&useDefaultRegisterAllocator),
cl::desc("Register allocator to use"));
-//===---------------------------------------------------------------------===//
+/// Instantiate the default register allocator pass for this target for either
+/// the optimized or unoptimized allocation path. This will be added to the pass
+/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc
+/// in the optimized case.
///
-/// createRegisterAllocator - choose the appropriate register allocator.
+/// A target that uses the standard regalloc pass order for fast or optimized
+/// allocation may still override this for per-target regalloc
+/// selection. But -regalloc=... always takes precedence.
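+///
+/// An illustrative override for a hypothetical target that always wants the
+/// fast allocator, regardless of optimization level:
+///
+///   FunctionPass *MyTargetPassConfig::createTargetRegisterAllocator(bool) {
+///     return createFastRegisterAllocator();
+///   }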
+FunctionPass *TargetPassConfig::createTargetRegisterAllocator(bool Optimized) {
+ if (Optimized)
+ return createGreedyRegisterAllocator();
+ else
+ return createFastRegisterAllocator();
+}
+
+/// Find and instantiate the register allocation pass requested by this target
+/// at the current optimization level. Different register allocators are
+/// defined as separate passes because they may require different analyses.
///
-//===---------------------------------------------------------------------===//
-FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) {
+/// This helper ensures that the regalloc= option is always available,
+/// even for targets that override the default allocator.
+///
+/// FIXME: When MachinePassRegistry registers pass IDs instead of function ptrs,
+/// this can be folded into addPass.
+FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+ // Initialize the global default.
if (!Ctor) {
Ctor = RegAlloc;
RegisterRegAlloc::setDefault(RegAlloc);
}
+ if (Ctor != useDefaultRegisterAllocator)
+ return Ctor();
- // This forces linking of the linear scan register allocator,
- // so -regalloc=linearscan still works in clang.
- if (Ctor == createLinearScanRegisterAllocator)
- return createLinearScanRegisterAllocator();
+ // With no -regalloc= override, ask the target for a regalloc pass.
+ return createTargetRegisterAllocator(Optimized);
+}
- if (Ctor != createDefaultRegisterAllocator)
- return Ctor();
+/// Add the minimum set of target-independent passes that are required for
+/// register allocation. No coalescing or scheduling.
+void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(PHIEliminationID);
+ addPass(TwoAddressInstructionPassID);
- // When the 'default' allocator is requested, pick one based on OptLevel.
- switch (OptLevel) {
- case CodeGenOpt::None:
- return createFastRegisterAllocator();
- default:
- return createGreedyRegisterAllocator();
+ PM.add(RegAllocPass);
+ printAndVerify("After Register Allocation");
+}
+
+/// Add standard target-independent passes that are tightly coupled with
+/// optimized register allocation, including coalescing, machine instruction
+/// scheduling, and register allocation itself.
+void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ // LiveVariables currently requires pure SSA form.
+ //
+ // FIXME: Once TwoAddressInstruction pass no longer uses kill flags,
+ // LiveVariables can be removed completely, and LiveIntervals can be directly
+ // computed. (We still either need to regenerate kill flags after regalloc, or
+ // preferably fix the scavenger to not depend on them).
+ addPass(LiveVariablesID);
+
+ // Add passes that move from transformed SSA into conventional SSA. This is a
+ // "copy coalescing" problem.
+ //
+ if (!EnableStrongPHIElim) {
+ // Edge splitting is smarter with machine loop info.
+ addPass(MachineLoopInfoID);
+ addPass(PHIEliminationID);
+ }
+ addPass(TwoAddressInstructionPassID);
+
+ // FIXME: Either remove this pass completely, or fix it so that it works on
+ // SSA form. We could modify LiveIntervals to be independent of this pass, but
+ // it would be even better to simply eliminate *all* IMPLICIT_DEFs before
+ // leaving SSA.
+ addPass(ProcessImplicitDefsID);
+
+ if (EnableStrongPHIElim)
+ addPass(StrongPHIEliminationID);
+
+ addPass(RegisterCoalescerID);
+
+ // PreRA instruction scheduling.
+ if (addPass(MachineSchedulerID) != &NoPassID)
+ printAndVerify("After Machine Scheduling");
+
+ // Add the selected register allocation pass.
+ PM.add(RegAllocPass);
+ printAndVerify("After Register Allocation");
+
+ // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature,
+ // but eventually, all users of it should probably be moved to addPostRA and
+ // it can go away. Currently, it's the intended place for targets to run
+ // FinalizeMachineBundles, because passes other than MachineScheduling and
+ // RegAlloc itself may not be aware of bundles.
+ if (addFinalizeRegAlloc())
+ printAndVerify("After RegAlloc finalization");
+
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ addPass(StackSlotColoringID);
+
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(PostRAMachineLICMID);
+
+ printAndVerify("After StackSlotColoring and postra Machine LICM");
+}
+
+//===---------------------------------------------------------------------===//
+/// Post RegAlloc Pass Configuration
+//===---------------------------------------------------------------------===//
+
+/// Add passes that optimize machine instructions after register allocation.
+void TargetPassConfig::addMachineLateOptimization() {
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (addPass(BranchFolderPassID) != &NoPassID)
+ printAndVerify("After BranchFolding");
+
+ // Tail duplication.
+ if (addPass(TailDuplicateID) != &NoPassID)
+ printAndVerify("After TailDuplicate");
+
+ // Copy propagation.
+ if (addPass(MachineCopyPropagationID) != &NoPassID)
+ printAndVerify("After copy propagation pass");
+}
+
+/// Add standard basic block placement passes.
+void TargetPassConfig::addBlockPlacement() {
+ AnalysisID ID = &NoPassID;
+ if (EnableBlockPlacement) {
+ // MachineBlockPlacement is an experimental pass which is disabled by
+ // default currently. Eventually it should subsume CodePlacementOpt, so
+ // when enabled, the other is disabled.
+ ID = addPass(MachineBlockPlacementID);
+ } else {
+ ID = addPass(CodePlacementOptID);
+ }
+ if (ID != &NoPassID) {
+ // Run a separate pass to collect block placement statistics.
+ if (EnableBlockPlacementStats)
+ addPass(MachineBlockPlacementStatsID);
+
+ printAndVerify("After machine block placement.");
}
}
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index bbc7ce2d0a42..9c5c029000c0 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -39,7 +39,7 @@
// =>
// v1 = bitcast v0
// = v0
-//
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "peephole-opt"
@@ -68,7 +68,7 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
STATISTIC(NumCmps, "Number of compares eliminated");
-STATISTIC(NumImmFold, "Number of move immediate foled");
+STATISTIC(NumImmFold, "Number of move immediate folded");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -109,22 +109,19 @@ namespace {
}
char PeepholeOptimizer::ID = 0;
+char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
"Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
"Peephole Optimizations", false, false)
-FunctionPass *llvm::createPeepholeOptimizerPass() {
- return new PeepholeOptimizer();
-}
-
/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of the
/// result.
-///
+///
/// Do not generate an EXTRACT that is used only in a debug use, as this changes
/// the code. Since this code does not currently share EXTRACTs, just ignore all
/// debug uses.
@@ -134,7 +131,7 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
unsigned SrcReg, DstReg, SubIdx;
if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
return false;
-
+
if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
TargetRegisterInfo::isPhysicalRegister(SrcReg))
return false;
@@ -240,6 +237,10 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
if (PHIBBs.count(UseMBB))
continue;
+ // About to add uses of DstReg, clear DstReg's kill flags.
+ if (!Changed)
+ MRI->clearKillFlags(DstReg);
+
unsigned NewVR = MRI->createVirtualRegister(RC);
BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVR)
@@ -292,7 +293,7 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
assert(Def && Src && "Malformed bitcast instruction!");
MachineInstr *DefMI = MRI->getVRegDef(Src);
- if (!DefMI || !DefMI->getDesc().isBitcast())
+ if (!DefMI || !DefMI->isBitcast())
return false;
unsigned SrcSrc = 0;
@@ -353,7 +354,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isMoveImmediate())
+ if (!MI->isMoveImmediate())
return false;
if (MCID.getNumDefs() != 1)
return false;
@@ -363,7 +364,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
ImmDefRegs.insert(Reg);
return true;
}
-
+
return false;
}
@@ -395,7 +396,7 @@ bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (DisablePeephole)
return false;
-
+
TM = &MF.getTarget();
TII = TM->getInstrInfo();
MRI = &MF.getRegInfo();
@@ -408,7 +409,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
-
+
bool SeenMoveImm = false;
LocalMIs.clear();
ImmDefRegs.clear();
@@ -428,17 +429,15 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- const MCInstrDesc &MCID = MI->getDesc();
-
- if (MCID.isBitcast()) {
+ if (MI->isBitcast()) {
if (OptimizeBitcastInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
Changed = true;
MII = First ? I->begin() : llvm::next(PMII);
continue;
- }
- } else if (MCID.isCompare()) {
+ }
+ } else if (MI->isCompare()) {
if (OptimizeCmpInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index c73e87733cb4..24d3e5ab0c9d 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -23,7 +23,6 @@
#include "AggressiveAntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
#include "RegisterClassInfo.h"
-#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
@@ -32,6 +31,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetLowering.h"
@@ -45,7 +45,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
-#include <set>
using namespace llvm;
STATISTIC(NumNoops, "Number of noops inserted");
@@ -82,16 +81,15 @@ namespace {
AliasAnalysis *AA;
const TargetInstrInfo *TII;
RegisterClassInfo RegClassInfo;
- CodeGenOpt::Level OptLevel;
public:
static char ID;
- PostRAScheduler(CodeGenOpt::Level ol) :
- MachineFunctionPass(ID), OptLevel(ol) {}
+ PostRAScheduler() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetPassConfig>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
@@ -99,10 +97,6 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const {
- return "Post RA top-down list latency scheduler";
- }
-
bool runOnMachineFunction(MachineFunction &Fn);
};
char PostRAScheduler::ID = 0;
@@ -130,36 +124,49 @@ namespace {
/// AA - AliasAnalysis for making memory reference queries.
AliasAnalysis *AA;
- /// KillIndices - The index of the most recent kill (proceding bottom-up),
- /// or ~0u if the register is not live.
- std::vector<unsigned> KillIndices;
+ /// LiveRegs - true if the register is live.
+ BitVector LiveRegs;
+
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
public:
SchedulePostRATDList(
MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
AliasAnalysis *AA, const RegisterClassInfo&,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
- SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs);
+ SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs);
~SchedulePostRATDList();
- /// StartBlock - Initialize register live-range state for scheduling in
+ /// startBlock - Initialize register live-range state for scheduling in
/// this block.
///
- void StartBlock(MachineBasicBlock *BB);
+ void startBlock(MachineBasicBlock *BB);
+
+ /// Initialize the scheduler state for the next scheduling region.
+ virtual void enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount);
+
+ /// Notify that the scheduler has finished scheduling the current region.
+ virtual void exitRegion();
/// Schedule - Schedule the instruction range using list scheduling.
///
- void Schedule();
+ void schedule();
+
+ void EmitSchedule();
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
void Observe(MachineInstr *MI, unsigned Count);
- /// FinishBlock - Clean up register live-range state.
+ /// finishBlock - Clean up register live-range state.
///
- void FinishBlock();
+ void finishBlock();
/// FixupKills - Fix register kill flags that have been made
/// invalid due to scheduling
@@ -177,16 +184,23 @@ namespace {
// adjustments may be made to the instruction if necessary. Return
// true if the operand has been deleted, false if not.
bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
+
+ void dumpSchedule() const;
};
}
+char &llvm::PostRASchedulerID = PostRAScheduler::ID;
+
+INITIALIZE_PASS(PostRAScheduler, "post-RA-sched",
+ "Post RA top-down list latency scheduler", false, false)
+
SchedulePostRATDList::SchedulePostRATDList(
MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
AliasAnalysis *AA, const RegisterClassInfo &RCI,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
- SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs)
- : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA),
- KillIndices(TRI->getNumRegs())
+ SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), Topo(SUnits), AA(AA),
+ LiveRegs(TRI->getNumRegs())
{
const TargetMachine &TM = MF.getTarget();
const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
@@ -204,16 +218,48 @@ SchedulePostRATDList::~SchedulePostRATDList() {
delete AntiDepBreak;
}
+/// Initialize state associated with the next scheduling region.
+void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+ Sequence.clear();
+}
+
+/// Print the schedule before exiting the region.
+void SchedulePostRATDList::exitRegion() {
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+ ScheduleDAGInstrs::exitRegion();
+}
+
+/// dumpSchedule - dump the scheduled Sequence.
+void SchedulePostRATDList::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
TII = Fn.getTarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+
RegClassInfo.runOnMachineFunction(Fn);
// Check for explicit enable/disable of post-ra scheduling.
- TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE;
- SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
+ TargetSubtargetInfo::ANTIDEP_NONE;
+ SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;
if (EnablePostRAScheduler.getPosition() > 0) {
if (!EnablePostRAScheduler)
return false;
@@ -221,7 +267,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
// Check that post-RA scheduling is enabled for this target.
// This may upgrade the AntiDepMode.
const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
- if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
+ if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode,
+ CriticalPathRCs))
return false;
}
@@ -248,13 +295,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
static int bbcnt = 0;
if (bbcnt++ % DebugDiv != DebugMod)
continue;
- dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
- ":BB#" << MBB->getNumber() << " ***\n";
+ dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getName()
+ << ":BB#" << MBB->getNumber() << " ***\n";
}
#endif
// Initialize register live-range state for scheduling in this block.
- Scheduler.StartBlock(MBB);
+ Scheduler.startBlock(MBB);
// Schedule each sequence of instructions not interrupted by a label
// or anything else that effectively needs to shut down scheduling.
@@ -262,8 +309,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
unsigned Count = MBB->size(), CurrentCount = Count;
for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
MachineInstr *MI = llvm::prior(I);
- if (TII->isSchedulingBoundary(MI, MBB, Fn)) {
- Scheduler.Run(MBB, I, Current, CurrentCount);
+ // Calls are not scheduling boundaries before register allocation, but
+ // post-ra we don't gain anything by scheduling across calls since we
+ // don't need to worry about register pressure.
+ if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) {
+ Scheduler.enterRegion(MBB, I, Current, CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
Scheduler.EmitSchedule();
Current = MI;
CurrentCount = Count - 1;
@@ -271,15 +323,19 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
}
I = MI;
--Count;
+ if (MI->isBundle())
+ Count -= MI->getBundleSize();
}
assert(Count == 0 && "Instruction count mismatch!");
assert((MBB->begin() == Current || CurrentCount != 0) &&
"Instruction count mismatch!");
- Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
Scheduler.EmitSchedule();
// Clean up register live-range state.
- Scheduler.FinishBlock();
+ Scheduler.finishBlock();
// Update register kills
Scheduler.FixupKills(MBB);
@@ -291,9 +347,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
///
-void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
+void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
// Call the superclass.
- ScheduleDAGInstrs::StartBlock(BB);
+ ScheduleDAGInstrs::startBlock(BB);
// Reset the hazard recognizer and anti-dep breaker.
HazardRec->Reset();
@@ -303,14 +359,14 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
/// Schedule - Schedule the instruction range using list scheduling.
///
-void SchedulePostRATDList::Schedule() {
+void SchedulePostRATDList::schedule() {
// Build the scheduling graph.
- BuildSchedGraph(AA);
+ buildSchedGraph(AA);
if (AntiDepBreak != NULL) {
unsigned Broken =
- AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
- InsertPosIndex, DbgValues);
+ AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
+ EndIndex, DbgValues);
if (Broken != 0) {
// We made changes. Update the dependency graph.
@@ -319,11 +375,8 @@ void SchedulePostRATDList::Schedule() {
// the def's anti-dependence *and* output-dependence edges due to
// that register, and add new anti-dependence and output-dependence
// edges based on the next live range of the register.
- SUnits.clear();
- Sequence.clear();
- EntrySU = SUnit();
- ExitSU = SUnit();
- BuildSchedGraph(AA);
+ ScheduleDAG::clearDAG();
+ buildSchedGraph(AA);
NumFixedAnti += Broken;
}
@@ -343,38 +396,36 @@ void SchedulePostRATDList::Schedule() {
///
void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
if (AntiDepBreak != NULL)
- AntiDepBreak->Observe(MI, Count, InsertPosIndex);
+ AntiDepBreak->Observe(MI, Count, EndIndex);
}
/// FinishBlock - Clean up register live-range state.
///
-void SchedulePostRATDList::FinishBlock() {
+void SchedulePostRATDList::finishBlock() {
if (AntiDepBreak != NULL)
AntiDepBreak->FinishBlock();
// Call the superclass.
- ScheduleDAGInstrs::FinishBlock();
+ ScheduleDAGInstrs::finishBlock();
}
/// StartBlockForKills - Initialize register live-range state for updating kills
///
void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
- // Initialize the indices to indicate that no registers are live.
- for (unsigned i = 0; i < TRI->getNumRegs(); ++i)
- KillIndices[i] = ~0u;
+ // Start with no live registers.
+ LiveRegs.reset();
// Determine the live-out physregs for this block.
- if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ if (!BB->empty() && BB->back().isReturn()) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
unsigned Reg = *I;
- KillIndices[Reg] = BB->size();
+ LiveRegs.set(Reg);
// Repeat, for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- KillIndices[*Subreg] = BB->size();
- }
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ LiveRegs.set(*Subreg);
}
}
else {
@@ -384,12 +435,11 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
- KillIndices[Reg] = BB->size();
+ LiveRegs.set(Reg);
// Repeat, for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- KillIndices[*Subreg] = BB->size();
- }
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ LiveRegs.set(*Subreg);
}
}
}
@@ -404,7 +454,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
}
// If MO itself is live, clear the kill flag...
- if (KillIndices[MO.getReg()] != ~0u) {
+ if (LiveRegs.test(MO.getReg())) {
MO.setIsKill(false);
return false;
}
@@ -414,9 +464,9 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
MO.setIsKill(false);
bool AllDead = true;
const unsigned SuperReg = MO.getReg();
- for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(SuperReg);
*Subreg; ++Subreg) {
- if (KillIndices[*Subreg] != ~0u) {
+ if (LiveRegs.test(*Subreg)) {
MI->addOperand(MachineOperand::CreateReg(*Subreg,
true /*IsDef*/,
true /*IsImp*/,
@@ -437,7 +487,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
- std::set<unsigned> killedRegs;
+ BitVector killedRegs(TRI->getNumRegs());
BitVector ReservedRegs = TRI->getReservedRegs(MF);
StartBlockForKills(MBB);
@@ -455,6 +505,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
// are completely defined.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ LiveRegs.clearBitsNotInMask(MO.getRegMask());
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
@@ -462,19 +514,18 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
// Ignore two-addr defs.
if (MI->isRegTiedToUseOperand(i)) continue;
- KillIndices[Reg] = ~0u;
+ LiveRegs.reset(Reg);
// Repeat for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- KillIndices[*Subreg] = ~0u;
- }
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ LiveRegs.reset(*Subreg);
}
// Examine all used registers and set/clear kill flag. When a
// register is used multiple times we only set the kill flag on
// the first use.
- killedRegs.clear();
+ killedRegs.reset();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse()) continue;
@@ -482,12 +533,12 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
bool kill = false;
- if (killedRegs.find(Reg) == killedRegs.end()) {
+ if (!killedRegs.test(Reg)) {
kill = true;
// A register is not killed if any subregs are live...
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
- if (KillIndices[*Subreg] != ~0u) {
+ if (LiveRegs.test(*Subreg)) {
kill = false;
break;
}
@@ -496,7 +547,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
// If subreg is not live, then register is killed if it became
// live in this instruction
if (kill)
- kill = (KillIndices[Reg] == ~0u);
+ kill = !LiveRegs.test(Reg);
}
if (MO.isKill() != kill) {
@@ -506,7 +557,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
DEBUG(MI->dump());
}
- killedRegs.insert(Reg);
+ killedRegs.set(Reg);
}
// Mark any used register (that is not using undef) and subregs as
@@ -517,12 +568,11 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
unsigned Reg = MO.getReg();
if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
- KillIndices[Reg] = Count;
+ LiveRegs.set(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- KillIndices[*Subreg] = Count;
- }
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ LiveRegs.set(*Subreg);
}
}
}
@@ -585,7 +635,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
ReleaseSuccessors(SU);
SU->isScheduled = true;
- AvailableQueue.ScheduledNode(SU);
+ AvailableQueue.scheduledNode(SU);
}
/// ListScheduleTopDown - The main loop of list scheduling for top-down
@@ -699,14 +749,46 @@ void SchedulePostRATDList::ListScheduleTopDown() {
}
#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/false);
-#endif
+ unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+#endif // NDEBUG
}
-//===----------------------------------------------------------------------===//
-// Public Constructor Functions
-//===----------------------------------------------------------------------===//
+// EmitSchedule - Emit the machine code in scheduled order.
+void SchedulePostRATDList::EmitSchedule() {
+ RegionBegin = RegionEnd;
+
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue)
+ BB->splice(RegionEnd, BB, FirstDbgValue);
+
+ // Then re-insert them according to the given schedule.
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ BB->splice(RegionEnd, BB, SU->getInstr());
+ else
+ // Null SUnit* is a noop.
+ TII->insertNoop(*BB, RegionEnd);
+
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (i == 0)
+ RegionBegin = prior(RegionEnd);
+ }
-FunctionPass *llvm::createPostRAScheduler(CodeGenOpt::Level OptLevel) {
- return new PostRAScheduler(OptLevel);
+ // Reinsert any remaining debug_values.
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineBasicBlock::iterator OrigPrivMI = P.second;
+ BB->splice(++OrigPrivMI, BB, DbgValue);
+ }
+ DbgValues.clear();
+ FirstDbgValue = NULL;
}
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index b1d8c9760225..1ad3479afb4c 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -26,6 +26,8 @@
using namespace llvm;
char ProcessImplicitDefs::ID = 0;
+char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID;
+
INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
"Process Implicit Definitions", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveVariables)
@@ -36,7 +38,6 @@ void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<LiveVariables>();
- AU.addRequired<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
AU.addPreservedID(TwoAddressInstructionPassID);
@@ -50,10 +51,10 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
SmallSet<unsigned, 8> &ImpDefRegs) {
switch(OpIdx) {
case 1:
- return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 ||
+ return MI->isCopy() && (!MI->getOperand(0).readsReg() ||
ImpDefRegs.count(MI->getOperand(0).getReg()));
case 2:
- return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 ||
+ return MI->isSubregToReg() && (!MI->getOperand(0).readsReg() ||
ImpDefRegs.count(MI->getOperand(0).getReg()));
default: return false;
}
@@ -66,7 +67,7 @@ static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
MachineOperand &MO1 = MI->getOperand(1);
if (MO1.getReg() != Reg)
return false;
- if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg()))
+ if (!MO0.readsReg() || ImpDefRegs.count(MO0.getReg()))
return true;
return false;
}
@@ -87,7 +88,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
TII = fn.getTarget().getInstrInfo();
TRI = fn.getTarget().getRegisterInfo();
MRI = &fn.getRegInfo();
- LV = &getAnalysis<LiveVariables>();
+ LV = getAnalysisIfAvailable<LiveVariables>();
SmallSet<unsigned, 8> ImpDefRegs;
SmallVector<MachineInstr*, 8> ImpDefMIs;
@@ -105,23 +106,24 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
MachineInstr *MI = &*I;
++I;
if (MI->isImplicitDef()) {
- if (MI->getOperand(0).getSubReg())
+ ImpDefMIs.push_back(MI);
+ // Is this a sub-register read-modify-write?
+ if (MI->getOperand(0).readsReg())
continue;
unsigned Reg = MI->getOperand(0).getReg();
ImpDefRegs.insert(Reg);
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
ImpDefRegs.insert(*SS);
}
- ImpDefMIs.push_back(MI);
continue;
}
// Eliminate %reg1032:sub<def> = COPY undef.
- if (MI->isCopy() && MI->getOperand(0).getSubReg()) {
+ if (MI->isCopy() && MI->getOperand(0).readsReg()) {
MachineOperand &MO = MI->getOperand(1);
if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
- if (MO.isKill()) {
+ if (LV && MO.isKill()) {
LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
vi.removeKill(MI);
}
@@ -140,7 +142,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
bool ChangedToImpDef = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef())
+ if (!MO.isReg() || !MO.readsReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
@@ -155,8 +157,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
MI->RemoveOperand(j);
if (isKill) {
ImpDefRegs.erase(Reg);
- LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
- vi.removeKill(MI);
+ if (LV) {
+ LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
+ vi.removeKill(MI);
+ }
}
ChangedToImpDef = true;
Changed = true;
@@ -172,10 +176,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
continue;
}
if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
- // Make sure other uses of
+ // Make sure other reads of Reg are also marked <undef>.
for (unsigned j = i+1; j != e; ++j) {
MachineOperand &MOJ = MI->getOperand(j);
- if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg)
+ if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg())
MOJ.setIsUndef();
}
ImpDefRegs.erase(Reg);
@@ -265,7 +269,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
}
// Update LiveVariables varinfo if the instruction is a kill.
- if (isKill) {
+ if (LV && isKill) {
LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
vi.removeKill(RMI);
}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 32c932552bed..458915ea5d93 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -45,24 +45,22 @@
using namespace llvm;
char PEI::ID = 0;
+char &llvm::PrologEpilogCodeInserterID = PEI::ID;
INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
"Prologue/Epilogue Insertion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(PEI, "prologepilog",
- "Prologue/Epilogue Insertion", false, false)
+ "Prologue/Epilogue Insertion & Frame Finalization",
+ false, false)
STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
STATISTIC(NumBytesStackSpace,
"Number of bytes used for stack in all functions");
-/// createPrologEpilogCodeInserter - This function returns a pass that inserts
-/// prolog and epilog code, and eliminates abstract frame references.
-///
-FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
-
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
///
@@ -71,6 +69,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");
+
RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
@@ -125,6 +125,9 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
scavengeFrameVirtualRegs(Fn);
+ // Clear any vregs created by virtual scavenging.
+ Fn.getRegInfo().clearVirtRegs();
+
delete RS;
clearAllSets();
return true;
@@ -207,7 +210,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
MachineFrameInfo *MFI = Fn.getFrameInfo();
// Get the callee saved register list...
- const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
// These are used to keep track the callee-save area. Initialize them.
MinCSFrameIndex = INT_MAX;
@@ -224,17 +227,9 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
- if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
+ if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
// If the reg is modified, save it!
CSI.push_back(CalleeSavedInfo(Reg));
- } else {
- for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
- *AliasSet; ++AliasSet) { // Check alias registers too.
- if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
- CSI.push_back(CalleeSavedInfo(Reg));
- break;
- }
- }
}
}
@@ -332,7 +327,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Skip over all terminator instructions, which are part of the return
// sequence.
MachineBasicBlock::iterator I2 = I;
- while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
I = I2;
bool AtStart = I == MBB->begin();
@@ -426,11 +421,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Skip over all terminator instructions, which are part of the
// return sequence.
- if (! I->getDesc().isTerminator()) {
+ if (! I->isTerminator()) {
++I;
} else {
MachineBasicBlock::iterator I2 = I;
- while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
I = I2;
}
}
@@ -698,7 +693,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// Add epilogue to restore the callee-save registers in each exiting block
for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().getDesc().isReturn())
+ if (!I->empty() && I->back().isReturn())
TFI.emitEpilogue(Fn, *I);
}
@@ -706,7 +701,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// we've been asked for it. This, when linked with a runtime with support
// for segmented stacks (libgcc is one), will result in allocating stack
// space in small chunks instead of one large contiguous block.
- if (EnableSegmentedStacks)
+ if (Fn.getTarget().Options.EnableSegmentedStacks)
TFI.adjustForSegmentedStacks(Fn);
}
@@ -813,6 +808,10 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
/// with physical registers. Use the register scavenger to find an
/// appropriate register to use.
+///
+/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
+/// iterate over the vreg use list, which at this point only contains machine
+/// operands for which eliminateFrameIndex needs a new scratch reg.
void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Run through the instructions and find any virtual registers.
for (MachineFunction::iterator BB = Fn.begin(),
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index e2391591ad06..0d140a9bb481 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -40,10 +40,6 @@ namespace llvm {
initializePEIPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const {
- return "Prolog/Epilog Insertion & Frame Finalization";
- }
-
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 73b66d868f3d..49599b3ab980 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -87,7 +87,6 @@ bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
this == getJumpTable())
return true;
llvm_unreachable("Unknown PseudoSourceValue!");
- return false;
}
bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
@@ -97,7 +96,6 @@ bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
this == getJumpTable())
return false;
llvm_unreachable("Unknown PseudoSourceValue!");
- return true;
}
bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
new file mode 100644
index 000000000000..b00eceb17f11
--- /dev/null
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -0,0 +1,280 @@
+//===-- RegAllocBase.cpp - Register Allocator Base Class ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class which provides common functionality
+// for LiveIntervalUnion-based register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SparseBitVector.h"
+#endif
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+using namespace llvm;
+
+STATISTIC(NumAssigned , "Number of registers assigned");
+STATISTIC(NumUnassigned , "Number of registers unassigned");
+STATISTIC(NumNewQueued , "Number of new live ranges queued");
+
+// Temporary verification option until we can put verification inside
+// MachineVerifier.
+static cl::opt<bool, true>
+VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+ cl::desc("Verify during register allocation"));
+
+const char *RegAllocBase::TimerGroupName = "Register Allocation";
+bool RegAllocBase::VerifyEnabled = false;
+
+#ifndef NDEBUG
+// Verify each LiveIntervalUnion.
+void RegAllocBase::verify() {
+ LiveVirtRegBitSet VisitedVRegs;
+ OwningArrayPtr<LiveVirtRegBitSet>
+ unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]);
+
+ // Verify disjoint unions.
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI));
+ LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg];
+ PhysReg2LiveUnion[PhysReg].verify(VRegs);
+ // Union + intersection test could be done efficiently in one pass, but
+ // don't add a method to SparseBitVector unless we really need it.
+ assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions");
+ VisitedVRegs |= VRegs;
+ }
+
+ // Verify vreg coverage.
+ for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end();
+ liItr != liEnd; ++liItr) {
+ unsigned reg = liItr->first;
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) continue;
+ if (!VRM->hasPhys(reg)) continue; // spilled?
+ unsigned PhysReg = VRM->getPhys(reg);
+ if (!unionVRegs[PhysReg].test(reg)) {
+ dbgs() << "LiveVirtReg " << reg << " not in union " <<
+ TRI->getName(PhysReg) << "\n";
+ llvm_unreachable("unallocated live vreg");
+ }
+ }
+ // FIXME: I'm not sure how to verify spilled intervals.
+}
+#endif //!NDEBUG
+
+//===----------------------------------------------------------------------===//
+// RegAllocBase Implementation
+//===----------------------------------------------------------------------===//
+
+// Instantiate a LiveIntervalUnion for each physical register.
+void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator,
+ unsigned NRegs) {
+ NumRegs = NRegs;
+ Array =
+ static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs));
+ for (unsigned r = 0; r != NRegs; ++r)
+ new(Array + r) LiveIntervalUnion(r, allocator);
+}
+
+void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
+ NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled);
+ TRI = &vrm.getTargetRegInfo();
+ MRI = &vrm.getRegInfo();
+ VRM = &vrm;
+ LIS = &lis;
+ MRI->freezeReservedRegs(vrm.getMachineFunction());
+ RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+
+ const unsigned NumRegs = TRI->getNumRegs();
+ if (NumRegs != PhysReg2LiveUnion.numRegs()) {
+ PhysReg2LiveUnion.init(UnionAllocator, NumRegs);
+ // Cache an interference query for each physical reg
+ Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
+ }
+}
+
+void RegAllocBase::LiveUnionArray::clear() {
+ if (!Array)
+ return;
+ for (unsigned r = 0; r != NumRegs; ++r)
+ Array[r].~LiveIntervalUnion();
+ free(Array);
+ NumRegs = 0;
+ Array = 0;
+}
+
+void RegAllocBase::releaseMemory() {
+ for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r)
+ PhysReg2LiveUnion[r].clear();
+}
+
+// Visit all the live registers. If they are already assigned to a physical
+// register, unify them with the corresponding LiveIntervalUnion, otherwise push
+// them on the priority queue for later assignment.
+void RegAllocBase::seedLiveRegs() {
+ NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
+ for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
+ unsigned RegNum = I->first;
+ LiveInterval &VirtReg = *I->second;
+ if (TargetRegisterInfo::isPhysicalRegister(RegNum))
+ PhysReg2LiveUnion[RegNum].unify(VirtReg);
+ else
+ enqueue(&VirtReg);
+ }
+}
+
+void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
+ << " to " << PrintReg(PhysReg, TRI) << '\n');
+ assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+ MRI->setPhysRegUsed(PhysReg);
+ PhysReg2LiveUnion[PhysReg].unify(VirtReg);
+ ++NumAssigned;
+}
+
+void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
+ << " from " << PrintReg(PhysReg, TRI) << '\n');
+ assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign");
+ PhysReg2LiveUnion[PhysReg].extract(VirtReg);
+ VRM->clearVirt(VirtReg.reg);
+ ++NumUnassigned;
+}
+
+// Top-level driver to manage the queue of unassigned VirtRegs and call the
+// selectOrSplit implementation.
+void RegAllocBase::allocatePhysRegs() {
+ seedLiveRegs();
+
+ // Continue assigning vregs one at a time to available physical registers.
+ while (LiveInterval *VirtReg = dequeue()) {
+ assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
+
+ // Unused registers can appear when the spiller coalesces snippets.
+ if (MRI->reg_nodbg_empty(VirtReg->reg)) {
+ DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
+ LIS->removeInterval(VirtReg->reg);
+ continue;
+ }
+
+ // Invalidate all interference queries, live ranges could have changed.
+ invalidateVirtRegs();
+
+ // selectOrSplit requests the allocator to return an available physical
+ // register if possible and populate a list of new live intervals that
+ // result from splitting.
+ DEBUG(dbgs() << "\nselectOrSplit "
+ << MRI->getRegClass(VirtReg->reg)->getName()
+ << ':' << *VirtReg << '\n');
+ typedef SmallVector<LiveInterval*, 4> VirtRegVec;
+ VirtRegVec SplitVRegs;
+ unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
+
+ if (AvailablePhysReg == ~0u) {
+ // selectOrSplit failed to find a register!
+ const char *Msg = "ran out of registers during register allocation";
+ // Probably caused by an inline asm.
+ MachineInstr *MI;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
+ (MI = I.skipInstruction());)
+ if (MI->isInlineAsm())
+ break;
+ if (MI)
+ MI->emitError(Msg);
+ else
+ report_fatal_error(Msg);
+ // Keep going after reporting the error.
+ VRM->assignVirt2Phys(VirtReg->reg,
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
+ continue;
+ }
+
+ if (AvailablePhysReg)
+ assign(*VirtReg, AvailablePhysReg);
+
+ for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
+ I != E; ++I) {
+ LiveInterval *SplitVirtReg = *I;
+ assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
+ if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+ DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
+ LIS->removeInterval(SplitVirtReg->reg);
+ continue;
+ }
+ DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
+ "expect split value in virtual register");
+ enqueue(SplitVirtReg);
+ ++NumNewQueued;
+ }
+ }
+}
+
+// Check if this live virtual register interferes with a physical register. If
+// not, then check for interference on each register that aliases with the
+// physical register. Return the interfering register.
+unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ if (query(VirtReg, *AliasI).checkInterference())
+ return *AliasI;
+ return 0;
+}
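
checkPhysRegInterference leans on the convention that getOverlaps returns a zero-terminated array (now typed uint16_t) and that register number 0 is never a real register. A toy illustration of scanning such a sentinel-terminated list, with a made-up overlap table rather than TargetRegisterInfo:

#include <cstdint>

// Hypothetical overlap table for one physical register: the register itself,
// one alias, then the 0 sentinel that ends the list.
static const uint16_t OverlapsOfR1[] = {1, 5, 0};

// Return the first overlapping register for which Pred is true, or 0 if none
// interferes -- the same shape as the loop in checkPhysRegInterference.
// e.g. firstInterference(OverlapsOfR1, [](uint16_t R) { return R == 5; }) == 5
template <typename PredT>
uint16_t firstInterference(const uint16_t *Overlaps, PredT Pred) {
  for (const uint16_t *I = Overlaps; *I; ++I)
    if (Pred(*I))
      return *I;
  return 0;
}
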
+
+// Add newly allocated physical registers to the MBB live in sets.
+void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
+ NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
+ SlotIndexes *Indexes = LIS->getSlotIndexes();
+ if (MF->size() <= 1)
+ return;
+
+ LiveIntervalUnion::SegmentIter SI;
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
+ if (LiveUnion.empty())
+ continue;
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:");
+ MachineFunction::iterator MBB = llvm::next(MF->begin());
+ MachineFunction::iterator MFE = MF->end();
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(MBB);
+ SI.setMap(LiveUnion.getMap());
+ SI.find(Start);
+ while (SI.valid()) {
+ if (SI.start() <= Start) {
+ if (!MBB->isLiveIn(PhysReg))
+ MBB->addLiveIn(PhysReg);
+ DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':'
+ << PrintReg(SI.value()->reg, TRI));
+ } else if (SI.start() > Stop)
+ MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
+ if (++MBB == MFE)
+ break;
+ tie(Start, Stop) = Indexes->getMBBRange(MBB);
+ SI.advanceTo(Start);
+ }
+ DEBUG(dbgs() << '\n');
+ }
+}
+
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index 031642117efc..072fe2bdb656 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -49,11 +49,6 @@ class VirtRegMap;
class LiveIntervals;
class Spiller;
-// Forward declare a priority queue of live virtual registers. If an
-// implementation needs to prioritize by anything other than spill weight, then
-// this will become an abstract base class with virtual calls to push/get.
-class LiveVirtRegQueue;
-
/// RegAllocBase provides the register allocation driver and interface that can
/// be extended to add interesting heuristics.
///
@@ -67,7 +62,6 @@ class RegAllocBase {
// registers may have changed.
unsigned UserTag;
-protected:
// Array of LiveIntervalUnions indexed by physical register.
class LiveUnionArray {
unsigned NumRegs;
@@ -88,17 +82,19 @@ protected:
}
};
- const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
- VirtRegMap *VRM;
- LiveIntervals *LIS;
- RegisterClassInfo RegClassInfo;
LiveUnionArray PhysReg2LiveUnion;
// Current queries, one per physreg. They must be reinitialized each time we
// query on a new live virtual register.
OwningArrayPtr<LiveIntervalUnion::Query> Queries;
+protected:
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ VirtRegMap *VRM;
+ LiveIntervals *LIS;
+ RegisterClassInfo RegClassInfo;
+
RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {}
virtual ~RegAllocBase() {}
@@ -115,16 +111,17 @@ protected:
return Queries[PhysReg];
}
+ // Get direct access to the underlying LiveIntervalUnion for PhysReg.
+ LiveIntervalUnion &getLiveUnion(unsigned PhysReg) {
+ return PhysReg2LiveUnion[PhysReg];
+ }
+
// Invalidate all cached information about virtual registers - live ranges may
// have changed.
void invalidateVirtRegs() { ++UserTag; }
// The top-level driver. The output is a VirtRegMap that is updated with
// physical register assignments.
- //
- // If an implementation wants to override the LiveInterval comparator, we
- // should modify this interface to allow passing in an instance derived from
- // LiveVirtRegQueue.
void allocatePhysRegs();
// Get a temporary reference to a Spiller instance.
@@ -160,12 +157,6 @@ protected:
/// allocation is making progress.
void unassign(LiveInterval &VirtReg, unsigned PhysReg);
- // Helper for spilling all live virtual registers currently unified under preg
- // that interfere with the most recently queried lvr. Return true if spilling
- // was successful, and append any new spilled/split intervals to splitLVRs.
- bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs);
-
/// addMBBLiveIns - Add physreg liveins to basic blocks.
void addMBBLiveIns(MachineFunction *);
@@ -183,9 +174,6 @@ public:
private:
void seedLiveRegs();
-
- void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs);
};
} // end namespace llvm
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 5496d69fd3df..77ee3148f31a 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -1,4 +1,4 @@
-//===-- RegAllocBasic.cpp - basic register allocator ----------------------===//
+//===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,18 +15,15 @@
#define DEBUG_TYPE "regalloc"
#include "RegAllocBase.h"
#include "LiveDebugVariables.h"
-#include "LiveIntervalUnion.h"
-#include "LiveRangeEdit.h"
#include "RenderMachineFunction.h"
#include "Spiller.h"
#include "VirtRegMap.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
#include "llvm/PassAnalysisSupport.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -37,35 +34,17 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#ifndef NDEBUG
-#include "llvm/ADT/SparseBitVector.h"
-#endif
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Timer.h"
#include <cstdlib>
#include <queue>
using namespace llvm;
-STATISTIC(NumAssigned , "Number of registers assigned");
-STATISTIC(NumUnassigned , "Number of registers unassigned");
-STATISTIC(NumNewQueued , "Number of new live ranges queued");
-
static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
createBasicRegisterAllocator);
-// Temporary verification option until we can put verification inside
-// MachineVerifier.
-static cl::opt<bool, true>
-VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
- cl::desc("Verify during register allocation"));
-
-const char *RegAllocBase::TimerGroupName = "Register Allocation";
-bool RegAllocBase::VerifyEnabled = false;
-
namespace {
struct CompSpillWeight {
bool operator()(LiveInterval *A, LiveInterval *B) const {
@@ -93,6 +72,11 @@ class RABasic : public MachineFunctionPass, public RegAllocBase
std::auto_ptr<Spiller> SpillerInstance;
std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
CompSpillWeight> Queue;
+
+ // Scratch space. Allocated here to avoid repeated malloc calls in
+ // selectOrSplit().
+ BitVector UsableRegs;
+
public:
RABasic();
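
The Queue member keeps RABasic's unassigned intervals in a std::priority_queue ordered by CompSpillWeight; reading the comparator as a less-than on spill weight, the max-heap hands out the most expensive-to-spill interval first (the comparator body is elided in the hunk above, so treat that ordering as an assumption). A self-contained sketch of the same idea:

#include <queue>
#include <vector>

// Stand-in for LiveInterval; only the spill weight matters for the ordering.
struct Interval {
  unsigned Reg;
  float Weight;
};

// Less-than on weight makes std::priority_queue a max-heap on weight, so the
// heaviest interval is dequeued (and therefore allocated) first.
struct ByWeight {
  bool operator()(const Interval *A, const Interval *B) const {
    return A->Weight < B->Weight;
  }
};

int main() {
  std::priority_queue<Interval *, std::vector<Interval *>, ByWeight> Q;
  Interval A = {1, 2.0f}, B = {2, 8.0f}, C = {3, 0.5f};
  Q.push(&A); Q.push(&B); Q.push(&C);
  // Pops B (weight 8.0), then A (2.0), then C (0.5).
  while (!Q.empty())
    Q.pop();
  return 0;
}
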
@@ -128,6 +112,15 @@ public:
/// Perform register allocation.
virtual bool runOnMachineFunction(MachineFunction &mf);
+ // Helper for spilling all live virtual registers currently unified under
+ // PhysReg that interfere with VirtReg. Return true if spilling was
+ // successful, and append any new spilled/split intervals to SplitVRegs.
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
static char ID;
};
@@ -139,8 +132,8 @@ RABasic::RABasic(): MachineFunctionPass(ID) {
initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
@@ -157,9 +150,6 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
AU.addPreserved<LiveDebugVariables>();
- if (StrongPHIElim)
- AU.addRequiredID(StrongPHIEliminationID);
- AU.addRequiredTransitiveID(RegisterCoalescerPassID);
AU.addRequired<CalculateSpillWeights>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
@@ -178,204 +168,10 @@ void RABasic::releaseMemory() {
RegAllocBase::releaseMemory();
}
-#ifndef NDEBUG
-// Verify each LiveIntervalUnion.
-void RegAllocBase::verify() {
- LiveVirtRegBitSet VisitedVRegs;
- OwningArrayPtr<LiveVirtRegBitSet>
- unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]);
-
- // Verify disjoint unions.
- for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
- DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI));
- LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg];
- PhysReg2LiveUnion[PhysReg].verify(VRegs);
- // Union + intersection test could be done efficiently in one pass, but
- // don't add a method to SparseBitVector unless we really need it.
- assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions");
- VisitedVRegs |= VRegs;
- }
-
- // Verify vreg coverage.
- for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end();
- liItr != liEnd; ++liItr) {
- unsigned reg = liItr->first;
- if (TargetRegisterInfo::isPhysicalRegister(reg)) continue;
- if (!VRM->hasPhys(reg)) continue; // spilled?
- unsigned PhysReg = VRM->getPhys(reg);
- if (!unionVRegs[PhysReg].test(reg)) {
- dbgs() << "LiveVirtReg " << reg << " not in union " <<
- TRI->getName(PhysReg) << "\n";
- llvm_unreachable("unallocated live vreg");
- }
- }
- // FIXME: I'm not sure how to verify spilled intervals.
-}
-#endif //!NDEBUG
-
-//===----------------------------------------------------------------------===//
-// RegAllocBase Implementation
-//===----------------------------------------------------------------------===//
-
-// Instantiate a LiveIntervalUnion for each physical register.
-void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator,
- unsigned NRegs) {
- NumRegs = NRegs;
- Array =
- static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs));
- for (unsigned r = 0; r != NRegs; ++r)
- new(Array + r) LiveIntervalUnion(r, allocator);
-}
-
-void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
- NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled);
- TRI = &vrm.getTargetRegInfo();
- MRI = &vrm.getRegInfo();
- VRM = &vrm;
- LIS = &lis;
- RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
-
- const unsigned NumRegs = TRI->getNumRegs();
- if (NumRegs != PhysReg2LiveUnion.numRegs()) {
- PhysReg2LiveUnion.init(UnionAllocator, NumRegs);
- // Cache an interferece query for each physical reg
- Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
- }
-}
-
-void RegAllocBase::LiveUnionArray::clear() {
- if (!Array)
- return;
- for (unsigned r = 0; r != NumRegs; ++r)
- Array[r].~LiveIntervalUnion();
- free(Array);
- NumRegs = 0;
- Array = 0;
-}
-
-void RegAllocBase::releaseMemory() {
- for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r)
- PhysReg2LiveUnion[r].clear();
-}
-
-// Visit all the live registers. If they are already assigned to a physical
-// register, unify them with the corresponding LiveIntervalUnion, otherwise push
-// them on the priority queue for later assignment.
-void RegAllocBase::seedLiveRegs() {
- NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
- for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
- unsigned RegNum = I->first;
- LiveInterval &VirtReg = *I->second;
- if (TargetRegisterInfo::isPhysicalRegister(RegNum))
- PhysReg2LiveUnion[RegNum].unify(VirtReg);
- else
- enqueue(&VirtReg);
- }
-}
-
-void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
- DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
- << " to " << PrintReg(PhysReg, TRI) << '\n');
- assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
- VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
- MRI->setPhysRegUsed(PhysReg);
- PhysReg2LiveUnion[PhysReg].unify(VirtReg);
- ++NumAssigned;
-}
-
-void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
- DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
- << " from " << PrintReg(PhysReg, TRI) << '\n');
- assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign");
- PhysReg2LiveUnion[PhysReg].extract(VirtReg);
- VRM->clearVirt(VirtReg.reg);
- ++NumUnassigned;
-}
-
-// Top-level driver to manage the queue of unassigned VirtRegs and call the
-// selectOrSplit implementation.
-void RegAllocBase::allocatePhysRegs() {
- seedLiveRegs();
-
- // Continue assigning vregs one at a time to available physical registers.
- while (LiveInterval *VirtReg = dequeue()) {
- assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
-
- // Unused registers can appear when the spiller coalesces snippets.
- if (MRI->reg_nodbg_empty(VirtReg->reg)) {
- DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
- LIS->removeInterval(VirtReg->reg);
- continue;
- }
-
- // Invalidate all interference queries, live ranges could have changed.
- invalidateVirtRegs();
-
- // selectOrSplit requests the allocator to return an available physical
- // register if possible and populate a list of new live intervals that
- // result from splitting.
- DEBUG(dbgs() << "\nselectOrSplit "
- << MRI->getRegClass(VirtReg->reg)->getName()
- << ':' << *VirtReg << '\n');
- typedef SmallVector<LiveInterval*, 4> VirtRegVec;
- VirtRegVec SplitVRegs;
- unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
-
- if (AvailablePhysReg == ~0u) {
- // selectOrSplit failed to find a register!
- const char *Msg = "ran out of registers during register allocation";
- // Probably caused by an inline asm.
- MachineInstr *MI;
- for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
- (MI = I.skipInstruction());)
- if (MI->isInlineAsm())
- break;
- if (MI)
- MI->emitError(Msg);
- else
- report_fatal_error(Msg);
- // Keep going after reporting the error.
- VRM->assignVirt2Phys(VirtReg->reg,
- RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
- continue;
- }
-
- if (AvailablePhysReg)
- assign(*VirtReg, AvailablePhysReg);
-
- for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
- I != E; ++I) {
- LiveInterval *SplitVirtReg = *I;
- assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
- if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
- DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
- LIS->removeInterval(SplitVirtReg->reg);
- continue;
- }
- DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
- assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
- "expect split value in virtual register");
- enqueue(SplitVirtReg);
- ++NumNewQueued;
- }
- }
-}
-
-// Check if this live virtual register interferes with a physical register. If
-// not, then check for interference on each register that aliases with the
-// physical register. Return the interfering register.
-unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg,
- unsigned PhysReg) {
- for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
- if (query(VirtReg, *AliasI).checkInterference())
- return *AliasI;
- return 0;
-}
-
-// Helper for spillInteferences() that spills all interfering vregs currently
+// Helper for spillInterferences() that spills all interfering vregs currently
// assigned to this physical register.
-void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+void RABasic::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg);
assert(Q.seenAllInterferences() && "need collectInterferences()");
const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs();
@@ -391,7 +187,7 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
unassign(SpilledVReg, PhysReg);
// Spill the extracted interval.
- LiveRangeEdit LRE(SpilledVReg, SplitVRegs, 0, &PendingSpills);
+ LiveRangeEdit LRE(SpilledVReg, SplitVRegs, *MF, *LIS, VRM);
spiller().spill(LRE);
}
// After extracting segments, the query's results are invalid. But keep the
@@ -402,14 +198,13 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
// Spill or split all live virtual registers currently unified under PhysReg
// that interfere with VirtReg. The newly spilled or split live intervals are
// returned by appending them to SplitVRegs.
-bool
-RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
SmallVectorImpl<LiveInterval*> &SplitVRegs) {
// Record each interference and determine if all are spillable before mutating
// either the union or live intervals.
unsigned NumInterferences = 0;
// Collect interferences assigned to any alias of the physical register.
- for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) {
+ for (const uint16_t *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) {
LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI);
NumInterferences += QAlias.collectInterferingVRegs();
if (QAlias.seenUnspillableVReg()) {
@@ -421,52 +216,11 @@ RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
assert(NumInterferences > 0 && "expect interference");
// Spill each interfering vreg allocated to PhysReg or an alias.
- for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
spillReg(VirtReg, *AliasI, SplitVRegs);
return true;
}
-// Add newly allocated physical registers to the MBB live in sets.
-void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
- NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
- SlotIndexes *Indexes = LIS->getSlotIndexes();
- if (MF->size() <= 1)
- return;
-
- LiveIntervalUnion::SegmentIter SI;
- for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
- LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
- if (LiveUnion.empty())
- continue;
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:");
- MachineFunction::iterator MBB = llvm::next(MF->begin());
- MachineFunction::iterator MFE = MF->end();
- SlotIndex Start, Stop;
- tie(Start, Stop) = Indexes->getMBBRange(MBB);
- SI.setMap(LiveUnion.getMap());
- SI.find(Start);
- while (SI.valid()) {
- if (SI.start() <= Start) {
- if (!MBB->isLiveIn(PhysReg))
- MBB->addLiveIn(PhysReg);
- DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':'
- << PrintReg(SI.value()->reg, TRI));
- } else if (SI.start() > Stop)
- MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
- if (++MBB == MFE)
- break;
- tie(Start, Stop) = Indexes->getMBBRange(MBB);
- SI.advanceTo(Start);
- }
- DEBUG(dbgs() << '\n');
- }
-}
-
-
-//===----------------------------------------------------------------------===//
-// RABasic Implementation
-//===----------------------------------------------------------------------===//
-
// Driver for the register assignment and splitting heuristics.
// Manages iteration over the LiveIntervalUnions.
//
@@ -481,6 +235,10 @@ void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
// selectOrSplit().
unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Check for register mask interference. When live ranges cross calls, the
+ // set of usable registers is reduced to the callee-saved ones.
+ bool CrossRegMasks = LIS->checkRegMaskInterference(VirtReg, UsableRegs);
+
// Populate a list of physical register spill candidates.
SmallVector<unsigned, 8> PhysRegSpillCands;
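
The new checkRegMaskInterference call asks LiveIntervals whether VirtReg lives across any instruction carrying a register mask (typically a call); when it does, UsableRegs describes the registers every crossed mask preserves, and the loop below skips any candidate outside that set. A rough standalone analogue of that filtering step, with std::vector<bool> standing in for BitVector:

#include <vector>

// Toy register-mask summary for one live range: if Crosses is true, only the
// registers with Usable[R] set survive every mask the range crosses.
struct RegMaskInfo {
  bool Crosses;
  std::vector<bool> Usable;
  bool clobbered(unsigned PhysReg) const {
    return Crosses && !Usable[PhysReg];
  }
};

// Pick the first register in allocation order that is not clobbered by a
// register mask; 0 means no candidate survived (mirrors the skip above).
unsigned firstUsable(const std::vector<unsigned> &AllocationOrder,
                     const RegMaskInfo &RM) {
  for (unsigned i = 0, e = AllocationOrder.size(); i != e; ++i)
    if (!RM.clobbered(AllocationOrder[i]))
      return AllocationOrder[i];
  return 0;
}
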
@@ -491,6 +249,11 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
++I) {
unsigned PhysReg = *I;
+ // If PhysReg is clobbered by a register mask, it isn't useful for
+ // allocation or spilling.
+ if (CrossRegMasks && !UsableRegs.test(PhysReg))
+ continue;
+
// Check interference and as a side effect, initialize queries for this
// VirtReg and its aliases.
unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg);
@@ -498,9 +261,9 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
// Found an available register.
return PhysReg;
}
- Queries[interfReg].collectInterferingVRegs(1);
- LiveInterval *interferingVirtReg =
- Queries[interfReg].interferingVRegs().front();
+ LiveIntervalUnion::Query &IntfQ = query(VirtReg, interfReg);
+ IntfQ.collectInterferingVRegs(1);
+ LiveInterval *interferingVirtReg = IntfQ.interferingVRegs().front();
// The current VirtReg must either be spillable, or one of its interferences
// must have less spill weight.
@@ -524,7 +287,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
if (!VirtReg.isSpillable())
return ~0u;
- LiveRangeEdit LRE(VirtReg, SplitVRegs);
+ LiveRangeEdit LRE(VirtReg, SplitVRegs, *MF, *LIS, VRM);
spiller().spill(LRE);
// The live virtual register requesting allocation was spilled, so tell
@@ -579,7 +342,10 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
// Write out new DBG_VALUE instructions.
getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
- // The pass output is in VirtRegMap. Release all the transient data.
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers and release all the transient data.
+ VRM->clearAllVirt();
+ MRI->clearVirtRegs();
releaseMemory();
return true;
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index b36a445291b7..e09b7f8d26be 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -32,6 +32,7 @@
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
@@ -49,10 +50,7 @@ namespace {
public:
static char ID;
RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
- isBulkSpilling(false) {
- initializePHIEliminationPass(*PassRegistry::getPassRegistry());
- initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
- }
+ isBulkSpilling(false) {}
private:
const TargetMachine *TM;
MachineFunction *MF;
@@ -71,16 +69,20 @@ namespace {
// Everything we know about a live virtual register.
struct LiveReg {
MachineInstr *LastUse; // Last instr to use reg.
+ unsigned VirtReg; // Virtual register number.
unsigned PhysReg; // Currently held here.
unsigned short LastOpNum; // OpNum on LastUse.
bool Dirty; // Register needs spill.
- LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0),
- Dirty(false) {}
+ explicit LiveReg(unsigned v)
+ : LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {}
+
+ unsigned getSparseSetKey() const {
+ return TargetRegisterInfo::virtReg2Index(VirtReg);
+ }
};
- typedef DenseMap<unsigned, LiveReg> LiveRegMap;
- typedef LiveRegMap::value_type LiveRegEntry;
+ typedef SparseSet<LiveReg> LiveRegMap;
// LiveVirtRegs - This map contains entries for each virtual register
// that is currently available in a physical register.
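
Switching LiveRegMap from a DenseMap to a SparseSet means each LiveReg record carries its own key (the zero-based virtual register index from virtReg2Index), and lookups become constant-time probes into a table pre-sized to the number of virtual registers (hence the setUniverse call later in the patch). A rough standalone sketch of a sparse-set-style container keyed that way -- only the core idea, not LLVM's SparseSet:

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Stand-in for LiveReg: the element itself knows its key.
struct LiveRegLike {
  unsigned VirtRegIndex;               // like virtReg2Index(VirtReg)
  unsigned PhysReg;
  explicit LiveRegLike(unsigned Idx) : VirtRegIndex(Idx), PhysReg(0) {}
  unsigned getKey() const { return VirtRegIndex; }
};

class TinySparseSet {
  std::vector<LiveRegLike> Dense;      // the live elements, packed
  std::vector<uint32_t> Sparse;        // key -> index into Dense, ~0u if absent
public:
  void setUniverse(unsigned U) { Sparse.assign(U, ~0u); }

  LiveRegLike *find(unsigned Key) {
    uint32_t Idx = Sparse[Key];
    return Idx == ~0u ? 0 : &Dense[Idx];
  }

  // Insert if missing; either way return the element for Key.
  LiveRegLike &insert(const LiveRegLike &LR) {
    if (LiveRegLike *Existing = find(LR.getKey()))
      return *Existing;
    Sparse[LR.getKey()] = Dense.size();
    Dense.push_back(LR);
    return Dense.back();
  }

  void erase(unsigned Key) {
    uint32_t Idx = Sparse[Key];
    assert(Idx != ~0u && "erasing a key that is not present");
    Sparse[Dense.back().getKey()] = Idx;   // the last element fills the hole
    std::swap(Dense[Idx], Dense.back());
    Dense.pop_back();
    Sparse[Key] = ~0u;
  }

  bool empty() const { return Dense.empty(); }
};
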
@@ -137,8 +139,6 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequiredID(PHIEliminationID);
- AU.addRequiredID(TwoAddressInstructionPassID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -159,14 +159,23 @@ namespace {
void usePhysReg(MachineOperand&);
void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState);
unsigned calcSpillCost(unsigned PhysReg) const;
- void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg);
- void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint);
+ void assignVirtToPhysReg(LiveReg&, unsigned PhysReg);
+ LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
+ return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+ LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const {
+ return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+ LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg);
+ LiveRegMap::iterator allocVirtReg(MachineInstr *MI, LiveRegMap::iterator,
+ unsigned Hint);
LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint);
LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint);
void spillAll(MachineInstr *MI);
bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
+ void addRetOperands(MachineBasicBlock *MBB);
};
char RAFast::ID = 0;
}
@@ -222,10 +231,10 @@ void RAFast::addKillFlag(const LiveReg &LR) {
/// killVirtReg - Mark virtreg as no longer available.
void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
- addKillFlag(LRI->second);
- const LiveReg &LR = LRI->second;
- assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping");
- PhysRegState[LR.PhysReg] = regFree;
+ addKillFlag(*LRI);
+ assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg &&
+ "Broken RegState mapping");
+ PhysRegState[LRI->PhysReg] = regFree;
// Erase from LiveVirtRegs unless we're spilling in bulk.
if (!isBulkSpilling)
LiveVirtRegs.erase(LRI);
@@ -235,7 +244,7 @@ void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
void RAFast::killVirtReg(unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"killVirtReg needs a virtual register");
- LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg);
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
if (LRI != LiveVirtRegs.end())
killVirtReg(LRI);
}
@@ -245,7 +254,7 @@ void RAFast::killVirtReg(unsigned VirtReg) {
void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
- LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg);
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register");
spillVirtReg(MI, LRI);
}
@@ -253,18 +262,18 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
/// spillVirtReg - Do the actual work of spilling.
void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
LiveRegMap::iterator LRI) {
- LiveReg &LR = LRI->second;
- assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping");
+ LiveReg &LR = *LRI;
+ assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping");
if (LR.Dirty) {
// If this physreg is used by the instruction, we want to kill it on the
// instruction, not on the spill.
bool SpillKill = LR.LastUse != MI;
LR.Dirty = false;
- DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI)
+ DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI)
<< " in " << PrintReg(LR.PhysReg, TRI));
- const TargetRegisterClass *RC = MRI->getRegClass(LRI->first);
- int FI = getStackSpaceFor(LRI->first, RC);
+ const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg);
+ int FI = getStackSpaceFor(LRI->VirtReg, RC);
DEBUG(dbgs() << " to stack slot #" << FI << "\n");
TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
++NumStores; // Update statistics
@@ -272,7 +281,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
// If this register is used by DBG_VALUE then insert new DBG_VALUE to
// identify spilled location as the place to find corresponding variable's
// value.
- SmallVector<MachineInstr *, 4> &LRIDbgValues = LiveDbgValueMap[LRI->first];
+ SmallVector<MachineInstr *, 4> &LRIDbgValues =
+ LiveDbgValueMap[LRI->VirtReg];
for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
MachineInstr *DBG = LRIDbgValues[li];
const MDNode *MDPtr =
@@ -295,8 +305,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
}
}
- // Now this register is spilled there is should not be any DBG_VALUE pointing
- // to this register because they are all pointing to spilled value now.
+ // Now that this register is spilled, there should not be any DBG_VALUE
+ // pointing to this register because they all point to the spilled value
+ // now.
LRIDbgValues.clear();
if (SpillKill)
LR.LastUse = 0; // Don't kill register again
@@ -343,7 +354,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
}
// Maybe a superregister is reserved?
- for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
unsigned Alias = *AS; ++AS) {
switch (PhysRegState[Alias]) {
case regDisabled:
@@ -397,7 +408,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
// This is a disabled register, disable all aliases.
PhysRegState[PhysReg] = NewState;
- for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
unsigned Alias = *AS; ++AS) {
switch (unsigned VirtReg = PhysRegState[Alias]) {
case regDisabled:
@@ -435,14 +446,17 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding "
<< PrintReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
- default:
- return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
+ default: {
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ return I->Dirty ? spillDirty : spillClean;
+ }
}
// This is a disabled register, add up cost of aliases.
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
unsigned Cost = 0;
- for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
unsigned Alias = *AS; ++AS) {
if (UsedInInstr.test(Alias))
return spillImpossible;
@@ -454,10 +468,13 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
break;
case regReserved:
return spillImpossible;
- default:
- Cost += LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
+ default: {
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ Cost += I->Dirty ? spillDirty : spillClean;
break;
}
+ }
}
return Cost;
}
@@ -467,17 +484,27 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
/// that PhysReg is the proper container for VirtReg now. The physical
/// register must not be used for anything else when this is called.
///
-void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) {
- DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to "
+void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) {
+ DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to "
<< PrintReg(PhysReg, TRI) << "\n");
- PhysRegState[PhysReg] = LRE.first;
- assert(!LRE.second.PhysReg && "Already assigned a physreg");
- LRE.second.PhysReg = PhysReg;
+ PhysRegState[PhysReg] = LR.VirtReg;
+ assert(!LR.PhysReg && "Already assigned a physreg");
+ LR.PhysReg = PhysReg;
+}
+
+RAFast::LiveRegMap::iterator
+RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared");
+ assignVirtToPhysReg(*LRI, PhysReg);
+ return LRI;
}
/// allocVirtReg - Allocate a physical register for VirtReg.
-void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
- const unsigned VirtReg = LRE.first;
+RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
+ LiveRegMap::iterator LRI,
+ unsigned Hint) {
+ const unsigned VirtReg = LRI->VirtReg;
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Can only allocate virtual registers");
@@ -496,7 +523,9 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
if (Cost < spillDirty) {
if (Cost)
definePhysReg(MI, Hint, regFree);
- return assignVirtToPhysReg(LRE, Hint);
+ // definePhysReg may kill virtual registers and modify LiveVirtRegs.
+ // That invalidates LRI, so run a new lookup for VirtReg.
+ return assignVirtToPhysReg(VirtReg, Hint);
}
}
@@ -505,8 +534,10 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
// First try to find a completely free register.
for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
unsigned PhysReg = *I;
- if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg))
- return assignVirtToPhysReg(LRE, PhysReg);
+ if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) {
+ assignVirtToPhysReg(*LRI, PhysReg);
+ return LRI;
+ }
}
DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
@@ -519,21 +550,25 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
DEBUG(dbgs() << "\tCost: " << Cost << "\n");
DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
// Cost is 0 when all aliases are already disabled.
- if (Cost == 0)
- return assignVirtToPhysReg(LRE, *I);
+ if (Cost == 0) {
+ assignVirtToPhysReg(*LRI, *I);
+ return LRI;
+ }
if (Cost < BestCost)
BestReg = *I, BestCost = Cost;
}
if (BestReg) {
definePhysReg(MI, BestReg, regFree);
- return assignVirtToPhysReg(LRE, BestReg);
+ // definePhysReg may kill virtual registers and modify LiveVirtRegs.
+ // That invalidates LRI, so run a new lookup for VirtReg.
+ return assignVirtToPhysReg(VirtReg, BestReg);
}
// Nothing we can do. Report an error and keep going with a bad allocation.
MI->emitError("ran out of registers during register allocation");
definePhysReg(MI, *AO.begin(), regFree);
- assignVirtToPhysReg(LRE, *AO.begin());
+ return assignVirtToPhysReg(VirtReg, *AO.begin());
}
/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
@@ -544,8 +579,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
- tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
- LiveReg &LR = LRI->second;
+ tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
if (New) {
// If there is no hint, peek at the only use of this register.
if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
@@ -555,18 +589,18 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
if (UseMI.isCopyLike())
Hint = UseMI.getOperand(0).getReg();
}
- allocVirtReg(MI, *LRI, Hint);
- } else if (LR.LastUse) {
+ LRI = allocVirtReg(MI, LRI, Hint);
+ } else if (LRI->LastUse) {
// Redefining a live register - kill at the last use, unless it is this
// instruction defining VirtReg multiple times.
- if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse())
- addKillFlag(LR);
+ if (LRI->LastUse != MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse())
+ addKillFlag(*LRI);
}
- assert(LR.PhysReg && "Register not assigned");
- LR.LastUse = MI;
- LR.LastOpNum = OpNum;
- LR.Dirty = true;
- UsedInInstr.set(LR.PhysReg);
+ assert(LRI->PhysReg && "Register not assigned");
+ LRI->LastUse = MI;
+ LRI->LastOpNum = OpNum;
+ LRI->Dirty = true;
+ UsedInInstr.set(LRI->PhysReg);
return LRI;
}
@@ -578,18 +612,17 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
- tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
- LiveReg &LR = LRI->second;
+ tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
MachineOperand &MO = MI->getOperand(OpNum);
if (New) {
- allocVirtReg(MI, *LRI, Hint);
+ LRI = allocVirtReg(MI, LRI, Hint);
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
int FrameIndex = getStackSpaceFor(VirtReg, RC);
DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
- << PrintReg(LR.PhysReg, TRI) << "\n");
- TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI);
+ << PrintReg(LRI->PhysReg, TRI) << "\n");
+ TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI);
++NumLoads;
- } else if (LR.Dirty) {
+ } else if (LRI->Dirty) {
if (isLastUseOfLocalReg(MO)) {
DEBUG(dbgs() << "Killing last use: " << MO << "\n");
if (MO.isUse())
@@ -614,10 +647,10 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
MO.setIsDead(false);
}
- assert(LR.PhysReg && "Register not assigned");
- LR.LastUse = MI;
- LR.LastOpNum = OpNum;
- UsedInInstr.set(LR.PhysReg);
+ assert(LRI->PhysReg && "Register not assigned");
+ LRI->LastUse = MI;
+ LRI->LastOpNum = OpNum;
+ UsedInInstr.set(LRI->PhysReg);
return LRI;
}
@@ -674,7 +707,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
UsedInInstr.set(Reg);
if (ThroughRegs.count(PhysRegState[Reg]))
definePhysReg(MI, Reg, regFree);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
UsedInInstr.set(*AS);
if (ThroughRegs.count(PhysRegState[*AS]))
definePhysReg(MI, *AS, regFree);
@@ -682,7 +715,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
}
SmallVector<unsigned, 8> PartialDefs;
- DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n");
+ DEBUG(dbgs() << "Allocating tied uses.\n");
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
@@ -694,7 +727,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
<< DefIdx << ".\n");
LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
- unsigned PhysReg = LRI->second.PhysReg;
+ unsigned PhysReg = LRI->PhysReg;
setPhysReg(MI, i, PhysReg);
// Note: we don't update the def operand yet. That would cause the normal
// def-scan to attempt spilling.
@@ -703,16 +736,25 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
// Reload the register, but don't assign to the operand just yet.
// That would confuse the later phys-def processing pass.
LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
- PartialDefs.push_back(LRI->second.PhysReg);
- } else if (MO.isEarlyClobber()) {
- // Note: defineVirtReg may invalidate MO.
- LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
- unsigned PhysReg = LRI->second.PhysReg;
- if (setPhysReg(MI, i, PhysReg))
- VirtDead.push_back(Reg);
+ PartialDefs.push_back(LRI->PhysReg);
}
}
+ DEBUG(dbgs() << "Allocating early clobbers.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (!MO.isEarlyClobber())
+ continue;
+ // Note: defineVirtReg may invalidate MO.
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->PhysReg;
+ if (setPhysReg(MI, i, PhysReg))
+ VirtDead.push_back(Reg);
+ }
+
// Restore UsedInInstr to a state usable for allocating normal virtual uses.
UsedInInstr.reset();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -730,32 +772,66 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
UsedInInstr.set(PartialDefs[i]);
}
-void RAFast::AllocateBasicBlock() {
- DEBUG(dbgs() << "\nAllocating " << *MBB);
+/// addRetOperands - Ensure that a return instruction has an operand for each
+/// value live out of the function.
+///
+/// Things marked both call and return are tail calls; do not do this for them.
+/// The tail callee need not take the same registers as input that it produces
+/// as output, and there are dependencies for its input registers elsewhere.
+///
+/// FIXME: This should be done as part of instruction selection, and this helper
+/// should be deleted. Until then, we use custom logic here to create the proper
+/// operand under all circumstances. We can't use addRegisterKilled because that
+/// doesn't make sense for undefined values. We can't simply avoid calling it
+/// for undefined values, because we must ensure that the operand always exists.
+void RAFast::addRetOperands(MachineBasicBlock *MBB) {
+ if (MBB->empty() || !MBB->back().isReturn() || MBB->back().isCall())
+ return;
+
+ MachineInstr *MI = &MBB->back();
+
+ for (MachineRegisterInfo::liveout_iterator
+ I = MBB->getParent()->getRegInfo().liveout_begin(),
+ E = MBB->getParent()->getRegInfo().liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Cannot have a live-out virtual register.");
+
+ bool hasDef = PhysRegState[Reg] == regReserved;
+
+ // Check if this register already has an operand.
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+
+ unsigned OperReg = MO.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(OperReg))
+ continue;
- // FIXME: This should probably be added by instruction selection instead?
- // If the last instruction in the block is a return, make sure to mark it as
- // using all of the live-out values in the function. Things marked both call
- // and return are tail calls; do not do this for them. The tail callee need
- // not take the same registers as input that it produces as output, and there
- // are dependencies for its input registers elsewhere.
- if (!MBB->empty() && MBB->back().getDesc().isReturn() &&
- !MBB->back().getDesc().isCall()) {
- MachineInstr *Ret = &MBB->back();
-
- for (MachineRegisterInfo::liveout_iterator
- I = MF->getRegInfo().liveout_begin(),
- E = MF->getRegInfo().liveout_end(); I != E; ++I) {
- assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
- "Cannot have a live-out virtual register.");
-
- // Add live-out registers as implicit uses.
- Ret->addRegisterKilled(*I, TRI, true);
+ if (OperReg == Reg || TRI->isSuperRegister(OperReg, Reg)) {
+ // If the ret already has an operand for this physreg or a superset,
+ // don't duplicate it. Set the kill flag if the value is defined.
+ if (hasDef && !MO.isKill())
+ MO.setIsKill();
+ Found = true;
+ break;
+ }
}
+ if (!Found)
+ MI->addOperand(MachineOperand::CreateReg(Reg,
+ false /*IsDef*/,
+ true /*IsImp*/,
+ hasDef/*IsKill*/));
}
+}
+
+void RAFast::AllocateBasicBlock() {
+ DEBUG(dbgs() << "\nAllocating " << *MBB);
PhysRegState.assign(TRI->getNumRegs(), regDisabled);
- assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?");
+ assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
MachineBasicBlock::iterator MII = MBB->begin();
@@ -783,25 +859,26 @@ void RAFast::AllocateBasicBlock() {
case regReserved:
dbgs() << "*";
break;
- default:
+ default: {
dbgs() << '=' << PrintReg(PhysRegState[Reg]);
- if (LiveVirtRegs[PhysRegState[Reg]].Dirty)
+ LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ if (I->Dirty)
dbgs() << "*";
- assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg &&
- "Bad inverse map");
+ assert(I->PhysReg == Reg && "Bad inverse map");
break;
}
+ }
}
dbgs() << '\n';
// Check that LiveVirtRegs is the inverse.
for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
e = LiveVirtRegs.end(); i != e; ++i) {
- assert(TargetRegisterInfo::isVirtualRegister(i->first) &&
+ assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
"Bad map key");
- assert(TargetRegisterInfo::isPhysicalRegister(i->second.PhysReg) &&
+ assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
"Bad map value");
- assert(PhysRegState[i->second.PhysReg] == i->first &&
- "Bad inverse map");
+ assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
}
});
@@ -815,10 +892,9 @@ void RAFast::AllocateBasicBlock() {
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
- LiveDbgValueMap[Reg].push_back(MI);
- LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
+ LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
if (LRI != LiveVirtRegs.end())
- setPhysReg(MI, i, LRI->second.PhysReg);
+ setPhysReg(MI, i, LRI->PhysReg);
else {
int SS = StackSlotForVirtReg[Reg];
if (SS == -1) {
@@ -849,6 +925,7 @@ void RAFast::AllocateBasicBlock() {
}
}
}
+ LiveDbgValueMap[Reg].push_back(MI);
}
}
// Next instruction.
@@ -932,7 +1009,7 @@ void RAFast::AllocateBasicBlock() {
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst);
- unsigned PhysReg = LRI->second.PhysReg;
+ unsigned PhysReg = LRI->PhysReg;
CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
if (setPhysReg(MI, i, PhysReg))
killVirtReg(LRI);
@@ -953,13 +1030,13 @@ void RAFast::AllocateBasicBlock() {
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
UsedInInstr.set(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
UsedInInstr.set(*AS);
}
}
unsigned DefOpEnd = MI->getNumOperands();
- if (MCID.isCall()) {
+ if (MI->isCall()) {
// Spill all virtregs before a call. This serves two purposes: 1. If an
// exception is thrown, the landing pad is going to expect to find
// registers in their spill slots, and 2. we don't have to wade through
@@ -988,7 +1065,7 @@ void RAFast::AllocateBasicBlock() {
continue;
}
LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc);
- unsigned PhysReg = LRI->second.PhysReg;
+ unsigned PhysReg = LRI->PhysReg;
if (setPhysReg(MI, i, PhysReg)) {
VirtDead.push_back(Reg);
CopyDst = 0; // cancel coalescing;
@@ -1024,6 +1101,9 @@ void RAFast::AllocateBasicBlock() {
MBB->erase(Coalesced[i]);
NumCopies += Coalesced.size();
+ // addRetOperands must run after we've seen all defs in this block.
+ addRetOperands(MBB);
+
DEBUG(MBB->dump());
}
@@ -1038,12 +1118,16 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
TM = &Fn.getTarget();
TRI = TM->getRegisterInfo();
TII = TM->getInstrInfo();
+ MRI->freezeReservedRegs(Fn);
RegClassInfo.runOnMachineFunction(Fn);
UsedInInstr.resize(TRI->getNumRegs());
+ assert(!MRI->isSSA() && "regalloc requires leaving SSA");
+
// initialize the virtual->physical register map to have a 'null'
// mapping for all virtual registers
StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
+ LiveVirtRegs.setUniverse(MRI->getNumVirtRegs());
// Loop over all of the basic blocks, eliminating virtual register references
for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
@@ -1052,16 +1136,17 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
AllocateBasicBlock();
}
- // Make sure the set of used physregs is closed under subreg operations.
- MRI->closePhysRegsUsed(*TRI);
-
// Add the clobber lists for all the instructions we skipped earlier.
for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator
I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
- if (const unsigned *Defs = (*I)->getImplicitDefs())
+ if (const uint16_t *Defs = (*I)->getImplicitDefs())
while (*Defs)
MRI->setPhysRegUsed(*Defs++);
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers.
+ MRI->clearVirtRegs();
+
SkippedInstrs.clear();
StackSlotForVirtReg.clear();
LiveDbgValueMap.clear();
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index f54a2c85d100..3f2a617100c3 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -16,7 +16,6 @@
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
-#include "LiveRangeEdit.h"
#include "RegAllocBase.h"
#include "Spiller.h"
#include "SpillPlacement.h"
@@ -29,6 +28,7 @@
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -168,6 +168,19 @@ class RAGreedy : public MachineFunctionPass,
}
};
+ // Register mask interference. The current VirtReg is checked for register
+ // mask interference on entry to selectOrSplit(). If there is no
+ // interference, UsableRegs is left empty. If there is interference,
+ // UsableRegs has a bit mask of registers that can be used without register
+ // mask interference.
+ BitVector UsableRegs;
+
+ /// clobberedByRegMask - Returns true if PhysReg is not directly usable
+ /// because of register mask clobbers.
+ bool clobberedByRegMask(unsigned PhysReg) const {
+ return !UsableRegs.empty() && !UsableRegs.test(PhysReg);
+ }
+
// splitting state.
std::auto_ptr<SplitAnalysis> SA;
std::auto_ptr<SplitEditor> SE;
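A minimal sketch of the UsableRegs/clobberedByRegMask idea introduced above, with a plain std::vector<bool> standing in for BitVector; in the real pass the mask would be filled from LiveIntervals::checkRegMaskInterference():

#include <cassert>
#include <vector>

struct RegMaskFilter {
  std::vector<bool> UsableRegs;        // empty => no regmask interference
  bool clobberedByRegMask(unsigned PhysReg) const {
    return !UsableRegs.empty() && !UsableRegs[PhysReg];
  }
};

int main() {
  RegMaskFilter F;
  assert(!F.clobberedByRegMask(7));    // no mask recorded yet
  F.UsableRegs.assign(64, false);
  F.UsableRegs[3] = true;              // pretend only physreg 3 survives the call
  assert(F.clobberedByRegMask(7) && !F.clobberedByRegMask(3));
  return 0;
}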
@@ -248,7 +261,6 @@ public:
static char ID;
private:
- void LRE_WillEraseInstruction(MachineInstr*);
bool LRE_CanEraseVirtReg(unsigned);
void LRE_WillShrinkVirtReg(unsigned);
void LRE_DidCloneVirtReg(unsigned, unsigned);
@@ -308,8 +320,8 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
@@ -328,9 +340,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
AU.addPreserved<LiveDebugVariables>();
- if (StrongPHIElim)
- AU.addRequiredID(StrongPHIEliminationID);
- AU.addRequiredTransitiveID(RegisterCoalescerPassID);
AU.addRequired<CalculateSpillWeights>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
@@ -350,11 +359,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
// LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//
-void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) {
- // LRE itself will remove from SlotIndexes and parent basic block.
- VRM->RemoveMachineInstrFromMaps(MI);
-}
-
bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
if (unsigned PhysReg = VRM->getPhys(VirtReg)) {
unassign(LIS->getInterval(VirtReg), PhysReg);
@@ -424,13 +428,13 @@ void RAGreedy::enqueue(LiveInterval *LI) {
Prio |= (1u << 30);
}
- Queue.push(std::make_pair(Prio, Reg));
+ Queue.push(std::make_pair(Prio, ~Reg));
}
LiveInterval *RAGreedy::dequeue() {
if (Queue.empty())
return 0;
- LiveInterval *LI = &LIS->getInterval(Queue.top().second);
+ LiveInterval *LI = &LIS->getInterval(~Queue.top().second);
Queue.pop();
return LI;
}
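The ~Reg change above is a tie-breaking trick: the priority queue is a max-heap of (priority, value) pairs compared lexicographically, so storing the complemented register number makes lower-numbered virtual registers pop first among equal priorities, and dequeue() recovers the register with a second ~. A small standalone check of that encoding, assuming the queue holds plain unsigned pairs:

#include <cassert>
#include <queue>
#include <utility>

int main() {
  std::priority_queue<std::pair<unsigned, unsigned> > Q;  // max-heap
  Q.push(std::make_pair(7u, ~100u));
  Q.push(std::make_pair(7u, ~42u));
  assert(~Q.top().second == 42u);  // equal priority: lower vreg number wins
  return 0;
}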
@@ -446,9 +450,12 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
Order.rewind();
unsigned PhysReg;
- while ((PhysReg = Order.next()))
+ while ((PhysReg = Order.next())) {
+ if (clobberedByRegMask(PhysReg))
+ continue;
if (!checkPhysRegInterference(VirtReg, PhysReg))
break;
+ }
if (!PhysReg || Order.isHint(PhysReg))
return PhysReg;
@@ -457,7 +464,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
// If we missed a simple hint, try to cheaply evict interference from the
// preferred register.
if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
- if (Order.isHint(Hint)) {
+ if (Order.isHint(Hint) && !clobberedByRegMask(Hint)) {
DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
EvictionCost MaxCost(1);
if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
@@ -532,7 +539,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
Cascade = NextCascade;
EvictionCost Cost;
- for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
// If there are 10 or more interferences, chances are one is heavier.
if (Q.collectInterferingVRegs(10) >= 10)
@@ -590,7 +597,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
<< " interference: Cascade " << Cascade << '\n');
- for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
assert(Q.seenAllInterferences() && "Didn't check all interferences.");
for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
@@ -629,6 +636,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
Order.rewind();
while (unsigned PhysReg = Order.next()) {
+ if (clobberedByRegMask(PhysReg))
+ continue;
if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
continue;
// The first use of a callee-saved register in a function has cost 1.
@@ -1118,6 +1127,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
}
--NumCands;
GlobalCand[Worst] = GlobalCand[NumCands];
+ if (BestCand == NumCands)
+ BestCand = Worst;
}
if (GlobalCand.size() <= NumCands)
@@ -1172,7 +1183,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
return 0;
// Prepare split editor.
- LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit, SplitSpillMode);
// Assign all edge bundles to the preferred candidate, or NoCand.
@@ -1220,7 +1231,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
unsigned Reg = VirtReg.reg;
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
- LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit, SplitSpillMode);
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
@@ -1268,7 +1279,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
SmallVectorImpl<float> &GapWeight) {
assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
- const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
const unsigned NumGaps = Uses.size()-1;
// Start and end points for the interference check.
@@ -1280,7 +1291,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
GapWeight.assign(NumGaps, 0.0f);
// Add interference from each overlapping register.
- for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+ for (const uint16_t *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
.checkInterference())
continue;
@@ -1292,7 +1303,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
// surrounding the instruction. The exception is interference before
// StartIdx and after StopIdx.
//
- LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx);
+ LiveIntervalUnion::SegmentIter IntI = getLiveUnion(*AI).find(StartIdx);
for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
// Skip the gaps before IntI.
while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
@@ -1329,7 +1340,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// that the interval is continuous from FirstInstr to LastInstr. We should
// make sure that we don't do anything illegal to such an interval, though.
- const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
if (Uses.size() <= 2)
return 0;
const unsigned NumGaps = Uses.size()-1;
@@ -1337,10 +1348,40 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
DEBUG({
dbgs() << "tryLocalSplit: ";
for (unsigned i = 0, e = Uses.size(); i != e; ++i)
- dbgs() << ' ' << SA->UseSlots[i];
+ dbgs() << ' ' << Uses[i];
dbgs() << '\n';
});
+ // If VirtReg is live across any register mask operands, compute a list of
+ // gaps with register masks.
+ SmallVector<unsigned, 8> RegMaskGaps;
+ if (!UsableRegs.empty()) {
+ // Get regmask slots for the whole block.
+ ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
+ DEBUG(dbgs() << RMS.size() << " regmasks in block:");
+ // Constrain to VirtReg's live range.
+ unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
+ Uses.front().getRegSlot()) - RMS.begin();
+ unsigned re = RMS.size();
+ for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
+ // Look for Uses[i] <= RMS <= Uses[i+1].
+ assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
+ if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
+ continue;
+ // Skip a regmask on the same instruction as the last use. It doesn't
+ // overlap the live range.
+ if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
+ break;
+ DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
+ RegMaskGaps.push_back(i);
+ // Advance ri to the next gap. A regmask on one of the uses counts in
+ // both gaps.
+ while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
+ ++ri;
+ }
+ DEBUG(dbgs() << '\n');
+ }
+
// Since we allow local split results to be split again, there is a risk of
// creating infinite loops. It is tempting to require that the new live
// ranges have fewer instructions than the original. That would guarantee
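The RegMaskGaps scan above starts from the first regmask slot at or after the first use, located with std::lower_bound over the block's sorted slot list. A toy version of that lookup, with plain integers standing in for SlotIndexes:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<unsigned> RegMaskSlots = {4, 12, 20, 36};   // sorted, like RMS
  unsigned FirstUse = 10;
  // Index of the first regmask slot that is not earlier than the first use.
  unsigned ri = std::lower_bound(RegMaskSlots.begin(), RegMaskSlots.end(),
                                 FirstUse) - RegMaskSlots.begin();
  assert(ri == 1 && RegMaskSlots[ri] == 12);
  return 0;
}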
@@ -1375,6 +1416,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
calcGapWeights(PhysReg, GapWeight);
+ // Remove any gaps with regmask clobbers.
+ if (clobberedByRegMask(PhysReg))
+ for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
+ GapWeight[RegMaskGaps[i]] = HUGE_VALF;
+
// Try to find the best sequence of gaps to close.
// The new spill weight must be larger than any gap interference.
@@ -1466,7 +1512,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
<< '-' << Uses[BestAfter] << ", " << BestDiff
<< ", " << (BestAfter - BestBefore + 1) << " instrs\n");
- LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit);
SE->openIntv();
@@ -1553,6 +1599,11 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // Check if VirtReg is live across any calls.
+ UsableRegs.clear();
+ if (LIS->checkRegMaskInterference(VirtReg, UsableRegs))
+ DEBUG(dbgs() << "Live across regmasks.\n");
+
// First try assigning a free register.
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
@@ -1593,7 +1644,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
// Finally spill VirtReg itself.
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
- LiveRangeEdit LRE(VirtReg, NewVRegs, this);
+ LiveRangeEdit LRE(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
spiller().spill(LRE);
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
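Taken together with the earlier hunks, selectOrSplit() now proceeds roughly as: mask out regmask-clobbered candidates, try a free register, try eviction, try splitting, and spill only as a last resort. A deliberately rough sketch of that decision order (the real pass spreads it across tryAssign/tryEvict/trySplit; the names below are illustrative):

enum Decision { Assign, Evict, Split, Spill };

Decision selectOrSplit(bool FreeRegAvailable, bool CanEvictCheaply, bool CanSplit) {
  if (FreeRegAvailable) return Assign;
  if (CanEvictCheaply)  return Evict;
  if (CanSplit)         return Split;
  return Spill;                    // last resort: spill the virtual register
}

int main() { return selectOrSplit(false, false, true) == Split ? 0 : 1; }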
@@ -1628,7 +1679,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
ExtraRegInfo.clear();
ExtraRegInfo.resize(MRI->getNumVirtRegs());
NextCascade = 1;
- IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI);
+ IntfCache.init(MF, &getLiveUnion(0), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
allocatePhysRegs();
@@ -1647,7 +1698,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DebugVars->emitDebugValues(VRM);
}
- // The pass output is in VirtRegMap. Release all the transient data.
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers and release all the transient data.
+ VRM->clearAllVirt();
+ MRI->clearVirtRegs();
releaseMemory();
return true;
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
deleted file mode 100644
index ce3fb90b1126..000000000000
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ /dev/null
@@ -1,1543 +0,0 @@
-//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a linear scan register allocator.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "regalloc"
-#include "LiveDebugVariables.h"
-#include "LiveRangeEdit.h"
-#include "VirtRegMap.h"
-#include "VirtRegRewriter.h"
-#include "RegisterClassInfo.h"
-#include "Spiller.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/EquivalenceClasses.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <queue>
-#include <memory>
-#include <cmath>
-
-using namespace llvm;
-
-STATISTIC(NumIters , "Number of iterations performed");
-STATISTIC(NumBacktracks, "Number of times we had to backtrack");
-STATISTIC(NumCoalesce, "Number of copies coalesced");
-STATISTIC(NumDowngrade, "Number of registers downgraded");
-
-static cl::opt<bool>
-NewHeuristic("new-spilling-heuristic",
- cl::desc("Use new spilling heuristic"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-TrivCoalesceEnds("trivial-coalesce-ends",
- cl::desc("Attempt trivial coalescing of interval ends"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-AvoidWAWHazard("avoid-waw-hazard",
- cl::desc("Avoid write-write hazards for some register classes"),
- cl::init(false), cl::Hidden);
-
-static RegisterRegAlloc
-linearscanRegAlloc("linearscan", "linear scan register allocator",
- createLinearScanRegisterAllocator);
-
-namespace {
- // When we allocate a register, add it to a fixed-size queue of
- // registers to skip in subsequent allocations. This trades a small
- // amount of register pressure and increased spills for flexibility in
- // the post-pass scheduler.
- //
- // Note that the number of registers used for reloading spills
- // will be one greater than the value of this option.
- //
- // One big limitation of this is that it doesn't differentiate between
- // different register classes. So on x86-64, if there is xmm register
- // pressure, it can cause fewer GPRs to be held in the queue.
- static cl::opt<unsigned>
- NumRecentlyUsedRegs("linearscan-skip-count",
- cl::desc("Number of registers for linearscan to remember"
- "to skip."),
- cl::init(0),
- cl::Hidden);
-
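The linearscan-skip-count option above feeds a small fixed-size queue of recently allocated registers (RecentRegs / recordRecentlyUsed below). A standalone sketch of that ring buffer, with illustrative names and sizes:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

struct RecentRegQueue {
  std::vector<unsigned> Regs;          // fixed capacity; 0 marks an empty slot
  std::size_t Next;
  explicit RecentRegQueue(std::size_t N) : Regs(N, 0), Next(0) {}
  void record(unsigned Reg) {
    if (Regs.empty()) return;          // feature disabled when N == 0
    Regs[Next] = Reg;
    Next = (Next + 1) % Regs.size();   // overwrite the oldest entry
  }
  bool isRecentlyUsed(unsigned Reg) const {
    return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end();
  }
};

int main() {
  RecentRegQueue Q(2);
  Q.record(10); Q.record(11); Q.record(12);   // 10 falls out of the window
  assert(Q.isRecentlyUsed(12) && !Q.isRecentlyUsed(10));
  return 0;
}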
- struct RALinScan : public MachineFunctionPass {
- static char ID;
- RALinScan() : MachineFunctionPass(ID) {
- initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
- initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
- initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerPass(
- *PassRegistry::getPassRegistry());
- initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
- initializeLiveStacksPass(*PassRegistry::getPassRegistry());
- initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
- initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
- initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
- initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
-
- // Initialize the queue to record recently-used registers.
- if (NumRecentlyUsedRegs > 0)
- RecentRegs.resize(NumRecentlyUsedRegs, 0);
- RecentNext = RecentRegs.begin();
- avoidWAW_ = 0;
- }
-
- typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
- typedef SmallVector<IntervalPtr, 32> IntervalPtrs;
- private:
- /// RelatedRegClasses - This structure is built the first time a function is
- /// compiled, and keeps track of which register classes have registers that
- /// belong to multiple classes or have aliases that are in other classes.
- EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
- DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
-
- // NextReloadMap - For each register in the map, it maps to another
- // register which is defined by a reload from the same stack slot and
- // both reloads are in the same basic block.
- DenseMap<unsigned, unsigned> NextReloadMap;
-
- // DowngradedRegs - A set of registers which are being "downgraded", i.e.
- // un-favored for allocation.
- SmallSet<unsigned, 8> DowngradedRegs;
-
- // DowngradeMap - A map from virtual registers to physical registers being
- // downgraded for the virtual registers.
- DenseMap<unsigned, unsigned> DowngradeMap;
-
- MachineFunction* mf_;
- MachineRegisterInfo* mri_;
- const TargetMachine* tm_;
- const TargetRegisterInfo* tri_;
- const TargetInstrInfo* tii_;
- BitVector allocatableRegs_;
- BitVector reservedRegs_;
- LiveIntervals* li_;
- MachineLoopInfo *loopInfo;
- RegisterClassInfo RegClassInfo;
-
- /// handled_ - Intervals are added to the handled_ set in the order of their
- /// start value. This is used for backtracking.
- std::vector<LiveInterval*> handled_;
-
- /// fixed_ - Intervals that correspond to machine registers.
- ///
- IntervalPtrs fixed_;
-
- /// active_ - Intervals that are currently being processed, and which have a
- /// live range active for the current point.
- IntervalPtrs active_;
-
- /// inactive_ - Intervals that are currently being processed, but which have
- /// a hole at the current point.
- IntervalPtrs inactive_;
-
- typedef std::priority_queue<LiveInterval*,
- SmallVector<LiveInterval*, 64>,
- greater_ptr<LiveInterval> > IntervalHeap;
- IntervalHeap unhandled_;
-
- /// regUse_ - Tracks register usage.
- SmallVector<unsigned, 32> regUse_;
- SmallVector<unsigned, 32> regUseBackUp_;
-
- /// vrm_ - Tracks register assignments.
- VirtRegMap* vrm_;
-
- std::auto_ptr<VirtRegRewriter> rewriter_;
-
- std::auto_ptr<Spiller> spiller_;
-
- // The queue of recently-used registers.
- SmallVector<unsigned, 4> RecentRegs;
- SmallVector<unsigned, 4>::iterator RecentNext;
-
- // Last write-after-write register written.
- unsigned avoidWAW_;
-
- // Record that we just picked this register.
- void recordRecentlyUsed(unsigned reg) {
- assert(reg != 0 && "Recently used register is NOREG!");
- if (!RecentRegs.empty()) {
- *RecentNext++ = reg;
- if (RecentNext == RecentRegs.end())
- RecentNext = RecentRegs.begin();
- }
- }
-
- public:
- virtual const char* getPassName() const {
- return "Linear Scan Register Allocator";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<SlotIndexes>();
- if (StrongPHIElim)
- AU.addRequiredID(StrongPHIEliminationID);
- // Make sure PassManager knows which analyses to make available
- // to coalescing and which analyses coalescing invalidates.
- AU.addRequiredTransitiveID(RegisterCoalescerPassID);
- AU.addRequired<CalculateSpillWeights>();
- AU.addRequiredID(LiveStacksID);
- AU.addPreservedID(LiveStacksID);
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
- AU.addRequired<VirtRegMap>();
- AU.addPreserved<VirtRegMap>();
- AU.addRequired<LiveDebugVariables>();
- AU.addPreserved<LiveDebugVariables>();
- AU.addRequiredID(MachineDominatorsID);
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- /// runOnMachineFunction - register allocate the whole function
- bool runOnMachineFunction(MachineFunction&);
-
- // Determine if we skip this register due to its being recently used.
- bool isRecentlyUsed(unsigned reg) const {
- return reg == avoidWAW_ ||
- std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end();
- }
-
- private:
- /// linearScan - the linear scan algorithm
- void linearScan();
-
- /// initIntervalSets - initialize the interval sets.
- ///
- void initIntervalSets();
-
- /// processActiveIntervals - expire old intervals and move non-overlapping
- /// ones to the inactive list.
- void processActiveIntervals(SlotIndex CurPoint);
-
- /// processInactiveIntervals - expire old intervals and move overlapping
- /// ones to the active list.
- void processInactiveIntervals(SlotIndex CurPoint);
-
- /// hasNextReloadInterval - Return the next liveinterval that's being
- /// defined by a reload from the same SS as the specified one.
- LiveInterval *hasNextReloadInterval(LiveInterval *cur);
-
- /// DowngradeRegister - Downgrade a register for allocation.
- void DowngradeRegister(LiveInterval *li, unsigned Reg);
-
- /// UpgradeRegister - Upgrade a register for allocation.
- void UpgradeRegister(unsigned Reg);
-
- /// assignRegOrStackSlotAtInterval - assign a register if one
- /// is available, or spill.
- void assignRegOrStackSlotAtInterval(LiveInterval* cur);
-
- void updateSpillWeights(std::vector<float> &Weights,
- unsigned reg, float weight,
- const TargetRegisterClass *RC);
-
- /// findIntervalsToSpill - Determine the intervals to spill for the
- /// specified interval. It's passed the physical registers whose spill
- /// weight is the lowest among all the registers whose live intervals
- /// conflict with the interval.
- void findIntervalsToSpill(LiveInterval *cur,
- std::vector<std::pair<unsigned,float> > &Candidates,
- unsigned NumCands,
- SmallVector<LiveInterval*, 8> &SpillIntervals);
-
- /// attemptTrivialCoalescing - If a simple interval is defined by a copy,
- /// try to allocate the definition to the same register as the source,
- /// if the register is not defined during the life time of the interval.
- /// This eliminates a copy, and is used to coalesce copies which were not
- /// coalesced away before allocation either due to dest and src being in
- /// different register classes or because the coalescer was overly
- /// conservative.
- unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg);
-
- ///
- /// Register usage / availability tracking helpers.
- ///
-
- void initRegUses() {
- regUse_.resize(tri_->getNumRegs(), 0);
- regUseBackUp_.resize(tri_->getNumRegs(), 0);
- }
-
- void finalizeRegUses() {
-#ifndef NDEBUG
- // Verify all the registers are "freed".
- bool Error = false;
- for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) {
- if (regUse_[i] != 0) {
- dbgs() << tri_->getName(i) << " is still in use!\n";
- Error = true;
- }
- }
- if (Error)
- llvm_unreachable(0);
-#endif
- regUse_.clear();
- regUseBackUp_.clear();
- }
-
- void addRegUse(unsigned physReg) {
- assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
- "should be physical register!");
- ++regUse_[physReg];
- for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as)
- ++regUse_[*as];
- }
-
- void delRegUse(unsigned physReg) {
- assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
- "should be physical register!");
- assert(regUse_[physReg] != 0);
- --regUse_[physReg];
- for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) {
- assert(regUse_[*as] != 0);
- --regUse_[*as];
- }
- }
-
- bool isRegAvail(unsigned physReg) const {
- assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
- "should be physical register!");
- return regUse_[physReg] == 0;
- }
-
- void backUpRegUses() {
- regUseBackUp_ = regUse_;
- }
-
- void restoreRegUses() {
- regUse_ = regUseBackUp_;
- }
-
- ///
- /// Register handling helpers.
- ///
-
- /// getFreePhysReg - return a free physical register for this virtual
- /// register interval if we have one, otherwise return 0.
- unsigned getFreePhysReg(LiveInterval* cur);
- unsigned getFreePhysReg(LiveInterval* cur,
- const TargetRegisterClass *RC,
- unsigned MaxInactiveCount,
- SmallVector<unsigned, 256> &inactiveCounts,
- bool SkipDGRegs);
-
- /// getFirstNonReservedPhysReg - return the first non-reserved physical
- /// register in the register class.
- unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) {
- ArrayRef<unsigned> O = RegClassInfo.getOrder(RC);
- assert(!O.empty() && "All registers reserved?!");
- return O.front();
- }
-
- void ComputeRelatedRegClasses();
-
- template <typename ItTy>
- void printIntervals(const char* const str, ItTy i, ItTy e) const {
- DEBUG({
- if (str)
- dbgs() << str << " intervals:\n";
-
- for (; i != e; ++i) {
- dbgs() << '\t' << *i->first << " -> ";
-
- unsigned reg = i->first->reg;
- if (TargetRegisterInfo::isVirtualRegister(reg))
- reg = vrm_->getPhys(reg);
-
- dbgs() << tri_->getName(reg) << '\n';
- }
- });
- }
- };
- char RALinScan::ID = 0;
-}
-
-INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc",
- "Linear Scan Register Allocator", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
-INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights)
-INITIALIZE_PASS_DEPENDENCY(LiveStacks)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
-INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc",
- "Linear Scan Register Allocator", false, false)
-
-void RALinScan::ComputeRelatedRegClasses() {
- // First pass, add all reg classes to the union, and determine at least one
- // reg class that each register is in.
- bool HasAliases = false;
- for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(),
- E = tri_->regclass_end(); RCI != E; ++RCI) {
- RelatedRegClasses.insert(*RCI);
- for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
- I != E; ++I) {
- HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
-
- const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
- if (PRC) {
- // Already processed this register. Just make sure we know that
- // multiple register classes share a register.
- RelatedRegClasses.unionSets(PRC, *RCI);
- } else {
- PRC = *RCI;
- }
- }
- }
-
- // Second pass, now that we know conservatively what register classes each reg
- // belongs to, add info about aliases. We don't need to do this for targets
- // without register aliases.
- if (HasAliases)
- for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
- I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
- I != E; ++I)
- for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) {
- const TargetRegisterClass *AliasClass =
- OneClassForEachPhysReg.lookup(*AS);
- if (AliasClass)
- RelatedRegClasses.unionSets(I->second, AliasClass);
- }
-}
-
-/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try
-/// to allocate the definition to the same register as the source register if the
-/// register is not defined during the lifetime of the interval. If the interval is
-/// killed by a copy, try to use the destination register. This eliminates a
-/// copy. This is used to coalesce copies which were not coalesced away before
-/// allocation either due to dest and src being in different register classes or
-/// because the coalescer was overly conservative.
-unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
- unsigned Preference = vrm_->getRegAllocPref(cur.reg);
- if ((Preference && Preference == Reg) || !cur.containsOneValue())
- return Reg;
-
- // We cannot handle complicated live ranges. Simple linear stuff only.
- if (cur.ranges.size() != 1)
- return Reg;
-
- const LiveRange &range = cur.ranges.front();
-
- VNInfo *vni = range.valno;
- if (vni->isUnused() || !vni->def.isValid())
- return Reg;
-
- unsigned CandReg;
- {
- MachineInstr *CopyMI;
- if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
- // Defined by a copy, try to extend SrcReg forward
- CandReg = CopyMI->getOperand(1).getReg();
- else if (TrivCoalesceEnds &&
- (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
- CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg())
- // Only used by a copy, try to extend DstReg backwards
- CandReg = CopyMI->getOperand(0).getReg();
- else
- return Reg;
-
- // If the target of the copy is a sub-register then don't coalesce.
- if(CopyMI->getOperand(0).getSubReg())
- return Reg;
- }
-
- if (TargetRegisterInfo::isVirtualRegister(CandReg)) {
- if (!vrm_->isAssignedReg(CandReg))
- return Reg;
- CandReg = vrm_->getPhys(CandReg);
- }
- if (Reg == CandReg)
- return Reg;
-
- const TargetRegisterClass *RC = mri_->getRegClass(cur.reg);
- if (!RC->contains(CandReg))
- return Reg;
-
- if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg))
- return Reg;
-
- // Try to coalesce.
- DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg)
- << '\n');
- vrm_->clearVirt(cur.reg);
- vrm_->assignVirt2Phys(cur.reg, CandReg);
-
- ++NumCoalesce;
- return CandReg;
-}
-
-bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
- mf_ = &fn;
- mri_ = &fn.getRegInfo();
- tm_ = &fn.getTarget();
- tri_ = tm_->getRegisterInfo();
- tii_ = tm_->getInstrInfo();
- allocatableRegs_ = tri_->getAllocatableSet(fn);
- reservedRegs_ = tri_->getReservedRegs(fn);
- li_ = &getAnalysis<LiveIntervals>();
- loopInfo = &getAnalysis<MachineLoopInfo>();
- RegClassInfo.runOnMachineFunction(fn);
-
- // We don't run the coalescer here because we have no reason to
- // interact with it. If the coalescer requires interaction, it
- // won't do anything. If it doesn't require interaction, we assume
- // it was run as a separate pass.
-
- // If this is the first function compiled, compute the related reg classes.
- if (RelatedRegClasses.empty())
- ComputeRelatedRegClasses();
-
- // Also resize register usage trackers.
- initRegUses();
-
- vrm_ = &getAnalysis<VirtRegMap>();
- if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
-
- spiller_.reset(createSpiller(*this, *mf_, *vrm_));
-
- initIntervalSets();
-
- linearScan();
-
- // Rewrite spill code and update the PhysRegsUsed set.
- rewriter_->runOnMachineFunction(*mf_, *vrm_, li_);
-
- // Write out new DBG_VALUE instructions.
- getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_);
-
- assert(unhandled_.empty() && "Unhandled live intervals remain!");
-
- finalizeRegUses();
-
- fixed_.clear();
- active_.clear();
- inactive_.clear();
- handled_.clear();
- NextReloadMap.clear();
- DowngradedRegs.clear();
- DowngradeMap.clear();
- spiller_.reset(0);
-
- return true;
-}
-
-/// initIntervalSets - initialize the interval sets.
-///
-void RALinScan::initIntervalSets()
-{
- assert(unhandled_.empty() && fixed_.empty() &&
- active_.empty() && inactive_.empty() &&
- "interval sets should be empty on initialization");
-
- handled_.reserve(li_->getNumIntervals());
-
- for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
- if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) {
- if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) {
- mri_->setPhysRegUsed(i->second->reg);
- fixed_.push_back(std::make_pair(i->second, i->second->begin()));
- }
- } else {
- if (i->second->empty()) {
- assignRegOrStackSlotAtInterval(i->second);
- }
- else
- unhandled_.push(i->second);
- }
- }
-}
-
-void RALinScan::linearScan() {
- // linear scan algorithm
- DEBUG({
- dbgs() << "********** LINEAR SCAN **********\n"
- << "********** Function: "
- << mf_->getFunction()->getName() << '\n';
- printIntervals("fixed", fixed_.begin(), fixed_.end());
- });
-
- while (!unhandled_.empty()) {
- // pick the interval with the earliest start point
- LiveInterval* cur = unhandled_.top();
- unhandled_.pop();
- ++NumIters;
- DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n');
-
- assert(!cur->empty() && "Empty interval in unhandled set.");
-
- processActiveIntervals(cur->beginIndex());
- processInactiveIntervals(cur->beginIndex());
-
- assert(TargetRegisterInfo::isVirtualRegister(cur->reg) &&
- "Can only allocate virtual registers!");
-
- // Allocating a virtual register. try to find a free
- // physical register or spill an interval (possibly this one) in order to
- // assign it one.
- assignRegOrStackSlotAtInterval(cur);
-
- DEBUG({
- printIntervals("active", active_.begin(), active_.end());
- printIntervals("inactive", inactive_.begin(), inactive_.end());
- });
- }
-
- // Expire any remaining active intervals
- while (!active_.empty()) {
- IntervalPtr &IP = active_.back();
- unsigned reg = IP.first->reg;
- DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n");
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- delRegUse(reg);
- active_.pop_back();
- }
-
- // Expire any remaining inactive intervals
- DEBUG({
- for (IntervalPtrs::reverse_iterator
- i = inactive_.rbegin(); i != inactive_.rend(); ++i)
- dbgs() << "\tinterval " << *i->first << " expired\n";
- });
- inactive_.clear();
-
- // Add live-ins to every BB except for entry. Also perform trivial coalescing.
- MachineFunction::iterator EntryMBB = mf_->begin();
- SmallVector<MachineBasicBlock*, 8> LiveInMBBs;
- for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
- LiveInterval &cur = *i->second;
- unsigned Reg = 0;
- bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg);
- if (isPhys)
- Reg = cur.reg;
- else if (vrm_->isAssignedReg(cur.reg))
- Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg));
- if (!Reg)
- continue;
- // Ignore split live intervals.
- if (!isPhys && vrm_->getPreSplitReg(cur.reg))
- continue;
-
- for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end();
- I != E; ++I) {
- const LiveRange &LR = *I;
- if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) {
- for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i)
- if (LiveInMBBs[i] != EntryMBB) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
- "Adding a virtual register to livein set?");
- LiveInMBBs[i]->addLiveIn(Reg);
- }
- LiveInMBBs.clear();
- }
- }
- }
-
- DEBUG(dbgs() << *vrm_);
-
- // Look for physical registers that end up not being allocated even though
- // register allocator had to spill other registers in its register class.
- if (!vrm_->FindUnusedRegisters(li_))
- return;
-}
-
-/// processActiveIntervals - expire old intervals and move non-overlapping ones
-/// to the inactive list.
-void RALinScan::processActiveIntervals(SlotIndex CurPoint)
-{
- DEBUG(dbgs() << "\tprocessing active intervals:\n");
-
- for (unsigned i = 0, e = active_.size(); i != e; ++i) {
- LiveInterval *Interval = active_[i].first;
- LiveInterval::iterator IntervalPos = active_[i].second;
- unsigned reg = Interval->reg;
-
- IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
-
- if (IntervalPos == Interval->end()) { // Remove expired intervals.
- DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- delRegUse(reg);
-
- // Pop off the end of the list.
- active_[i] = active_.back();
- active_.pop_back();
- --i; --e;
-
- } else if (IntervalPos->start > CurPoint) {
- // Move inactive intervals to inactive list.
- DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n");
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- delRegUse(reg);
- // add to inactive.
- inactive_.push_back(std::make_pair(Interval, IntervalPos));
-
- // Pop off the end of the list.
- active_[i] = active_.back();
- active_.pop_back();
- --i; --e;
- } else {
- // Otherwise, just update the iterator position.
- active_[i].second = IntervalPos;
- }
- }
-}
-
-/// processInactiveIntervals - expire old intervals and move overlapping
-/// ones to the active list.
-void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
-{
- DEBUG(dbgs() << "\tprocessing inactive intervals:\n");
-
- for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
- LiveInterval *Interval = inactive_[i].first;
- LiveInterval::iterator IntervalPos = inactive_[i].second;
- unsigned reg = Interval->reg;
-
- IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
-
- if (IntervalPos == Interval->end()) { // remove expired intervals.
- DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
-
- // Pop off the end of the list.
- inactive_[i] = inactive_.back();
- inactive_.pop_back();
- --i; --e;
- } else if (IntervalPos->start <= CurPoint) {
- // move re-activated intervals in active list
- DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n");
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- addRegUse(reg);
- // add to active
- active_.push_back(std::make_pair(Interval, IntervalPos));
-
- // Pop off the end of the list.
- inactive_[i] = inactive_.back();
- inactive_.pop_back();
- --i; --e;
- } else {
- // Otherwise, just update the iterator position.
- inactive_[i].second = IntervalPos;
- }
- }
-}
-
-/// updateSpillWeights - updates the spill weights of the specified physical
-/// register and its weight.
-void RALinScan::updateSpillWeights(std::vector<float> &Weights,
- unsigned reg, float weight,
- const TargetRegisterClass *RC) {
- SmallSet<unsigned, 4> Processed;
- SmallSet<unsigned, 4> SuperAdded;
- SmallVector<unsigned, 4> Supers;
- Weights[reg] += weight;
- Processed.insert(reg);
- for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) {
- Weights[*as] += weight;
- Processed.insert(*as);
- if (tri_->isSubRegister(*as, reg) &&
- SuperAdded.insert(*as) &&
- RC->contains(*as)) {
- Supers.push_back(*as);
- }
- }
-
- // If the alias is a super-register, and the super-register is in the
- // register class we are trying to allocate, then add the weight to all
- // sub-registers of the super-register even if they are not aliases.
- // e.g. allocating for GR32, bh is not used, updating bl spill weight.
- // bl should get the same spill weight otherwise it will be chosen
- // as a spill candidate since spilling bh doesn't make ebx available.
- for (unsigned i = 0, e = Supers.size(); i != e; ++i) {
- for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr)
- if (!Processed.count(*sr))
- Weights[*sr] += weight;
- }
-}
-
-static
-RALinScan::IntervalPtrs::iterator
-FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
- for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
- I != E; ++I)
- if (I->first == LI) return I;
- return IP.end();
-}
-
-static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V,
- SlotIndex Point){
- for (unsigned i = 0, e = V.size(); i != e; ++i) {
- RALinScan::IntervalPtr &IP = V[i];
- LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
- IP.second, Point);
- if (I != IP.first->begin()) --I;
- IP.second = I;
- }
-}
-
-/// getConflictWeight - Return the number of conflicts between cur
-/// live interval and defs and uses of Reg weighted by loop depths.
-static
-float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
- MachineRegisterInfo *mri_,
- MachineLoopInfo *loopInfo) {
- float Conflicts = 0;
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
- E = mri_->reg_end(); I != E; ++I) {
- MachineInstr *MI = &*I;
- if (cur->liveAt(li_->getInstructionIndex(MI))) {
- unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent());
- Conflicts += std::pow(10.0f, (float)loopDepth);
- }
- }
- return Conflicts;
-}
-
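getConflictWeight() above weights each conflicting def or use by 10^loopDepth, so conflicts inside deep loops dominate the choice of spill candidate. A toy computation with made-up loop depths:

#include <cmath>
#include <cstdio>

int main() {
  const unsigned LoopDepths[] = {0, 1, 3};   // one conflicting use per entry
  float Conflicts = 0.0f;
  for (unsigned i = 0; i != 3; ++i)
    Conflicts += std::pow(10.0f, (float)LoopDepths[i]);
  std::printf("total conflict weight: %g\n", Conflicts);   // 1 + 10 + 1000
  return 0;
}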
-/// findIntervalsToSpill - Determine the intervals to spill for the
-/// specified interval. It's passed the physical registers whose spill
-/// weight is the lowest among all the registers whose live intervals
-/// conflict with the interval.
-void RALinScan::findIntervalsToSpill(LiveInterval *cur,
- std::vector<std::pair<unsigned,float> > &Candidates,
- unsigned NumCands,
- SmallVector<LiveInterval*, 8> &SpillIntervals) {
- // We have figured out the *best* register to spill. But there are other
- // registers that are pretty good as well (spill weight within 3%). Spill
- // the one that has fewest defs and uses that conflict with cur.
- float Conflicts[3] = { 0.0f, 0.0f, 0.0f };
- SmallVector<LiveInterval*, 8> SLIs[3];
-
- DEBUG({
- dbgs() << "\tConsidering " << NumCands << " candidates: ";
- for (unsigned i = 0; i != NumCands; ++i)
- dbgs() << tri_->getName(Candidates[i].first) << " ";
- dbgs() << "\n";
- });
-
- // Calculate the number of conflicts of each candidate.
- for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
- unsigned Reg = i->first->reg;
- unsigned PhysReg = vrm_->getPhys(Reg);
- if (!cur->overlapsFrom(*i->first, i->second))
- continue;
- for (unsigned j = 0; j < NumCands; ++j) {
- unsigned Candidate = Candidates[j].first;
- if (tri_->regsOverlap(PhysReg, Candidate)) {
- if (NumCands > 1)
- Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
- SLIs[j].push_back(i->first);
- }
- }
- }
-
- for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){
- unsigned Reg = i->first->reg;
- unsigned PhysReg = vrm_->getPhys(Reg);
- if (!cur->overlapsFrom(*i->first, i->second-1))
- continue;
- for (unsigned j = 0; j < NumCands; ++j) {
- unsigned Candidate = Candidates[j].first;
- if (tri_->regsOverlap(PhysReg, Candidate)) {
- if (NumCands > 1)
- Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
- SLIs[j].push_back(i->first);
- }
- }
- }
-
- // Which is the best candidate?
- unsigned BestCandidate = 0;
- float MinConflicts = Conflicts[0];
- for (unsigned i = 1; i != NumCands; ++i) {
- if (Conflicts[i] < MinConflicts) {
- BestCandidate = i;
- MinConflicts = Conflicts[i];
- }
- }
-
- std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(),
- std::back_inserter(SpillIntervals));
-}
-
-namespace {
- struct WeightCompare {
- private:
- const RALinScan &Allocator;
-
- public:
- WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {}
-
- typedef std::pair<unsigned, float> RegWeightPair;
- bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const {
- return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first);
- }
- };
-}
-
-static bool weightsAreClose(float w1, float w2) {
- if (!NewHeuristic)
- return false;
-
- float diff = w1 - w2;
- if (diff <= 0.02f) // Within 0.02f
- return true;
- return (diff / w2) <= 0.05f; // Within 5%.
-}
-
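weightsAreClose() above treats two spill weights as interchangeable when they differ by at most 0.02 absolutely or by at most 5% of the second weight, and only when -new-spilling-heuristic is enabled. A quick standalone check of those two thresholds, using made-up weights:

#include <cassert>

static bool close(float w1, float w2) {
  float diff = w1 - w2;
  if (diff <= 0.02f) return true;   // absolute window
  return (diff / w2) <= 0.05f;      // or within 5% of w2
}

int main() {
  assert(close(1.01f, 1.00f));    // inside the absolute window
  assert(close(10.4f, 10.0f));    // inside the 5% window
  assert(!close(12.0f, 10.0f));   // too far apart
  return 0;
}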
-LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) {
- DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg);
- if (I == NextReloadMap.end())
- return 0;
- return &li_->getInterval(I->second);
-}
-
-void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
- for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) {
- bool isNew = DowngradedRegs.insert(*AS);
- (void)isNew; // Silence compiler warning.
- assert(isNew && "Multiple reloads holding the same register?");
- DowngradeMap.insert(std::make_pair(li->reg, *AS));
- }
- ++NumDowngrade;
-}
-
-void RALinScan::UpgradeRegister(unsigned Reg) {
- if (Reg) {
- DowngradedRegs.erase(Reg);
- for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS)
- DowngradedRegs.erase(*AS);
- }
-}
-
-namespace {
- struct LISorter {
- bool operator()(LiveInterval* A, LiveInterval* B) {
- return A->beginIndex() < B->beginIndex();
- }
- };
-}
-
-/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
-/// spill.
-void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
- const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
- DEBUG(dbgs() << "\tallocating current interval from "
- << RC->getName() << ": ");
-
- // This is an implicitly defined live interval, just assign any register.
- if (cur->empty()) {
- unsigned physReg = vrm_->getRegAllocPref(cur->reg);
- if (!physReg)
- physReg = getFirstNonReservedPhysReg(RC);
- DEBUG(dbgs() << tri_->getName(physReg) << '\n');
- // Note the register is not really in use.
- vrm_->assignVirt2Phys(cur->reg, physReg);
- return;
- }
-
- backUpRegUses();
-
- std::vector<std::pair<unsigned, float> > SpillWeightsToAdd;
- SlotIndex StartPosition = cur->beginIndex();
- const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
-
- // If start of this live interval is defined by a move instruction and its
- // source is assigned a physical register that is compatible with the target
- // register class, then we should try to assign it the same register.
- // This can happen when the move is from a larger register class to a smaller
- // one, e.g. X86::mov32to32_. These move instructions are not coalescable.
- if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
- VNInfo *vni = cur->begin()->valno;
- if (!vni->isUnused() && vni->def.isValid()) {
- MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
- if (CopyMI && CopyMI->isCopy()) {
- unsigned DstSubReg = CopyMI->getOperand(0).getSubReg();
- unsigned SrcReg = CopyMI->getOperand(1).getReg();
- unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg();
- unsigned Reg = 0;
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
- Reg = SrcReg;
- else if (vrm_->isAssignedReg(SrcReg))
- Reg = vrm_->getPhys(SrcReg);
- if (Reg) {
- if (SrcSubReg)
- Reg = tri_->getSubReg(Reg, SrcSubReg);
- if (DstSubReg)
- Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
- if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
- mri_->setRegAllocationHint(cur->reg, 0, Reg);
- }
- }
- }
- }
-
- // For every interval in inactive we overlap with, mark the
- // register as not free and update spill weights.
- for (IntervalPtrs::const_iterator i = inactive_.begin(),
- e = inactive_.end(); i != e; ++i) {
- unsigned Reg = i->first->reg;
- assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
- "Can only allocate virtual registers!");
- const TargetRegisterClass *RegRC = mri_->getRegClass(Reg);
- // If this is not in a related reg class to the register we're allocating,
- // don't check it.
- if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
- cur->overlapsFrom(*i->first, i->second-1)) {
- Reg = vrm_->getPhys(Reg);
- addRegUse(Reg);
- SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
- }
- }
-
- // Speculatively check to see if we can get a register right now. If not,
- // we know we won't be able to by adding more constraints. If so, we can
- // check to see if it is valid. Doing an exhaustive search of the fixed_ list
- // is very bad (it contains all callee clobbered registers for any functions
- // with a call), so we want to avoid doing that if possible.
- unsigned physReg = getFreePhysReg(cur);
- unsigned BestPhysReg = physReg;
- if (physReg) {
- // We got a register. However, if it's in the fixed_ list, we might
- // conflict with it. Check to see if we conflict with it or any of its
- // aliases.
- SmallSet<unsigned, 8> RegAliases;
- for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS)
- RegAliases.insert(*AS);
-
- bool ConflictsWithFixed = false;
- for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
- IntervalPtr &IP = fixed_[i];
- if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) {
- // Okay, this reg is on the fixed list. Check to see if we actually
- // conflict.
- LiveInterval *I = IP.first;
- if (I->endIndex() > StartPosition) {
- LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
- IP.second = II;
- if (II != I->begin() && II->start > StartPosition)
- --II;
- if (cur->overlapsFrom(*I, II)) {
- ConflictsWithFixed = true;
- break;
- }
- }
- }
- }
-
- // Okay, the register picked by our speculative getFreePhysReg call turned
- // out to be in use. Actually add all of the conflicting fixed registers to
- // regUse_ so we can do an accurate query.
- if (ConflictsWithFixed) {
- // For every interval in fixed we overlap with, mark the register as not
- // free and update spill weights.
- for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
- IntervalPtr &IP = fixed_[i];
- LiveInterval *I = IP.first;
-
- const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
- if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
- I->endIndex() > StartPosition) {
- LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
- IP.second = II;
- if (II != I->begin() && II->start > StartPosition)
- --II;
- if (cur->overlapsFrom(*I, II)) {
- unsigned reg = I->reg;
- addRegUse(reg);
- SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight));
- }
- }
- }
-
- // Using the newly updated regUse_ object, which includes conflicts in the
- // future, see if there are any registers available.
- physReg = getFreePhysReg(cur);
- }
- }
-
- // Restore the physical register tracker, removing information about the
- // future.
- restoreRegUses();
-
- // If we find a free register, we are done: assign this virtual to
- // the free physical register and add this interval to the active
- // list.
- if (physReg) {
- DEBUG(dbgs() << tri_->getName(physReg) << '\n');
- assert(RC->contains(physReg) && "Invalid candidate");
- vrm_->assignVirt2Phys(cur->reg, physReg);
- addRegUse(physReg);
- active_.push_back(std::make_pair(cur, cur->begin()));
- handled_.push_back(cur);
-
- // Remember physReg for avoiding a write-after-write hazard in the next
- // instruction.
- if (AvoidWAWHazard &&
- tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg)))
- avoidWAW_ = physReg;
-
- // "Upgrade" the physical register since it has been allocated.
- UpgradeRegister(physReg);
- if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
- // "Downgrade" physReg to try to keep physReg from being allocated until
- // the next reload from the same SS is allocated.
- mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
- DowngradeRegister(cur, physReg);
- }
- return;
- }
- DEBUG(dbgs() << "no free registers\n");
-
- // Compile the spill weights into an array that is better for scanning.
- std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f);
- for (std::vector<std::pair<unsigned, float> >::iterator
- I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
- updateSpillWeights(SpillWeights, I->first, I->second, RC);
-
- // for each interval in active, update spill weights.
- for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
- i != e; ++i) {
- unsigned reg = i->first->reg;
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
- }
-
- DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n");
-
- // Find a register to spill.
- float minWeight = HUGE_VALF;
- unsigned minReg = 0;
-
- bool Found = false;
- std::vector<std::pair<unsigned,float> > RegsWeights;
- ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
- if (!minReg || SpillWeights[minReg] == HUGE_VALF)
- for (unsigned i = 0; i != Order.size(); ++i) {
- unsigned reg = Order[i];
- float regWeight = SpillWeights[reg];
- // Skip recently allocated registers and reserved registers.
- if (minWeight > regWeight && !isRecentlyUsed(reg))
- Found = true;
- RegsWeights.push_back(std::make_pair(reg, regWeight));
- }
-
- // If we didn't find a register that is spillable, try aliases?
- if (!Found) {
- for (unsigned i = 0; i != Order.size(); ++i) {
- unsigned reg = Order[i];
- // No need to worry if the alias register size < regsize of RC.
- // We are going to spill all registers that alias it anyway.
- for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
- RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as]));
- }
- }
-
- // Sort all potential spill candidates by weight.
- std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this));
- minReg = RegsWeights[0].first;
- minWeight = RegsWeights[0].second;
- if (minWeight == HUGE_VALF) {
- // All registers must have inf weight. Just grab one!
- minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC);
- if (cur->weight == HUGE_VALF ||
- li_->getApproximateInstructionCount(*cur) == 0) {
- // Spill a physical register around defs and uses.
- if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) {
- // spillPhysRegAroundRegDefsUses may have invalidated iterators stored
- // in fixed_. Reset them.
- for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
- IntervalPtr &IP = fixed_[i];
- LiveInterval *I = IP.first;
- if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg))
- IP.second = I->advanceTo(I->begin(), StartPosition);
- }
-
- DowngradedRegs.clear();
- assignRegOrStackSlotAtInterval(cur);
- } else {
- assert(false && "Ran out of registers during register allocation!");
- report_fatal_error("Ran out of registers during register allocation!");
- }
- return;
- }
- }
-
- // Find up to 3 registers to consider as spill candidates.
- unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1;
- while (LastCandidate > 1) {
- if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight))
- break;
- --LastCandidate;
- }
-
- DEBUG({
- dbgs() << "\t\tregister(s) with min weight(s): ";
-
- for (unsigned i = 0; i != LastCandidate; ++i)
- dbgs() << tri_->getName(RegsWeights[i].first)
- << " (" << RegsWeights[i].second << ")\n";
- });
-
- // If the current interval has the minimum weight, we need to spill it and
- // add any added intervals back to unhandled, and restart
- // linearscan.
- if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
- DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
- SmallVector<LiveInterval*, 8> added;
- LiveRangeEdit LRE(*cur, added);
- spiller_->spill(LRE);
-
- std::sort(added.begin(), added.end(), LISorter());
- if (added.empty())
- return; // Early exit if all spills were folded.
-
- // Merge added with unhandled. Note that we have already sorted
- // intervals returned by addIntervalsForSpills by their starting
- // point.
- // This also updates the NextReloadMap. That is, it adds a mapping from a
- // register defined by a reload from SS to the next reload from SS in the
- // same basic block.
- MachineBasicBlock *LastReloadMBB = 0;
- LiveInterval *LastReload = 0;
- int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
- for (unsigned i = 0, e = added.size(); i != e; ++i) {
- LiveInterval *ReloadLi = added[i];
- if (ReloadLi->weight == HUGE_VALF &&
- li_->getApproximateInstructionCount(*ReloadLi) == 0) {
- SlotIndex ReloadIdx = ReloadLi->beginIndex();
- MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
- int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
- if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
- // Last reload of same SS is in the same MBB. We want to try to
- // allocate both reloads the same register and make sure the reg
- // isn't clobbered in between if at all possible.
- assert(LastReload->beginIndex() < ReloadIdx);
- NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
- }
- LastReloadMBB = ReloadMBB;
- LastReload = ReloadLi;
- LastReloadSS = ReloadSS;
- }
- unhandled_.push(ReloadLi);
- }
- return;
- }
-
- ++NumBacktracks;
-
- // Push the current interval back to unhandled since we are going
- // to re-run at least this iteration. Since we didn't modify it, it
- // should go back right in the front of the list
- unhandled_.push(cur);
-
- assert(TargetRegisterInfo::isPhysicalRegister(minReg) &&
- "did not choose a register to spill?");
-
- // We spill all intervals aliasing the register with
- // minimum weight, roll back to the interval with the earliest
- // start point and let the linear scan algorithm run again
- SmallVector<LiveInterval*, 8> spillIs;
-
- // Determine which intervals have to be spilled.
- findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs);
-
- // Set of spilled vregs (used later to rollback properly)
- SmallSet<unsigned, 8> spilled;
-
- // The earliest start of a spilled interval indicates the point up to
- // which we need to roll back in handled.
- assert(!spillIs.empty() && "No spill intervals?");
- SlotIndex earliestStart = spillIs[0]->beginIndex();
-
- // Spill live intervals of virtual regs mapped to the physical register we
- // want to clear (and its aliases). We only spill those that overlap with the
- // current interval, as the rest do not affect its allocation. We also keep
- // track of the earliest start of all spilled live intervals since this will
- // mark our rollback point.
- SmallVector<LiveInterval*, 8> added;
- while (!spillIs.empty()) {
- LiveInterval *sli = spillIs.back();
- spillIs.pop_back();
- DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n');
- if (sli->beginIndex() < earliestStart)
- earliestStart = sli->beginIndex();
- LiveRangeEdit LRE(*sli, added, 0, &spillIs);
- spiller_->spill(LRE);
- spilled.insert(sli->reg);
- }
-
- // Include any added intervals in earliestStart.
- for (unsigned i = 0, e = added.size(); i != e; ++i) {
- SlotIndex SI = added[i]->beginIndex();
- if (SI < earliestStart)
- earliestStart = SI;
- }
-
- DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n');
-
- // Scan handled in reverse order up to the earliest start of a
- // spilled live interval and undo each one, restoring the state of
- // unhandled.
- while (!handled_.empty()) {
- LiveInterval* i = handled_.back();
- // If this interval starts before earliestStart, we are done.
- if (!i->empty() && i->beginIndex() < earliestStart)
- break;
- DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n');
- handled_.pop_back();
-
- // When undoing a live interval allocation we must know if it is active or
- // inactive to properly update regUse_ and the VirtRegMap.
- IntervalPtrs::iterator it;
- if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
- active_.erase(it);
- assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
- if (!spilled.count(i->reg))
- unhandled_.push(i);
- delRegUse(vrm_->getPhys(i->reg));
- vrm_->clearVirt(i->reg);
- } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
- inactive_.erase(it);
- assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
- if (!spilled.count(i->reg))
- unhandled_.push(i);
- vrm_->clearVirt(i->reg);
- } else {
- assert(TargetRegisterInfo::isVirtualRegister(i->reg) &&
- "Can only allocate virtual registers!");
- vrm_->clearVirt(i->reg);
- unhandled_.push(i);
- }
-
- DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg);
- if (ii == DowngradeMap.end())
- // If the interval has a preference, it must be defined by a copy. Clear the
- // preference now since the source interval allocation may have been
- // undone as well.
- mri_->setRegAllocationHint(i->reg, 0, 0);
- else {
- UpgradeRegister(ii->second);
- }
- }
-
- // Rewind the iterators in the active, inactive, and fixed lists back to the
- // point we reverted to.
- RevertVectorIteratorsTo(active_, earliestStart);
- RevertVectorIteratorsTo(inactive_, earliestStart);
- RevertVectorIteratorsTo(fixed_, earliestStart);
-
- // Scan the rest and undo each interval that expired after earliestStart,
- // inserting it in active (the next iteration of the algorithm will
- // put it in inactive if required).
- for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
- LiveInterval *HI = handled_[i];
- if (!HI->expiredAt(earliestStart) &&
- HI->expiredAt(cur->beginIndex())) {
- DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n');
- active_.push_back(std::make_pair(HI, HI->begin()));
- assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
- addRegUse(vrm_->getPhys(HI->reg));
- }
- }
-
- // Merge added with unhandled.
- // This also updates the NextReloadMap. That is, it adds a mapping from a
- // register defined by a reload from SS to the next reload from SS in the
- // same basic block.
- MachineBasicBlock *LastReloadMBB = 0;
- LiveInterval *LastReload = 0;
- int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
- std::sort(added.begin(), added.end(), LISorter());
- for (unsigned i = 0, e = added.size(); i != e; ++i) {
- LiveInterval *ReloadLi = added[i];
- if (ReloadLi->weight == HUGE_VALF &&
- li_->getApproximateInstructionCount(*ReloadLi) == 0) {
- SlotIndex ReloadIdx = ReloadLi->beginIndex();
- MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
- int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
- if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
- // The last reload of the same SS is in the same MBB. We want to try to
- // allocate the same register to both reloads and make sure the reg
- // isn't clobbered in between, if at all possible.
- assert(LastReload->beginIndex() < ReloadIdx);
- NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
- }
- LastReloadMBB = ReloadMBB;
- LastReload = ReloadLi;
- LastReloadSS = ReloadSS;
- }
- unhandled_.push(ReloadLi);
- }
-}
-
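The NextReloadMap construction that appears twice in the removed code above works as follows: walk the reload intervals in start order and, whenever two consecutive reloads come from the same stack slot in the same basic block, record a link from the first reload's vreg to the second so the allocator can try to give both the same register. Below is a minimal, self-contained C++ sketch of just that chaining step; the Reload struct and the weight filtering that the real code applies are simplified away, and all names are illustrative only.

// Sketch of the NextReloadMap chaining described above. Types are
// simplified stand-ins, not the LLVM classes.
#include <map>
#include <vector>

struct Reload {
  unsigned VReg;   // virtual register defined by the reload
  unsigned MBB;    // owning basic block id
  int StackSlot;   // source stack slot
};

std::map<unsigned, unsigned> buildNextReloadMap(const std::vector<Reload> &Sorted) {
  std::map<unsigned, unsigned> Next;
  const Reload *Last = nullptr;
  for (const Reload &R : Sorted) {
    // Chain consecutive reloads of the same slot in the same block.
    if (Last && Last->MBB == R.MBB && Last->StackSlot == R.StackSlot)
      Next[Last->VReg] = R.VReg;
    Last = &R;
  }
  return Next;
}

int main() {
  std::vector<Reload> Sorted = {{10, 0, 3}, {11, 0, 3}, {12, 1, 3}, {13, 1, 5}};
  std::map<unsigned, unsigned> Next = buildNextReloadMap(Sorted);
  return (Next.size() == 1 && Next.at(10) == 11) ? 0 : 1;
}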
-unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
- const TargetRegisterClass *RC,
- unsigned MaxInactiveCount,
- SmallVector<unsigned, 256> &inactiveCounts,
- bool SkipDGRegs) {
- unsigned FreeReg = 0;
- unsigned FreeRegInactiveCount = 0;
-
- std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
- // Resolve second part of the hint (if possible) given the current allocation.
- unsigned physReg = Hint.second;
- if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
- physReg = vrm_->getPhys(physReg);
-
- ArrayRef<unsigned> Order;
- if (Hint.first)
- Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_);
- else
- Order = RegClassInfo.getOrder(RC);
-
- assert(!Order.empty() && "No allocatable register in this register class!");
-
- // Scan for the first available register.
- for (unsigned i = 0; i != Order.size(); ++i) {
- unsigned Reg = Order[i];
- // Ignore "downgraded" registers.
- if (SkipDGRegs && DowngradedRegs.count(Reg))
- continue;
- // Skip reserved registers.
- if (reservedRegs_.test(Reg))
- continue;
- // Skip recently allocated registers.
- if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) {
- FreeReg = Reg;
- if (FreeReg < inactiveCounts.size())
- FreeRegInactiveCount = inactiveCounts[FreeReg];
- else
- FreeRegInactiveCount = 0;
- break;
- }
- }
-
- // If there are no free regs, or if this reg has the max inactive count,
- // return this register.
- if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) {
- // Remember what register we picked so we can skip it next time.
- if (FreeReg != 0) recordRecentlyUsed(FreeReg);
- return FreeReg;
- }
-
- // Continue scanning the registers, looking for the one with the highest
- // inactive count. Alkis found that this reduced register pressure very
- // slightly on X86 (in rev 1.94 of this file), though this should probably be
- // reevaluated now.
- for (unsigned i = 0; i != Order.size(); ++i) {
- unsigned Reg = Order[i];
- // Ignore "downgraded" registers.
- if (SkipDGRegs && DowngradedRegs.count(Reg))
- continue;
- // Skip reserved registers.
- if (reservedRegs_.test(Reg))
- continue;
- if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
- FreeRegInactiveCount < inactiveCounts[Reg] &&
- (!SkipDGRegs || !isRecentlyUsed(Reg))) {
- FreeReg = Reg;
- FreeRegInactiveCount = inactiveCounts[Reg];
- if (FreeRegInactiveCount == MaxInactiveCount)
- break; // We found the one with the max inactive count.
- }
- }
-
- // Remember what register we picked so we can skip it next time.
- recordRecentlyUsed(FreeReg);
-
- return FreeReg;
-}
-
-/// getFreePhysReg - return a free physical register for this virtual register
-/// interval if we have one, otherwise return 0.
-unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
- SmallVector<unsigned, 256> inactiveCounts;
- unsigned MaxInactiveCount = 0;
-
- const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
- const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
-
- for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
- i != e; ++i) {
- unsigned reg = i->first->reg;
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
-
- // If this is not in a related reg class to the register we're allocating,
- // don't check it.
- const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
- if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
- reg = vrm_->getPhys(reg);
- if (inactiveCounts.size() <= reg)
- inactiveCounts.resize(reg+1);
- ++inactiveCounts[reg];
- MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]);
- }
- }
-
- // If copy coalescer has assigned a "preferred" register, check if it's
- // available first.
- unsigned Preference = vrm_->getRegAllocPref(cur->reg);
- if (Preference) {
- DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
- if (isRegAvail(Preference) &&
- RC->contains(Preference))
- return Preference;
- }
-
- unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
- true);
- if (FreeReg)
- return FreeReg;
- return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
-}
-
-FunctionPass* llvm::createLinearScanRegisterAllocator() {
- return new RALinScan();
-}
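The spill-and-rollback step that the removed RALinScan code above implements can be summarized as: when no physical register is free, spill the cheapest conflicting interval(s), then move every already-handled interval that starts at or after the earliest spilled start back onto the unhandled list so linear scan can re-run from that point. The following is a minimal, self-contained C++ sketch of only that step; the Interval struct, slot indices, and list handling are simplified stand-ins, not the LLVM types.

// Sketch of the backtracking step: spill the minimum-weight conflict and roll
// back handled intervals to the earliest spilled start point.
#include <algorithm>
#include <cassert>
#include <vector>

struct Interval {
  unsigned Reg;    // virtual register id
  unsigned Start;  // simplified slot index
  unsigned End;
  float Weight;    // spill cost
  bool Spilled = false;
};

// Mark the cheapest conflicting interval as spilled and return its start,
// which is the point we must roll back to.
unsigned spillCheapestAndFindRollback(std::vector<Interval *> &Conflicts) {
  assert(!Conflicts.empty() && "nothing to spill");
  Interval *Min = *std::min_element(
      Conflicts.begin(), Conflicts.end(),
      [](const Interval *A, const Interval *B) { return A->Weight < B->Weight; });
  Min->Spilled = true;
  return Min->Start;
}

// Undo every handled interval that begins at or after RollbackPoint, pushing
// it back to unhandled unless it was the one just spilled.
void rollback(std::vector<Interval *> &Handled,
              std::vector<Interval *> &Unhandled, unsigned RollbackPoint) {
  while (!Handled.empty() && Handled.back()->Start >= RollbackPoint) {
    Interval *I = Handled.back();
    Handled.pop_back();
    if (!I->Spilled)
      Unhandled.push_back(I);
  }
}

int main() {
  Interval A{1, 0, 10, 2.0f}, B{2, 4, 12, 0.5f}, C{3, 6, 20, 3.0f};
  std::vector<Interval *> Handled{&A, &B, &C};
  std::vector<Interval *> Conflicts{&B, &C};
  std::vector<Interval *> Unhandled;
  unsigned Point = spillCheapestAndFindRollback(Conflicts); // spills B, start 4
  rollback(Handled, Unhandled, Point);
  assert(B.Spilled && Unhandled.size() == 1 && Unhandled[0] == &C);
  return 0;
}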
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 0d2cf2d6184c..a2846145bc7e 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -32,14 +32,17 @@
#define DEBUG_TYPE "regalloc"
#include "RenderMachineFunction.h"
-#include "Splitter.h"
+#include "Spiller.h"
#include "VirtRegMap.h"
-#include "VirtRegRewriter.h"
#include "RegisterCoalescer.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -54,6 +57,7 @@
#include <limits>
#include <memory>
#include <set>
+#include <sstream>
#include <vector>
using namespace llvm;
@@ -67,10 +71,12 @@ pbqpCoalescing("pbqp-coalescing",
cl::desc("Attempt coalescing during PBQP register allocation."),
cl::init(false), cl::Hidden);
+#ifndef NDEBUG
static cl::opt<bool>
-pbqpPreSplitting("pbqp-pre-splitting",
- cl::desc("Pre-split before PBQP register allocation."),
- cl::init(false), cl::Hidden);
+pbqpDumpGraphs("pbqp-dump-graphs",
+ cl::desc("Dump graphs for each function/round in the compilation unit."),
+ cl::init(false), cl::Hidden);
+#endif
namespace {
@@ -88,11 +94,9 @@ public:
: MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
- initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
}
@@ -132,6 +136,7 @@ private:
MachineRegisterInfo *mri;
RenderMachineFunction *rmf;
+ std::auto_ptr<Spiller> spiller;
LiveIntervals *lis;
LiveStacks *lss;
VirtRegMap *vrm;
@@ -141,10 +146,6 @@ private:
/// \brief Finds the initial set of vreg intervals to allocate.
void findVRegIntervalsToAlloc();
- /// \brief Adds a stack interval if the given live interval has been
- /// spilled. Used to support stack slot coloring.
- void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri);
-
/// \brief Given a solved PBQP problem maps this solution back to a register
/// assignment.
bool mapPBQPToRegAlloc(const PBQPRAProblem &problem,
@@ -170,7 +171,7 @@ PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
assert(nodeItr != vreg2Node.end() && "No node for vreg.");
return nodeItr->second;
-
+
}
const PBQPRAProblem::AllowedSet&
@@ -195,9 +196,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
const RegSet &vregs) {
typedef std::vector<const LiveInterval*> LIVector;
-
+ ArrayRef<SlotIndex> regMaskSlots = lis->getRegMaskSlots();
MachineRegisterInfo *mri = &mf->getRegInfo();
- const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
PBQP::Graph &g = p->getGraph();
@@ -214,7 +215,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
BitVector reservedRegs = tri->getReservedRegs(*mf);
- // Iterate over vregs.
+ // Iterate over vregs.
for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
vregItr != vregEnd; ++vregItr) {
unsigned vreg = *vregItr;
@@ -224,7 +225,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
// Compute an initial allowed set for the current vreg.
typedef std::vector<unsigned> VRAllowed;
VRAllowed vrAllowed;
- ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf);
+ ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf);
for (unsigned i = 0; i != rawOrder.size(); ++i) {
unsigned preg = rawOrder[i];
if (!reservedRegs.test(preg)) {
@@ -232,7 +233,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
}
}
- // Remove any physical registers which overlap.
+ RegSet overlappingPRegs;
+
+ // Record physical registers whose ranges overlap.
for (RegSet::const_iterator pregItr = pregs.begin(),
pregEnd = pregs.end();
pregItr != pregEnd; ++pregItr) {
@@ -243,9 +246,41 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
continue;
}
- if (!vregLI->overlaps(*pregLI)) {
- continue;
+ if (vregLI->overlaps(*pregLI))
+ overlappingPRegs.insert(preg);
+ }
+
+ // Record any overlaps with regmask operands.
+ BitVector regMaskOverlaps(tri->getNumRegs());
+ for (ArrayRef<SlotIndex>::iterator rmItr = regMaskSlots.begin(),
+ rmEnd = regMaskSlots.end();
+ rmItr != rmEnd; ++rmItr) {
+ SlotIndex rmIdx = *rmItr;
+ if (vregLI->liveAt(rmIdx)) {
+ MachineInstr *rmMI = lis->getInstructionFromIndex(rmIdx);
+ const uint32_t* regMask = 0;
+ for (MachineInstr::mop_iterator mopItr = rmMI->operands_begin(),
+ mopEnd = rmMI->operands_end();
+ mopItr != mopEnd; ++mopItr) {
+ if (mopItr->isRegMask()) {
+ regMask = mopItr->getRegMask();
+ break;
+ }
+ }
+ assert(regMask != 0 && "Couldn't find register mask.");
+ regMaskOverlaps.setBitsNotInMask(regMask);
}
+ }
+
+ for (unsigned preg = 0; preg < tri->getNumRegs(); ++preg) {
+ if (regMaskOverlaps.test(preg))
+ overlappingPRegs.insert(preg);
+ }
+
+ for (RegSet::const_iterator pregItr = overlappingPRegs.begin(),
+ pregEnd = overlappingPRegs.end();
+ pregItr != pregEnd; ++pregItr) {
+ unsigned preg = *pregItr;
// Remove the register from the allowed set.
VRAllowed::iterator eraseItr =
@@ -256,7 +291,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
}
// Also remove any aliases.
- const unsigned *aliasItr = tri->getAliasSet(preg);
+ const uint16_t *aliasItr = tri->getAliasSet(preg);
if (aliasItr != 0) {
for (; *aliasItr != 0; ++aliasItr) {
VRAllowed::iterator eraseItr =
@@ -270,7 +305,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
}
// Construct the node.
- PBQP::Graph::NodeItr node =
+ PBQP::Graph::NodeItr node =
g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
// Record the mapping and allowed set in the problem.
@@ -371,7 +406,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
const float copyFactor = 0.5; // Cost of copy relative to load. Current
// value plucked randomly out of the air.
-
+
PBQP::PBQPNum cBenefit =
copyFactor * LiveIntervals::getSpillWeight(false, true,
loopInfo->getLoopDepth(mbb));
@@ -382,7 +417,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
}
const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src);
- unsigned pregOpt = 0;
+ unsigned pregOpt = 0;
while (pregOpt < allowed.size() && allowed[pregOpt] != dst) {
++pregOpt;
}
@@ -407,7 +442,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
std::swap(allowed1, allowed2);
}
}
-
+
addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2,
cBenefit);
}
@@ -439,27 +474,29 @@ void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
if (preg1 == preg2) {
costMat[i + 1][j + 1] += -benefit;
- }
+ }
}
}
}
void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesCFG();
+ au.addRequired<AliasAnalysis>();
+ au.addPreserved<AliasAnalysis>();
au.addRequired<SlotIndexes>();
au.addPreserved<SlotIndexes>();
au.addRequired<LiveIntervals>();
//au.addRequiredID(SplitCriticalEdgesID);
- au.addRequiredID(RegisterCoalescerPassID);
if (customPassID)
au.addRequiredID(*customPassID);
au.addRequired<CalculateSpillWeights>();
au.addRequired<LiveStacks>();
au.addPreserved<LiveStacks>();
+ au.addRequired<MachineDominatorTree>();
+ au.addPreserved<MachineDominatorTree>();
au.addRequired<MachineLoopInfo>();
au.addPreserved<MachineLoopInfo>();
- if (pbqpPreSplitting)
- au.addRequired<LoopSplitter>();
au.addRequired<VirtRegMap>();
au.addRequired<RenderMachineFunction>();
MachineFunctionPass::getAnalysisUsage(au);
@@ -488,29 +525,6 @@ void RegAllocPBQP::findVRegIntervalsToAlloc() {
}
}
-void RegAllocPBQP::addStackInterval(const LiveInterval *spilled,
- MachineRegisterInfo* mri) {
- int stackSlot = vrm->getStackSlot(spilled->reg);
-
- if (stackSlot == VirtRegMap::NO_STACK_SLOT) {
- return;
- }
-
- const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
- LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
-
- VNInfo *vni;
- if (stackInterval.getNumValNums() != 0) {
- vni = stackInterval.getValNumInfo(0);
- } else {
- vni = stackInterval.getNextValue(
- SlotIndex(), 0, lss->getVNInfoAllocator());
- }
-
- LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
- stackInterval.MergeRangesInAsValue(rhsInterval, vni);
-}
-
bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
const PBQP::Solution &solution) {
// Set to true if we have any spills
@@ -529,28 +543,22 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
unsigned alloc = solution.getSelection(node);
if (problem.isPRegOption(vreg, alloc)) {
- unsigned preg = problem.getPRegForOption(vreg, alloc);
+ unsigned preg = problem.getPRegForOption(vreg, alloc);
DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n");
assert(preg != 0 && "Invalid preg selected.");
- vrm->assignVirt2Phys(vreg, preg);
+ vrm->assignVirt2Phys(vreg, preg);
} else if (problem.isSpillOption(vreg, alloc)) {
vregsToAlloc.erase(vreg);
- const LiveInterval* spillInterval = &lis->getInterval(vreg);
- double oldWeight = spillInterval->weight;
- rmf->rememberUseDefs(spillInterval);
- std::vector<LiveInterval*> newSpills =
- lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm);
- addStackInterval(spillInterval, mri);
- rmf->rememberSpills(spillInterval, newSpills);
-
- (void) oldWeight;
+ SmallVector<LiveInterval*, 8> newSpills;
+ LiveRangeEdit LRE(lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
+ spiller->spill(LRE);
+
DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: "
- << oldWeight << ", New vregs: ");
+ << LRE.getParent().weight << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
// allocate.
- for (std::vector<LiveInterval*>::const_iterator
- itr = newSpills.begin(), end = newSpills.end();
+ for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end();
itr != end; ++itr) {
assert(!(*itr)->empty() && "Empty spill range.");
DEBUG(dbgs() << (*itr)->reg << " ");
@@ -560,9 +568,9 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
DEBUG(dbgs() << ")\n");
// We need another round if spill intervals were added.
- anotherRoundNeeded |= !newSpills.empty();
+ anotherRoundNeeded |= !LRE.empty();
} else {
- assert(false && "Unknown allocation option.");
+ llvm_unreachable("Unknown allocation option.");
}
}
@@ -642,7 +650,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
tm = &mf->getTarget();
tri = tm->getRegisterInfo();
tii = tm->getInstrInfo();
- mri = &mf->getRegInfo();
+ mri = &mf->getRegInfo();
lis = &getAnalysis<LiveIntervals>();
lss = &getAnalysis<LiveStacks>();
@@ -650,7 +658,9 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
rmf = &getAnalysis<RenderMachineFunction>();
vrm = &getAnalysis<VirtRegMap>();
+ spiller.reset(createInlineSpiller(*this, MF, *vrm));
+ mri->freezeReservedRegs(MF);
DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n");
@@ -666,6 +676,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
// Find the vreg intervals in need of allocation.
findVRegIntervalsToAlloc();
+ const Function* func = mf->getFunction();
+ std::string fqn =
+ func->getParent()->getModuleIdentifier() + "." +
+ func->getName().str();
+ (void)fqn;
+
// If there are non-empty intervals allocate them using pbqp.
if (!vregsToAlloc.empty()) {
@@ -677,6 +693,20 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
std::auto_ptr<PBQPRAProblem> problem =
builder->build(mf, lis, loopInfo, vregsToAlloc);
+
+#ifndef NDEBUG
+ if (pbqpDumpGraphs) {
+ std::ostringstream rs;
+ rs << round;
+ std::string graphFileName(fqn + "." + rs.str() + ".pbqpgraph");
+ std::string tmp;
+ raw_fd_ostream os(graphFileName.c_str(), tmp);
+ DEBUG(dbgs() << "Dumping graph for round " << round << " to \""
+ << graphFileName << "\"\n");
+ problem->getGraph().dump(os);
+ }
+#endif
+
PBQP::Solution solution =
PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(
problem->getGraph());
@@ -698,9 +728,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
// Run rewriter
- std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
+ vrm->rewrite(lis->getSlotIndexes());
- rewriter->runOnMachineFunction(*mf, *vrm, lis);
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers.
+ vrm->clearAllVirt();
+ mri->clearVirtRegs();
return true;
}
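Two things change in the PBQP allocator above: call-site register masks now contribute to a vreg's forbidden set (any physreg the mask does not preserve is removed from the allowed set when the vreg is live across the call), and spilling goes through LiveRangeEdit and the inline spiller instead of addIntervalsForSpills. Below is a minimal sketch of the first part only, using a plain uint32_t array and std::vector in place of MachineOperand::getRegMask() and llvm::BitVector; the helper names are illustrative, not LLVM API.

// Sketch of regmask-based pruning of a vreg's allowed physreg set.
#include <algorithm>
#include <cstdint>
#include <vector>

// Same convention as a regmask operand: one bit per physreg, 1 == preserved.
static bool maskPreserves(const uint32_t *Mask, unsigned Reg) {
  return Mask[Reg / 32] & (1u << (Reg % 32));
}

// Remove every clobbered register from the allowed set, mirroring
// regMaskOverlaps.setBitsNotInMask(regMask) followed by the erase loop.
static void pruneAllowedSet(std::vector<unsigned> &Allowed,
                            const uint32_t *Mask) {
  Allowed.erase(std::remove_if(Allowed.begin(), Allowed.end(),
                               [&](unsigned R) { return !maskPreserves(Mask, R); }),
                Allowed.end());
}

int main() {
  // Pretend regs 0..31 exist and the call preserves only regs 4 and 5.
  uint32_t Mask[1] = {(1u << 4) | (1u << 5)};
  std::vector<unsigned> Allowed = {2, 4, 5, 7};
  pruneAllowedSet(Allowed, Mask);
  return Allowed == std::vector<unsigned>{4, 5} ? 0 : 1;
}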
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 786d279c2b8c..17165fa72665 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -18,12 +18,16 @@
#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetMachine.h"
-
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<unsigned>
+StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
+ cl::desc("Limit all regclasses to N registers"));
+
RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0)
{}
@@ -39,14 +43,14 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
}
// Does this MF have different CSRs?
- const unsigned *CSR = TRI->getCalleeSavedRegs(MF);
+ const uint16_t *CSR = TRI->getCalleeSavedRegs(MF);
if (Update || CSR != CalleeSaved) {
// Build a CSRNum map. Every CSR alias gets an entry pointing to the last
// overlapping CSR.
CSRNum.clear();
CSRNum.resize(TRI->getNumRegs(), 0);
for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
- for (const unsigned *AS = TRI->getOverlaps(Reg);
+ for (const uint16_t *AS = TRI->getOverlaps(Reg);
unsigned Alias = *AS; ++AS)
CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
Update = true;
@@ -81,7 +85,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
// FIXME: Once targets reserve registers instead of removing them from the
// allocation order, we can simply use begin/end here.
- ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(*MF);
+ ArrayRef<uint16_t> RawOrder = RC->getRawAllocationOrder(*MF);
for (unsigned i = 0; i != RawOrder.size(); ++i) {
unsigned PhysReg = RawOrder[i];
// Remove reserved registers from the allocation order.
@@ -99,6 +103,10 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
// CSR aliases go after the volatile registers, preserve the target's order.
std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]);
+ // Register allocator stress test. Clip register class to N registers.
+ if (StressRA && RCI.NumRegs > StressRA)
+ RCI.NumRegs = StressRA;
+
// Check if RC is a proper sub-class.
if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC))
if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h
index 2c1407096cd7..400e1f48ce54 100644
--- a/lib/CodeGen/RegisterClassInfo.h
+++ b/lib/CodeGen/RegisterClassInfo.h
@@ -49,7 +49,7 @@ class RegisterClassInfo {
// Callee saved registers of last MF. Assumed to be valid until the next
// runOnFunction() call.
- const unsigned *CalleeSaved;
+ const uint16_t *CalleeSaved;
// Map register number to CalleeSaved index + 1;
SmallVector<uint8_t, 4> CSRNum;
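The CSRNum map rebuilt in the RegisterClassInfo.cpp hunk above gives every callee-saved register, and every register overlapping one, the CSR's position plus one, so zero still means "not callee saved". A small C++ sketch of that mapping under simplified assumptions follows; the Overlaps table stands in for TRI->getOverlaps() and the types are not the LLVM ones.

// Sketch of the CSRNum construction: Reg -> CSR index + 1, 0 == not a CSR.
#include <cstddef>
#include <vector>

struct CSRInfo {
  std::vector<unsigned> CSRs;                  // callee-saved registers
  std::vector<std::vector<unsigned>> Overlaps; // Overlaps[Reg] includes Reg itself
  std::vector<unsigned char> CSRNum;           // Reg -> CSR index + 1

  void compute(unsigned NumRegs) {
    CSRNum.assign(NumRegs, 0);
    for (std::size_t N = 0; N != CSRs.size(); ++N)
      for (unsigned Alias : Overlaps[CSRs[N]])
        CSRNum[Alias] = static_cast<unsigned char>(N + 1); // 0 = no CSR
  }
};

int main() {
  // Registers 1 and 3 are callee saved; register 2 overlaps register 1.
  CSRInfo Info{{1, 3}, {{0}, {1, 2}, {2, 1}, {3}}, {}};
  Info.compute(4);
  return (Info.CSRNum[0] == 0 && Info.CSRNum[1] == 1 &&
          Info.CSRNum[2] == 1 && Info.CSRNum[3] == 2) ? 0 : 1;
}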
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 9b414d6212c7..75f88cafdf01 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regcoalescing"
+#define DEBUG_TYPE "regalloc"
#include "RegisterCoalescer.h"
#include "LiveDebugVariables.h"
#include "RegisterClassInfo.h"
@@ -169,10 +169,6 @@ namespace {
/// it as well.
bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
- /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the
- /// VNInfo copy flag for DstReg and all aliases.
- void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI);
-
/// markAsJoined - Remember that CopyMI has already been joined.
void markAsJoined(MachineInstr *CopyMI);
@@ -197,7 +193,7 @@ namespace {
};
} /// end anonymous namespace
-char &llvm::RegisterCoalescerPassID = RegisterCoalescer::ID;
+char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
@@ -205,9 +201,6 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
-INITIALIZE_PASS_DEPENDENCY(PHIElimination)
-INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
@@ -379,9 +372,6 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addPreservedID(MachineDominatorsID);
- AU.addPreservedID(StrongPHIEliminationID);
- AU.addPreservedID(PHIEliminationID);
- AU.addPreservedID(TwoAddressInstructionPassID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -423,7 +413,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
LiveInterval &IntB =
LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
- SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
@@ -434,40 +424,19 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// Get the location that B is defined at. Two options: either this value has
// an unknown definition point or it is defined at CopyIdx. If unknown, we
// can't process it.
- if (!BValNo->isDefByCopy()) return false;
- assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+ if (BValNo->def != CopyIdx) return false;
// AValNo is the value number in A that defines the copy, A3 in the example.
- SlotIndex CopyUseIdx = CopyIdx.getUseIndex();
+ SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
// The live range might not exist after fun with physreg coalescing.
if (ALR == IntA.end()) return false;
VNInfo *AValNo = ALR->valno;
- // If it's re-defined by an early clobber somewhere in the live range, then
- // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
- // See PR3149:
- // 172 %ECX<def> = MOV32rr %reg1039<kill>
- // 180 INLINEASM <es:subl $5,$1
- // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9,
- // %EAX<kill>,
- // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
- // 188 %EAX<def> = MOV32rr %EAX<kill>
- // 196 %ECX<def> = MOV32rr %ECX<kill>
- // 204 %ECX<def> = MOV32rr %ECX<kill>
- // 212 %EAX<def> = MOV32rr %EAX<kill>
- // 220 %EAX<def> = MOV32rr %EAX
- // 228 %reg1039<def> = MOV32rr %ECX<kill>
- // The early clobber operand ties ECX input to the ECX def.
- //
- // The live interval of ECX is represented as this:
- // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47)
- // The coalescer has no idea there was a def in the middle of [174,230].
- if (AValNo->hasRedefByEC())
- return false;
// If AValNo is defined as a copy from IntB, we can potentially process this.
// Get the instruction that defines this value number.
- if (!CP.isCoalescable(AValNo->getCopy()))
+ MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
+ if (!CP.isCoalescable(ACopyMI))
return false;
// Get the LiveRange in IntB that this value number starts with.
@@ -492,7 +461,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// of its aliases is overlapping the live interval of the virtual register.
// If so, do not coalesce.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) {
DEBUG({
dbgs() << "\t\tInterfere with alias ";
@@ -511,8 +480,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// We are about to delete CopyMI, so need to remove it as the 'instruction
// that defines this value #'. Update the valnum with the new defining
// instruction #.
- BValNo->def = FillerStart;
- BValNo->setCopy(0);
+ BValNo->def = FillerStart;
// Okay, we can merge them. We need to insert a new liverange:
// [ValLR.end, BLR.begin) of either value number, then we merge the
@@ -522,12 +490,12 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// If the IntB live range is assigned to a physical register, and if that
// physreg has sub-registers, update their live intervals as well.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const unsigned *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) {
+ for (const uint16_t *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) {
if (!LIS->hasInterval(*SR))
continue;
LiveInterval &SRLI = LIS->getInterval(*SR);
SRLI.addRange(LiveRange(FillerStart, FillerEnd,
- SRLI.getNextValue(FillerStart, 0,
+ SRLI.getNextValue(FillerStart,
LIS->getVNInfoAllocator())));
}
}
@@ -554,9 +522,11 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
ValLREndInst->getOperand(UIdx).setIsKill(false);
}
- // If the copy instruction was killing the destination register before the
- // merge, find the last use and trim the live range. That will also add the
- // isKill marker.
+ // Rewrite the copy. If the copy instruction was killing the destination
+ // register before the merge, find the last use and trim the live range. That
+ // will also add the isKill marker.
+ CopyMI->substituteRegister(IntA.reg, IntB.reg, CP.getSubIdx(),
+ *TRI);
if (ALR->end == CopyIdx)
LIS->shrinkToUses(&IntA);
@@ -625,7 +595,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
if (!LIS->hasInterval(CP.getDstReg()))
return false;
- SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
LiveInterval &IntA =
LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
@@ -635,13 +605,13 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
- if (!BValNo || !BValNo->isDefByCopy())
+ if (!BValNo || BValNo->def != CopyIdx)
return false;
assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
// AValNo is the value number in A that defines the copy, A3 in the example.
- VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex());
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
assert(AValNo && "COPY source not live");
// If other defs can reach uses of this def, then it's not safe to perform
@@ -651,8 +621,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
if (!DefMI)
return false;
- const MCInstrDesc &MCID = DefMI->getDesc();
- if (!MCID.isCommutable())
+ if (!DefMI->isCommutable())
return false;
// If DefMI is a two-address instruction then commuting it will change the
// destination register.
@@ -684,7 +653,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// Abort if the aliases of IntB.reg have values that are not simply the
// clobbers from the superreg.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
- for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
if (LIS->hasInterval(*AS) &&
HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0))
return false;
@@ -718,7 +687,8 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
return false;
if (NewMI != DefMI) {
LIS->ReplaceMachineInstrInMaps(DefMI, NewMI);
- MBB->insert(DefMI, NewMI);
+ MachineBasicBlock::iterator Pos = DefMI;
+ MBB->insert(Pos, NewMI);
MBB->erase(DefMI);
}
unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
@@ -747,7 +717,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
UseMO.setReg(NewReg);
continue;
}
- SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getUseIndex();
+ SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true);
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end() || ULR->valno != AValNo)
continue;
@@ -765,7 +735,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// This copy will become a noop. If it's defining a new val#, merge it into
// BValNo.
- SlotIndex DefIdx = UseIdx.getDefIndex();
+ SlotIndex DefIdx = UseIdx.getRegSlot();
VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
if (!DVNI)
continue;
@@ -779,7 +749,6 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// is updated.
VNInfo *ValNo = BValNo;
ValNo->def = AValNo->def;
- ValNo->setCopy(0);
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
@@ -799,7 +768,7 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
bool preserveSrcInt,
unsigned DstReg,
MachineInstr *CopyMI) {
- SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getUseIndex();
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
assert(SrcLR != SrcInt.end() && "Live range not found!");
VNInfo *ValNo = SrcLR->valno;
@@ -809,14 +778,14 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
if (!DefMI)
return false;
assert(DefMI && "Defining instruction disappeared");
- const MCInstrDesc &MCID = DefMI->getDesc();
- if (!MCID.isAsCheapAsAMove())
+ if (!DefMI->isAsCheapAsAMove())
return false;
if (!TII->isTriviallyReMaterializable(DefMI, AA))
return false;
bool SawStore = false;
if (!DefMI->isSafeToMove(TII, AA, SawStore))
return false;
+ const MCInstrDesc &MCID = DefMI->getDesc();
if (MCID.getNumDefs() != 1)
return false;
if (!DefMI->isImplicitDef()) {
@@ -831,27 +800,52 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
return false;
}
- RemoveCopyFlag(DstReg, CopyMI);
-
MachineBasicBlock *MBB = CopyMI->getParent();
MachineBasicBlock::iterator MII =
llvm::next(MachineBasicBlock::iterator(CopyMI));
TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI);
MachineInstr *NewMI = prior(MII);
+ // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
+ // We need to remember these so we can add intervals once we insert
+ // NewMI into SlotIndexes.
+ SmallVector<unsigned, 4> NewMIImplDefs;
+ for (unsigned i = NewMI->getDesc().getNumOperands(),
+ e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (MO.isReg()) {
+ assert(MO.isDef() && MO.isImplicit() && MO.isDead() &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
+ NewMIImplDefs.push_back(MO.getReg());
+ }
+ }
+
// CopyMI may have implicit operands, transfer them over to the newly
// rematerialized instruction. And update implicit def interval valnos.
for (unsigned i = CopyMI->getDesc().getNumOperands(),
e = CopyMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = CopyMI->getOperand(i);
- if (MO.isReg() && MO.isImplicit())
- NewMI->addOperand(MO);
- if (MO.isDef())
- RemoveCopyFlag(MO.getReg(), CopyMI);
+ if (MO.isReg()) {
+ assert(MO.isImplicit() && "No explicit operands after implicit operands.");
+ // Discard VReg implicit defs.
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ NewMI->addOperand(MO);
+ }
+ }
}
- NewMI->copyImplicitOps(CopyMI);
LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+
+ SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
+ for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
+ unsigned reg = NewMIImplDefs[i];
+ LiveInterval &li = LIS->getInterval(reg);
+ VNInfo *DeadDefVN = li.getNextValue(NewMIIdx.getRegSlot(),
+ LIS->getVNInfoAllocator());
+ LiveRange lr(NewMIIdx.getRegSlot(), NewMIIdx.getDeadSlot(), DeadDefVN);
+ li.addRange(lr);
+ }
+
CopyMI->eraseFromParent();
ReMatCopies.insert(CopyMI);
ReMatDefs.insert(DefMI);
@@ -887,7 +881,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
DstInt = SrcInt;
SrcInt = 0;
- VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getDefIndex());
+ VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot());
assert(DeadVNI && "No value defined in DstInt");
DstInt->removeValNo(DeadVNI);
@@ -941,13 +935,10 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
SmallVector<unsigned,8> Ops;
bool Reads, Writes;
tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
- bool Kills = false, Deads = false;
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = UseMI->getOperand(Ops[i]);
- Kills |= MO.isKill();
- Deads |= MO.isDead();
// Make sure we don't create read-modify-write defs accidentally. We
// assume here that a SrcReg def cannot be joined into a live DstReg. If
@@ -967,19 +958,6 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
if (JoinedCopies.count(UseMI))
continue;
- if (SubIdx) {
- // If UseMI was a simple SrcReg def, make sure we didn't turn it into a
- // read-modify-write of DstReg.
- if (Deads)
- UseMI->addRegisterDead(DstReg, TRI);
- else if (!Reads && Writes)
- UseMI->addRegisterDefined(DstReg, TRI);
-
- // Kill flags apply to the whole physical register.
- if (DstIsPhys && Kills)
- UseMI->addRegisterKilled(DstReg, TRI);
- }
-
DEBUG({
dbgs() << "\t\tupdated: ";
if (!UseMI->isDebugValue())
@@ -996,7 +974,7 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS,
const TargetRegisterInfo *TRI) {
if (li.empty()) {
if (TargetRegisterInfo::isPhysicalRegister(li.reg))
- for (const unsigned* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) {
+ for (const uint16_t* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) {
if (!LIS->hasInterval(*SR))
continue;
LiveInterval &sli = LIS->getInterval(*SR);
@@ -1013,7 +991,7 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS,
/// the val# it defines. If the live interval becomes empty, remove it as well.
bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li,
MachineInstr *DefMI) {
- SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getDefIndex();
+ SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getRegSlot();
LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
if (DefIdx != MLR->valno->def)
return false;
@@ -1021,27 +999,6 @@ bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li,
return removeIntervalIfEmpty(li, LIS, TRI);
}
-void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg,
- const MachineInstr *CopyMI) {
- SlotIndex DefIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
- if (LIS->hasInterval(DstReg)) {
- LiveInterval &LI = LIS->getInterval(DstReg);
- if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
- if (LR->valno->def == DefIdx)
- LR->valno->setCopy(0);
- }
- if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
- return;
- for (const unsigned* AS = TRI->getAliasSet(DstReg); *AS; ++AS) {
- if (!LIS->hasInterval(*AS))
- continue;
- LiveInterval &LI = LIS->getInterval(*AS);
- if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
- if (LR->valno->def == DefIdx)
- LR->valno->setCopy(0);
- }
-}
-
/// shouldJoinPhys - Return true if a copy involving a physreg should be joined.
/// We need to be careful about coalescing a source physical register with a
/// virtual register. Once the coalescing is done, it cannot be broken and these
@@ -1279,7 +1236,7 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
}
}
- // SrcReg is guarateed to be the register whose live interval that is
+ // SrcReg is guaranteed to be the register whose live interval that is
// being merged.
LIS->removeInterval(CP.getSrcReg());
@@ -1368,9 +1325,9 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li,
// FIXME: This is very conservative. For example, we don't handle
// physical registers.
- MachineInstr *MI = VNI->getCopy();
+ MachineInstr *MI = li.getInstructionFromIndex(VNI->def);
- if (!MI->isFullCopy() || CP.isPartial() || CP.isPhys())
+ if (!MI || !MI->isFullCopy() || CP.isPartial() || CP.isPhys())
return false;
unsigned Dst = MI->getOperand(0).getReg();
@@ -1388,11 +1345,9 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li,
assert(Dst == A);
VNInfo *Other = LR->valno;
- if (!Other->isDefByCopy())
- return false;
- const MachineInstr *OtherMI = Other->getCopy();
+ const MachineInstr *OtherMI = li.getInstructionFromIndex(Other->def);
- if (!OtherMI->isFullCopy())
+ if (!OtherMI || !OtherMI->isFullCopy())
return false;
unsigned OtherDst = OtherMI->getOperand(0).getReg();
@@ -1431,7 +1386,44 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// than the full interference check below. We allow overlapping live ranges
// only when one is a copy of the other.
if (CP.isPhys()) {
- for (const unsigned *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){
+ // Optimization for reserved registers like ESP.
+ // We can only merge with a reserved physreg if RHS has a single value that
+ // is a copy of CP.DstReg(). The live range of the reserved register will
+ // look like a set of dead defs - we don't properly track the live range of
+ // reserved registers.
+ if (RegClassInfo.isReserved(CP.getDstReg())) {
+ assert(CP.isFlipped() && RHS.containsOneValue() &&
+ "Invalid join with reserved register");
+ // Deny any overlapping intervals. This depends on all the reserved
+ // register live ranges looking like dead defs.
+ for (const uint16_t *AS = TRI->getOverlaps(CP.getDstReg()); *AS; ++AS) {
+ if (!LIS->hasInterval(*AS)) {
+ // Make sure at least DstReg itself exists before attempting a join.
+ if (*AS == CP.getDstReg())
+ LIS->getOrCreateInterval(CP.getDstReg());
+ continue;
+ }
+ if (RHS.overlaps(LIS->getInterval(*AS))) {
+ DEBUG(dbgs() << "\t\tInterference: " << PrintReg(*AS, TRI) << '\n');
+ return false;
+ }
+ }
+ // Skip any value computations, we are not adding new values to the
+ // reserved register. Also skip merging the live ranges, the reserved
+ // register live range doesn't need to be accurate as long as all the
+ // defs are there.
+ return true;
+ }
+
+ // Check if a register mask clobbers DstReg.
+ BitVector UsableRegs;
+ if (LIS->checkRegMaskInterference(RHS, UsableRegs) &&
+ !UsableRegs.test(CP.getDstReg())) {
+ DEBUG(dbgs() << "\t\tRegister mask interference.\n");
+ return false;
+ }
+
+ for (const uint16_t *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){
if (!LIS->hasInterval(*AS))
continue;
const LiveInterval &LHS = LIS->getInterval(*AS);
@@ -1485,12 +1477,12 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+ if (VNI->isUnused() || VNI->isPHIDef())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
+ assert(MI && "Missing def");
+ if (!MI->isCopyLike()) // Src not defined by a copy?
continue;
-
- // Never join with a register that has EarlyClobber redefs.
- if (VNI->hasRedefByEC())
- return false;
// Figure out the value # from the RHS.
LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
@@ -1499,7 +1491,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// DstReg is known to be a register in the LHS interval. If the src is
// from the RHS interval, we can use its value #.
- MachineInstr *MI = VNI->getCopy();
if (!CP.isCoalescable(MI) &&
!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies))
continue;
@@ -1512,12 +1503,12 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+ if (VNI->isUnused() || VNI->isPHIDef())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
+ assert(MI && "Missing def");
+ if (!MI->isCopyLike()) // Src not defined by a copy?
continue;
-
- // Never join with a register that has EarlyClobber redefs.
- if (VNI->hasRedefByEC())
- return false;
// Figure out the value # from the LHS.
LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
@@ -1526,7 +1517,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// DstReg is known to be a register in the RHS interval. If the src is
// from the LHS interval, we can use its value #.
- MachineInstr *MI = VNI->getCopy();
if (!CP.isCoalescable(MI) &&
!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies))
continue;
@@ -1600,10 +1590,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
if (LHSValNoAssignments[I->valno->id] !=
RHSValNoAssignments[J->valno->id])
return false;
- // If it's re-defined by an early clobber somewhere in the live range,
- // then conservatively abort coalescing.
- if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
- return false;
}
if (I->end < J->end)
@@ -1905,8 +1891,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
unsigned Reg = MO.getReg();
if (!Reg)
continue;
+ DeadDefs.push_back(Reg);
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- DeadDefs.push_back(Reg);
// Remat may also enable register class inflation.
if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)))
InflateRegs.push_back(Reg);
@@ -1936,7 +1922,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// Check for now unnecessary kill flags.
if (LIS->isNotInMIMap(MI)) continue;
- SlotIndex DefIdx = LIS->getInstructionIndex(MI).getDefIndex();
+ SlotIndex DefIdx = LIS->getInstructionIndex(MI).getRegSlot();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isKill()) continue;
@@ -1950,7 +1936,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// remain alive.
if (!TargetRegisterInfo::isPhysicalRegister(reg))
continue;
- for (const unsigned *SR = TRI->getSubRegisters(reg);
+ for (const uint16_t *SR = TRI->getSubRegisters(reg);
unsigned S = *SR; ++SR)
if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx))
MI->addRegisterDefined(S, TRI);
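A recurring pattern in the coalescer changes above: VNInfo no longer carries a "defined by copy" flag or a cached copy pointer, so the question "is this value defined by a copy?" is answered by skipping PHI defs, looking the defining instruction up from the value number's def slot, and testing it directly (isCopyLike(), or isFullCopy() at the call sites that also guard against a failed lookup). A sketch of that pattern with stub types follows; SlotIndexesStub and friends are illustrative, not the LLVM classes.

// Sketch: look up the defining instruction from the def slot and test it.
#include <map>

struct MachineInstrStub {
  bool IsCopy;
  bool isCopyLike() const { return IsCopy; }
};

struct VNInfoStub {
  unsigned DefSlot; // slot index of the defining instruction
  bool PHIDef;
  bool isPHIDef() const { return PHIDef; }
};

struct SlotIndexesStub {
  std::map<unsigned, MachineInstrStub *> Slot2MI;
  MachineInstrStub *getInstructionFromIndex(unsigned Slot) const {
    auto It = Slot2MI.find(Slot);
    return It == Slot2MI.end() ? nullptr : It->second;
  }
};

// PHI defs have no instruction and a null lookup means the def was erased;
// both answer "not defined by a copy" here.
bool isDefinedByCopy(const SlotIndexesStub &Indexes, const VNInfoStub &VNI) {
  if (VNI.isPHIDef())
    return false;
  MachineInstrStub *MI = Indexes.getInstructionFromIndex(VNI.DefSlot);
  return MI && MI->isCopyLike();
}

int main() {
  MachineInstrStub Copy{true}, Add{false};
  SlotIndexesStub Indexes{{{4, &Copy}, {8, &Add}}};
  VNInfoStub FromCopy{4, false}, FromAdd{8, false}, Phi{0, true};
  return (isDefinedByCopy(Indexes, FromCopy) &&
          !isDefinedByCopy(Indexes, FromAdd) &&
          !isDefinedByCopy(Indexes, Phi)) ? 0 : 1;
}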
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index 472c48377fef..310b933cab9b 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -1,4 +1,4 @@
-//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===//
+//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the abstract interface for register coalescers,
+// This file contains the abstract interface for register coalescers,
// allowing them to interact with and query register allocators.
//
//===----------------------------------------------------------------------===//
@@ -47,7 +47,7 @@ namespace llvm {
/// CrossClass - True when both regs are virtual, and newRC is constrained.
bool CrossClass;
- /// Flipped - True when DstReg and SrcReg are reversed from the oriignal
+ /// Flipped - True when DstReg and SrcReg are reversed from the original
/// copy instruction.
bool Flipped;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index ca02aa1b8143..03bd82e225dc 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
void RegScavenger::setUsed(unsigned Reg) {
RegsAvailable.reset(Reg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
RegsAvailable.reset(SubReg);
}
@@ -45,7 +45,7 @@ void RegScavenger::setUsed(unsigned Reg) {
bool RegScavenger::isAliasUsed(unsigned Reg) const {
if (isUsed(Reg))
return true;
- for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R)
+ for (const uint16_t *R = TRI->getAliasSet(Reg); *R; ++R)
if (isUsed(*R))
return true;
return false;
@@ -59,9 +59,6 @@ void RegScavenger::initRegState() {
// All registers started out unused.
RegsAvailable.set();
- // Reserved registers are always used.
- RegsAvailable ^= ReservedRegs;
-
if (!MBB)
return;
@@ -86,17 +83,24 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) &&
"Target changed?");
+ // It is not possible to use the register scavenger after late optimization
+ // passes that don't preserve accurate liveness information.
+ assert(MRI->tracksLiveness() &&
+ "Cannot use register scavenger with inaccurate liveness");
+
// Self-initialize.
if (!MBB) {
NumPhysRegs = TRI->getNumRegs();
RegsAvailable.resize(NumPhysRegs);
+ KillRegs.resize(NumPhysRegs);
+ DefRegs.resize(NumPhysRegs);
// Create reserved registers bitvector.
ReservedRegs = TRI->getReservedRegs(MF);
// Create callee-saved registers bitvector.
CalleeSavedRegs.resize(NumPhysRegs);
- const unsigned *CSRegs = TRI->getCalleeSavedRegs();
+ const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF);
if (CSRegs != NULL)
for (unsigned i = 0; CSRegs[i]; ++i)
CalleeSavedRegs.set(CSRegs[i]);
@@ -110,13 +114,7 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
BV.set(Reg);
- for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
- BV.set(*R);
-}
-
-void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) {
- BV.set(Reg);
- for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++)
+ for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++)
BV.set(*R);
}
@@ -148,12 +146,12 @@ void RegScavenger::forward() {
// predicated, conservatively assume "kill" markers do not actually kill the
// register. Similarly ignores "dead" markers.
bool isPred = TII->isPredicated(MI);
- BitVector EarlyClobberRegs(NumPhysRegs);
- BitVector KillRegs(NumPhysRegs);
- BitVector DefRegs(NumPhysRegs);
- BitVector DeadRegs(NumPhysRegs);
+ KillRegs.reset();
+ DefRegs.reset();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ (isPred ? DefRegs : KillRegs).setBitsNotInMask(MO.getRegMask());
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -164,21 +162,19 @@ void RegScavenger::forward() {
// Ignore undef uses.
if (MO.isUndef())
continue;
- // Two-address operands implicitly kill.
- if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i)))
+ if (!isPred && MO.isKill())
addRegWithSubRegs(KillRegs, Reg);
} else {
assert(MO.isDef());
if (!isPred && MO.isDead())
- addRegWithSubRegs(DeadRegs, Reg);
+ addRegWithSubRegs(KillRegs, Reg);
else
addRegWithSubRegs(DefRegs, Reg);
- if (MO.isEarlyClobber())
- addRegWithAliases(EarlyClobberRegs, Reg);
}
}
// Verify uses and defs.
+#ifndef NDEBUG
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
@@ -199,17 +195,18 @@ void RegScavenger::forward() {
// Ideally we would like a way to model this, but leaving the
// insert_subreg around causes both correctness and performance issues.
bool SubUsed = false;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
if (isUsed(SubReg)) {
SubUsed = true;
break;
}
- assert(SubUsed && "Using an undefined register!");
+ if (!SubUsed) {
+ MBB->getParent()->verify(NULL, "In Register Scavenger");
+ llvm_unreachable("Using an undefined register!");
+ }
(void)SubUsed;
}
- assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) &&
- "Using an early clobbered register!");
} else {
assert(MO.isDef());
#if 0
@@ -221,18 +218,20 @@ void RegScavenger::forward() {
#endif
}
}
+#endif // NDEBUG
// Commit the changes.
setUnused(KillRegs);
- setUnused(DeadRegs);
setUsed(DefRegs);
}
void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+ used = RegsAvailable;
+ used.flip();
if (includeReserved)
- used = ~RegsAvailable;
+ used |= ReservedRegs;
else
- used = ~RegsAvailable & ~ReservedRegs;
+ used.reset(ReservedRegs);
}
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
@@ -286,6 +285,8 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
// Remove any candidates touched by instruction.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ Candidates.clearBitsNotInMask(MO.getRegMask());
if (!MO.isReg() || MO.isUndef() || !MO.getReg())
continue;
if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
@@ -296,7 +297,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
continue;
}
Candidates.reset(MO.getReg());
- for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++)
+ for (const uint16_t *R = TRI->getAliasSet(MO.getReg()); *R; R++)
Candidates.reset(*R);
}
// If we're not in a virtual reg's live range, this is a valid
@@ -347,9 +348,9 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// RegsAvailable, as RegsAvailable does not take aliases into account.
// That's what getRegsAvailable() is for.
BitVector Available = getRegsAvailable(RC);
-
- if ((Candidates & Available).any())
- Candidates &= Available;
+ Available &= Candidates;
+ if (Available.any())
+ Candidates = Available;
// Find the register whose use is furthest away.
MachineBasicBlock::iterator UseMI;
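One of the scavenger changes above narrows the candidate set before searching for a register to scavenge: intersect the candidates with the alias-aware available set, keep the intersection if it is non-empty, and otherwise fall back to the original candidates (a spill will then be needed). A tiny C++ sketch follows, with std::bitset standing in for llvm::BitVector.

// Sketch of the candidate-narrowing step in scavengeRegister().
#include <bitset>
#include <cstddef>

template <std::size_t N>
std::bitset<N> narrowCandidates(std::bitset<N> Candidates,
                                const std::bitset<N> &Available) {
  std::bitset<N> Both = Available & Candidates;
  // Prefer available candidates, but never end up with an empty set.
  return Both.any() ? Both : Candidates;
}

int main() {
  std::bitset<8> Cand("00001110"), Avail("00000110"), NoAvail("11110000");
  bool Ok = narrowCandidates(Cand, Avail) == std::bitset<8>("00000110") &&
            narrowCandidates(Cand, NoAvail) == Cand;
  return Ok ? 0 : 1;
}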
diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp
index 8b02ec44273a..6020908d9112 100644
--- a/lib/CodeGen/RenderMachineFunction.cpp
+++ b/lib/CodeGen/RenderMachineFunction.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----s-----------===//
+//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -560,12 +560,13 @@ namespace llvm {
// For uses/defs recorded use/def indexes override current liveness and
// instruction operands (Only for the interval which records the indexes).
- if (i.isUse() || i.isDef()) {
+ // FIXME: This is all wrong, uses and defs share the same slots.
+ if (i.isEarlyClobber() || i.isRegister()) {
UseDefs::const_iterator udItr = useDefs.find(li);
if (udItr != useDefs.end()) {
const SlotSet &slotSet = udItr->second;
if (slotSet.count(i)) {
- if (i.isUse()) {
+ if (i.isEarlyClobber()) {
return Used;
}
// else
@@ -586,9 +587,9 @@ namespace llvm {
return AliveStack;
}
} else {
- if (i.isDef() && mi->definesRegister(li->reg, tri)) {
+ if (i.isRegister() && mi->definesRegister(li->reg, tri)) {
return Defined;
- } else if (i.isUse() && mi->readsRegister(li->reg)) {
+ } else if (i.isEarlyClobber() && mi->readsRegister(li->reg)) {
return Used;
} else {
if (vrm == 0 ||
@@ -804,7 +805,7 @@ namespace llvm {
os << indent + s(2) << "<tr height=6ex>\n";
// Render the code column.
- if (i.isLoad()) {
+ if (i.isBlock()) {
MachineBasicBlock *mbb = sis->getMBBFromIndex(i);
mi = sis->getInstructionFromIndex(i);
@@ -823,7 +824,7 @@ namespace llvm {
}
os << indent + s(4) << "</td>\n";
} else {
- i = i.getStoreIndex(); // <- Will be incremented to the next index.
+ i = i.getDeadSlot(); // <- Will be incremented to the next index.
continue;
}
}
@@ -952,10 +953,10 @@ namespace llvm {
rItr != rEnd; ++rItr) {
const MachineInstr *mi = &*rItr;
if (mi->readsRegister(li->reg)) {
- useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex());
+ useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot(true));
}
if (mi->definesRegister(li->reg)) {
- useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex());
+ useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot());
}
}
}
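
The renames in this file track the new SlotIndex model, in which ordinary uses and defs share a single Register slot (hence the FIXME above). Below is a minimal model of the four per-instruction slots implied by the new names; the enum is an illustration, not LLVM's SlotIndex class:

#include <cassert>

// Old getUseIndex()/getDefIndex() map onto getRegSlot(true)/getRegSlot(), and
// old getStoreIndex() onto getDeadSlot(), per the hunks above. The slots below
// are listed in the order they occur within one instruction.
enum Slot { Block, EarlyClobber, Register, Dead };

int main() {
  assert(Block < EarlyClobber && EarlyClobber < Register && Register < Dead);
  return 0;
}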
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 1e9b5c89f172..8fd64265fda6 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -31,6 +31,8 @@ static cl::opt<bool> StressSchedOpt(
cl::desc("Stress test instruction scheduling"));
#endif
+void SchedulingPriorityQueue::anchor() { }
+
ScheduleDAG::ScheduleDAG(MachineFunction &mf)
: TM(mf.getTarget()),
TII(TM.getInstrInfo()),
@@ -44,42 +46,17 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf)
ScheduleDAG::~ScheduleDAG() {}
-/// getInstrDesc helper to handle SDNodes.
-const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
- if (!Node || !Node->isMachineOpcode()) return NULL;
- return &TII->get(Node->getMachineOpcode());
-}
-
-/// dump - dump the schedule.
-void ScheduleDAG::dumpSchedule() const {
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- if (SUnit *SU = Sequence[i])
- SU->dump(this);
- else
- dbgs() << "**** NOOP ****\n";
- }
-}
-
-
-/// Run - perform scheduling.
-///
-void ScheduleDAG::Run(MachineBasicBlock *bb,
- MachineBasicBlock::iterator insertPos) {
- BB = bb;
- InsertPos = insertPos;
-
+/// Clear the DAG state (e.g. between scheduling regions).
+void ScheduleDAG::clearDAG() {
SUnits.clear();
- Sequence.clear();
EntrySU = SUnit();
ExitSU = SUnit();
+}
- Schedule();
-
- DEBUG({
- dbgs() << "*** Final schedule ***\n";
- dumpSchedule();
- dbgs() << '\n';
- });
+/// getInstrDesc helper to handle SDNodes.
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+ if (!Node || !Node->isMachineOpcode()) return NULL;
+ return &TII->get(Node->getMachineOpcode());
}
/// addPred - This adds the specified edge as a pred of the current node if
@@ -313,13 +290,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
case SDep::Output: dbgs() << "out "; break;
case SDep::Order: dbgs() << "ch "; break;
}
- dbgs() << "#";
- dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
if (I->isAssignedRegDep())
- dbgs() << " Reg=" << G->TRI->getName(I->getReg());
+ dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
dbgs() << "\n";
}
}
@@ -334,8 +310,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
case SDep::Output: dbgs() << "out "; break;
case SDep::Order: dbgs() << "ch "; break;
}
- dbgs() << "#";
- dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
@@ -346,13 +321,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
}
#ifndef NDEBUG
-/// VerifySchedule - Verify that all SUnits were scheduled and that
-/// their state is consistent.
+/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
+/// their state is consistent. Return the number of scheduled nodes.
///
-void ScheduleDAG::VerifySchedule(bool isBottomUp) {
+unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
bool AnyNotSched = false;
unsigned DeadNodes = 0;
- unsigned Noops = 0;
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
if (!SUnits[i].isScheduled) {
if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
@@ -393,12 +367,8 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
}
}
}
- for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
- if (!Sequence[i])
- ++Noops;
assert(!AnyNotSched);
- assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
- "The number of nodes scheduled doesn't match the expected number!");
+ return SUnits.size() - DeadNodes;
}
#endif
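
VerifyScheduledDAG no longer checks the emitted sequence, which the base class no longer owns; instead it returns the number of scheduled nodes so that callers which do keep a sequence can make the old check themselves. A tiny numeric illustration of the invariant that moved to the caller (all counts are made up):

#include <cassert>

int main() {
  unsigned SUnitsSize = 10, DeadNodes = 2;      // DAG nodes, two of them isolated/dead
  unsigned SequenceSize = 9, Noops = 1;         // emitted schedule, including one noop
  unsigned Scheduled = SUnitsSize - DeadNodes;  // what VerifyScheduledDAG returns
  assert(SequenceSize - Noops == Scheduled &&
         "The number of nodes scheduled doesn't match the expected number!");
  return 0;
}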
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
deleted file mode 100644
index f8b1bc76eb8b..000000000000
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This implements the Emit routines for the ScheduleDAG class, which creates
-// MachineInstrs according to the computed schedule.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pre-RA-sched"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-using namespace llvm;
-
-void ScheduleDAG::EmitNoop() {
- TII->insertNoop(*BB, InsertPos);
-}
-
-void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
- DenseMap<SUnit*, unsigned> &VRBaseMap) {
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl()) continue; // ignore chain preds
- if (I->getSUnit()->CopyDstRC) {
- // Copy to physical register.
- DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
- assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
- // Find the destination physical register.
- unsigned Reg = 0;
- for (SUnit::const_succ_iterator II = SU->Succs.begin(),
- EE = SU->Succs.end(); II != EE; ++II) {
- if (II->isCtrl()) continue; // ignore chain preds
- if (II->getReg()) {
- Reg = II->getReg();
- break;
- }
- }
- BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
- .addReg(VRI->second);
- } else {
- // Copy from physical register.
- assert(I->getReg() && "Unknown physical register!");
- unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
- bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
- (void)isNew; // Silence compiler warning.
- assert(isNew && "Node emitted out of order - early");
- BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
- .addReg(I->getReg());
- }
- break;
- }
-}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 34b8ab0b47f2..6be1ab7f5b08 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -13,14 +13,15 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sched-instrs"
-#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -33,25 +34,17 @@ using namespace llvm;
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo &mli,
- const MachineDominatorTree &mdt)
+ const MachineDominatorTree &mdt,
+ bool IsPostRAFlag,
+ LiveIntervals *lis)
: ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
- InstrItins(mf.getTarget().getInstrItineraryData()),
- Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
- LoopRegs(MLI, MDT), FirstDbgValue(0) {
+ InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
+ IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT),
+ FirstDbgValue(0) {
+ assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
-}
-
-/// Run - perform scheduling.
-///
-void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
- MachineBasicBlock::iterator begin,
- MachineBasicBlock::iterator end,
- unsigned endcount) {
- BB = bb;
- Begin = begin;
- InsertPosIndex = endcount;
-
- ScheduleDAG::Run(bb, end);
+ assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
+ "Virtual registers must be removed prior to PostRA scheduling");
}
/// getUnderlyingObjectFromInt - This is the function that does the work of
@@ -133,19 +126,58 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
return 0;
}
-void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) {
LoopRegs.Deps.clear();
if (MachineLoop *ML = MLI.getLoopFor(BB))
- if (BB == ML->getLoopLatch()) {
- MachineBasicBlock *Header = ML->getHeader();
- for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
- E = Header->livein_end(); I != E; ++I)
- LoopLiveInRegs.insert(*I);
+ if (BB == ML->getLoopLatch())
LoopRegs.VisitLoop(ML);
- }
}
-/// AddSchedBarrierDeps - Add dependencies from instructions in the current
+void ScheduleDAGInstrs::finishBlock() {
+ // Nothing to do.
+}
+
+/// Initialize the map with the number of registers.
+void Reg2SUnitsMap::setRegLimit(unsigned Limit) {
+ PhysRegSet.setUniverse(Limit);
+ SUnits.resize(Limit);
+}
+
+/// Clear the map without deallocating storage.
+void Reg2SUnitsMap::clear() {
+ for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
+ SUnits[*I].clear();
+ }
+ PhysRegSet.clear();
+}
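
Reg2SUnitsMap::clear walks only the registers that actually received entries, so the per-register buckets keep their storage and a mostly-empty map clears in time proportional to what was touched rather than to the number of physical registers. A stand-alone sketch of that pattern (plain vectors here instead of SparseSet; the names are made up):

#include <cassert>
#include <vector>

struct RegBuckets {
  std::vector<std::vector<int> > Buckets;  // one bucket per physical register
  std::vector<unsigned> Touched;           // registers that currently have entries
  void setRegLimit(unsigned N) { Buckets.resize(N); }
  void push(unsigned Reg, int SU) {
    if (Buckets[Reg].empty()) Touched.push_back(Reg);
    Buckets[Reg].push_back(SU);
  }
  void clear() {
    for (unsigned i = 0, e = Touched.size(); i != e; ++i)
      Buckets[Touched[i]].clear();         // O(#touched), not O(#physregs)
    Touched.clear();                       // every bucket keeps its capacity
  }
};

int main() {
  RegBuckets M;
  M.setRegLimit(256);
  M.push(17, 1); M.push(17, 2); M.push(42, 3);
  M.clear();
  assert(M.Buckets[17].empty() && M.Buckets[42].empty());
  return 0;
}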
+
+/// Initialize the DAG and common scheduler state for the current scheduling
+/// region. This does not actually create the DAG, only clears it. The
+/// scheduling driver may call BuildSchedGraph multiple times per scheduling
+/// region.
+void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ BB = bb;
+ RegionBegin = begin;
+ RegionEnd = end;
+ EndIndex = endcount;
+ MISUnitMap.clear();
+
+ // Check to see if the scheduler cares about latencies.
+ UnitLatencies = forceUnitLatencies();
+
+ ScheduleDAG::clearDAG();
+}
+
+/// Close the current scheduling region. Don't clear any state in case the
+/// driver wants to refer to the previous scheduling region.
+void ScheduleDAGInstrs::exitRegion() {
+ // Nothing to do.
+}
+
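
With Run() gone, region selection belongs to the driver, which brackets each region with enterRegion/exitRegion and each block with startBlock/finishBlock. A hypothetical driver loop, only a sketch against the interfaces added above (real passes split regions at calls and terminators and run a concrete scheduler where the comment indicates):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
using namespace llvm;

static void scheduleBlockAsOneRegion(ScheduleDAGInstrs &SD,
                                     MachineBasicBlock *MBB,
                                     AliasAnalysis *AA) {
  SD.startBlock(MBB);
  // One region covering the whole block, for simplicity.
  SD.enterRegion(MBB, MBB->begin(), MBB->end(), MBB->size());
  SD.buildSchedGraph(AA);   // may be called more than once per region
  // ... run the scheduler itself and re-emit the instructions here ...
  SD.exitRegion();
  SD.finishBlock();
}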
+/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
@@ -153,11 +185,11 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
/// especially important when the definition latency of the return value(s)
/// are too high to be hidden by the branch or when the liveout registers
/// used by instructions in the fallthrough block.
-void ScheduleDAGInstrs::AddSchedBarrierDeps() {
- MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
+void ScheduleDAGInstrs::addSchedBarrierDeps() {
+ MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
ExitSU.setInstr(ExitMI);
bool AllDepKnown = ExitMI &&
- (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier());
+ (ExitMI->isCall() || ExitMI->isBarrier());
if (ExitMI && AllDepKnown) {
// If it's a call or a barrier, add dependencies on the defs and uses of
// instruction.
@@ -167,29 +199,313 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() {
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
- Uses[Reg].push_back(&ExitSU);
+ if (TRI->isPhysicalRegister(Reg))
+ Uses[Reg].push_back(&ExitSU);
+ else {
+ assert(!IsPostRA && "Virtual register encountered after regalloc.");
+ addVRegUseDeps(&ExitSU, i);
+ }
}
} else {
// For others, e.g. fallthrough, conditional branch, assume the exit
// uses all the registers that are livein to the successor blocks.
- SmallSet<unsigned, 8> Seen;
+ assert(Uses.empty() && "Uses in set before adding deps?");
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
- if (Seen.insert(Reg))
+ if (!Uses.contains(Reg))
Uses[Reg].push_back(&ExitSU);
}
}
}
-void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
- // We'll be allocating one SUnit for each instruction, plus one for
- // the region exit node.
+/// MO is an operand of SU's instruction that defines a physical register. Add
+/// data dependencies from SU to any uses of the physical register.
+void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
+ const MachineOperand &MO) {
+ assert(MO.isDef() && "expect physreg def");
+
+ // Ask the target if address-backscheduling is desirable, and if so how much.
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
+ unsigned DataLatency = SU->Latency;
+
+ for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
+ if (!Uses.contains(*Alias))
+ continue;
+ std::vector<SUnit*> &UseList = Uses[*Alias];
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU == SU)
+ continue;
+ unsigned LDataLatency = DataLatency;
+ // Optionally add in a special extra latency for nodes that
+ // feed addresses.
+ // TODO: Perhaps we should get rid of
+ // SpecialAddressLatency and just move this into
+ // adjustSchedDependency for the targets that care about it.
+ if (SpecialAddressLatency != 0 && !UnitLatencies &&
+ UseSU != &ExitSU) {
+ MachineInstr *UseMI = UseSU->getInstr();
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
+ assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
+ if (RegUseIndex >= 0 &&
+ (UseMI->mayLoad() || UseMI->mayStore()) &&
+ (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
+ UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+ LDataLatency += SpecialAddressLatency;
+ }
+ // Adjust the dependence latency using operand def/use
+ // information (if any), and then allow the target to
+ // perform its own adjustments.
+ const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
+ if (!UnitLatencies) {
+ computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+ ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+ }
+ UseSU->addPred(dep);
+ }
+ }
+}
+
+/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
+/// this SUnit to following instructions in the same scheduling region that
+/// depend on the physical register referenced at OperIdx.

+void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineInstr *MI = SU->getInstr();
+ const MachineOperand &MO = MI->getOperand(OperIdx);
+
+ // Optionally add output and anti dependencies. For anti
+ // dependencies we use a latency of 0 because for a multi-issue
+ // target we want to allow the defining instruction to issue
+ // in the same cycle as the using instruction.
+ // TODO: Using a latency of 1 here for output dependencies assumes
+ // there's no cost for reusing registers.
+ SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+ for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
+ if (!Defs.contains(*Alias))
+ continue;
+ std::vector<SUnit *> &DefList = Defs[*Alias];
+ for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+ SUnit *DefSU = DefList[i];
+ if (DefSU == &ExitSU)
+ continue;
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(*Alias))) {
+ if (Kind == SDep::Anti)
+ DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
+ else {
+ unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
+ DefSU->getInstr());
+ DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
+ }
+ }
+ }
+ }
+
+ if (!MO.isDef()) {
+ // Either insert a new Reg2SUnits entry with an empty SUnits list, or
+ // retrieve the existing SUnits list for this register's uses.
+ // Push this SUnit on the use list.
+ Uses[MO.getReg()].push_back(SU);
+ }
+ else {
+ addPhysRegDataDeps(SU, MO);
+
+ // Either insert a new Reg2SUnits entry with an empty SUnits list, or
+ // retrieve the existing SUnits list for this register's defs.
+ std::vector<SUnit *> &DefList = Defs[MO.getReg()];
+
+ // If a def is going to wrap back around to the top of the loop,
+ // backschedule it.
+ if (!UnitLatencies && DefList.empty()) {
+ LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
+ if (I != LoopRegs.Deps.end()) {
+ const MachineOperand *UseMO = I->second.first;
+ unsigned Count = I->second.second;
+ const MachineInstr *UseMI = UseMO->getParent();
+ unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ const TargetSubtargetInfo &ST =
+ TM.getSubtarget<TargetSubtargetInfo>();
+ unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
+ // TODO: If we knew the total depth of the region here, we could
+ // handle the case where the whole loop is inside the region but
+ // is large enough that the isScheduleHigh trick isn't needed.
+ if (UseMOIdx < UseMCID.getNumOperands()) {
+ // Currently, we only support scheduling regions consisting of
+ // single basic blocks. Check to see if the instruction is in
+ // the same region by checking to see if it has the same parent.
+ if (UseMI->getParent() != MI->getParent()) {
+ unsigned Latency = SU->Latency;
+ if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+ Latency += SpecialAddressLatency;
+ // This is a wild guess as to the portion of the latency which
+ // will be overlapped by work done outside the current
+ // scheduling region.
+ Latency -= std::min(Latency, Count);
+ // Add the artificial edge.
+ ExitSU.addPred(SDep(SU, SDep::Order, Latency,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ } else if (SpecialAddressLatency > 0 &&
+ UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+ // The entire loop body is within the current scheduling region
+ // and the latency of this operation is assumed to be greater
+ // than the latency of the loop.
+ // TODO: Recursively mark data-edge predecessors as
+ // isScheduleHigh too.
+ SU->isScheduleHigh = true;
+ }
+ }
+ LoopRegs.Deps.erase(I);
+ }
+ }
+
+ // clear this register's use list
+ if (Uses.contains(MO.getReg()))
+ Uses[MO.getReg()].clear();
+
+ if (!MO.isDead())
+ DefList.clear();
+
+ // Calls will not be reordered because of chain dependencies (see
+ // below). Since call operands are dead, calls may continue to be added
+ // to the DefList making dependence checking quadratic in the size of
+ // the block. Instead, we leave only one call at the back of the
+ // DefList.
+ if (SU->isCall) {
+ while (!DefList.empty() && DefList.back()->isCall)
+ DefList.pop_back();
+ }
+ // Defs are pushed in the order they are visited and never reordered.
+ DefList.push_back(SU);
+ }
+}
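
addPhysRegDeps walks the block bottom-up, so the Defs/Uses maps it consults always hold instructions that come later in program order; a use operand therefore contributes an anti (WAR) edge toward a later def, and a def operand an output (WAW) edge, with the data (RAW) edges added separately by addPhysRegDataDeps. A stand-alone illustration of that kind selection (not LLVM code):

#include <cstdio>

enum DepKind { Anti, Output };

static DepKind kindAgainstLaterDef(bool OperandIsUse) {
  return OperandIsUse ? Anti : Output;  // mirrors: MO.isUse() ? SDep::Anti : SDep::Output
}

int main() {
  //   I1: r0 = ...     ; def of r0 -> output edge I1 -> I3
  //   I2: ... = r0     ; use of r0 -> anti edge   I2 -> I3
  //   I3: r0 = ...     ; the later def both of the above are checked against
  std::printf("use vs later def: %s\n",
              kindAgainstLaterDef(true) == Anti ? "anti, latency 0" : "output");
  std::printf("def vs later def: %s\n",
              kindAgainstLaterDef(false) == Anti ? "anti, latency 0" : "output");
  return 0;
}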
+
+/// addVRegDefDeps - Add register output and data dependencies from this SUnit
+/// to instructions that occur later in the same scheduling region if they read
+/// from or write to the virtual register defined at OperIdx.
+///
+/// TODO: Hoist loop induction variable increments. This has to be
+/// reevaluated. Generally, IV scheduling should be done before coalescing.
+void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineInstr *MI = SU->getInstr();
+ unsigned Reg = MI->getOperand(OperIdx).getReg();
+
+ // SSA defs do not have output/anti dependencies.
+ // The current operand is a def, so we have at least one.
+ if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
+ return;
+
+ // Add output dependence to the next nearest def of this vreg.
+ //
+ // Unless this definition is dead, the output dependence should be
+ // transitively redundant with antidependencies from this definition's
+ // uses. We're conservative for now until we have a way to guarantee the uses
+ // are not eliminated sometime during scheduling. The output dependence edge
+ // is also useful if output latency exceeds def-use latency.
+ VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
+ if (DefI == VRegDefs.end())
+ VRegDefs.insert(VReg2SUnit(Reg, SU));
+ else {
+ SUnit *DefSU = DefI->SU;
+ if (DefSU != SU && DefSU != &ExitSU) {
+ unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
+ DefSU->getInstr());
+ DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
+ }
+ DefI->SU = SU;
+ }
+}
+
+/// addVRegUseDeps - Add a register data dependency if the instruction that
+/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
+/// register antidependency from this SUnit to instructions that occur later in
+/// the same scheduling region if they write the virtual register.
+///
+/// TODO: Handle ExitSU "uses" properly.
+void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
+ MachineInstr *MI = SU->getInstr();
+ unsigned Reg = MI->getOperand(OperIdx).getReg();
+
+ // Lookup this operand's reaching definition.
+ assert(LIS && "vreg dependencies requires LiveIntervals");
+ SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot();
+ LiveInterval *LI = &LIS->getInterval(Reg);
+ VNInfo *VNI = LI->getVNInfoBefore(UseIdx);
+ // VNI will be valid because MachineOperand::readsReg() is checked by caller.
+ MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
+ // Phis and other noninstructions (after coalescing) have a NULL Def.
+ if (Def) {
+ SUnit *DefSU = getSUnit(Def);
+ if (DefSU) {
+ // The reaching Def lives within this scheduling region.
+ // Create a data dependence.
+ //
+ // TODO: Handle "special" address latencies cleanly.
+ const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
+ if (!UnitLatencies) {
+ // Adjust the dependence latency using operand def/use information, then
+ // allow the target to perform its own adjustments.
+ computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
+ }
+ SU->addPred(dep);
+ }
+ }
+
+ // Add antidependence to the following def of the vreg it uses.
+ VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
+ if (DefI != VRegDefs.end() && DefI->SU != SU)
+ DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
+}
+
+/// Create an SUnit for each real instruction, numbered in top-down topological
+/// order. The instruction order A < B implies that no edge exists from B to A.
+///
+/// Map each real instruction to its SUnit.
+///
+/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
+/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
+/// instead of pointers.
+///
+/// MachineScheduler relies on initSUnits numbering the nodes by their order in
+/// the original instruction list.
+void ScheduleDAGInstrs::initSUnits() {
+ // We'll be allocating one SUnit for each real instruction in the region,
+ // which is contained within a basic block.
SUnits.reserve(BB->size());
+ for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+
+ SUnit *SU = newSUnit(MI);
+ MISUnitMap[MI] = SU;
+
+ SU->isCall = MI->isCall();
+ SU->isCommutable = MI->isCommutable();
+
+ // Assign the Latency field of SU using target-provided information.
+ if (UnitLatencies)
+ SU->Latency = 1;
+ else
+ computeLatency(SU);
+ }
+}
+
+void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
+ // Create an SUnit for each real instruction.
+ initSUnits();
+
// We build scheduling units by walking a block's instruction list from bottom
// to top.
@@ -203,29 +519,29 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
- // Check to see if the scheduler cares about latencies.
- bool UnitLatencies = ForceUnitLatencies();
-
- // Ask the target if address-backscheduling is desirable, and if so how much.
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
- unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
-
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
DbgValues.clear();
FirstDbgValue = NULL;
+ assert(Defs.empty() && Uses.empty() &&
+ "Only BuildGraph should update Defs/Uses");
+ Defs.setRegLimit(TRI->getNumRegs());
+ Uses.setRegLimit(TRI->getNumRegs());
+
+ assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
+ // FIXME: Allow SparseSet to reserve space for the creation of virtual
+ // registers during scheduling. Don't artificially inflate the Universe
+ // because we want to assert that vregs are not created during DAG building.
+ VRegDefs.setUniverse(MRI.getNumVirtRegs());
+
// Model data dependencies between instructions being scheduled and the
// ExitSU.
- AddSchedBarrierDeps();
-
- for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
- assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs");
- }
+ addSchedBarrierDeps();
// Walk the list of instructions, from bottom moving up.
MachineInstr *PrevMI = NULL;
- for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
+ for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
MII != MIE; --MII) {
MachineInstr *MI = prior(MII);
if (MI && PrevMI) {
@@ -238,19 +554,11 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
continue;
}
- const MCInstrDesc &MCID = MI->getDesc();
- assert(!MCID.isTerminator() && !MI->isLabel() &&
+ assert(!MI->isTerminator() && !MI->isLabel() &&
"Cannot schedule terminators or labels!");
- // Create the SUnit for this MI.
- SUnit *SU = NewSUnit(MI);
- SU->isCall = MCID.isCall();
- SU->isCommutable = MCID.isCommutable();
- // Assign the Latency field of SU using target-provided information.
- if (UnitLatencies)
- SU->Latency = 1;
- else
- ComputeLatency(SU);
+ SUnit *SU = MISUnitMap[MI];
+ assert(SU && "No SUnit mapped to this MI");
// Add register-based dependencies (data, anti, and output).
for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
@@ -259,152 +567,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
-
- std::vector<SUnit *> &UseList = Uses[Reg];
- // Defs are push in the order they are visited and never reordered.
- std::vector<SUnit *> &DefList = Defs[Reg];
- // Optionally add output and anti dependencies. For anti
- // dependencies we use a latency of 0 because for a multi-issue
- // target we want to allow the defining instruction to issue
- // in the same cycle as the using instruction.
- // TODO: Using a latency of 1 here for output dependencies assumes
- // there's no cost for reusing registers.
- SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
- unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
- for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
- SUnit *DefSU = DefList[i];
- if (DefSU == &ExitSU)
- continue;
- if (DefSU != SU &&
- (Kind != SDep::Output || !MO.isDead() ||
- !DefSU->getInstr()->registerDefIsDead(Reg)))
- DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
- }
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- std::vector<SUnit *> &MemDefList = Defs[*Alias];
- for (unsigned i = 0, e = MemDefList.size(); i != e; ++i) {
- SUnit *DefSU = MemDefList[i];
- if (DefSU == &ExitSU)
- continue;
- if (DefSU != SU &&
- (Kind != SDep::Output || !MO.isDead() ||
- !DefSU->getInstr()->registerDefIsDead(*Alias)))
- DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
- }
- }
-
- if (MO.isDef()) {
- // Add any data dependencies.
- unsigned DataLatency = SU->Latency;
- for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
- SUnit *UseSU = UseList[i];
- if (UseSU == SU)
- continue;
- unsigned LDataLatency = DataLatency;
- // Optionally add in a special extra latency for nodes that
- // feed addresses.
- // TODO: Do this for register aliases too.
- // TODO: Perhaps we should get rid of
- // SpecialAddressLatency and just move this into
- // adjustSchedDependency for the targets that care about it.
- if (SpecialAddressLatency != 0 && !UnitLatencies &&
- UseSU != &ExitSU) {
- MachineInstr *UseMI = UseSU->getInstr();
- const MCInstrDesc &UseMCID = UseMI->getDesc();
- int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
- assert(RegUseIndex >= 0 && "UseMI doesn's use register!");
- if (RegUseIndex >= 0 &&
- (UseMCID.mayLoad() || UseMCID.mayStore()) &&
- (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
- UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
- LDataLatency += SpecialAddressLatency;
- }
- // Adjust the dependence latency using operand def/use
- // information (if any), and then allow the target to
- // perform its own adjustments.
- const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
- if (!UnitLatencies) {
- ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
- ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
- }
- UseSU->addPred(dep);
- }
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- std::vector<SUnit *> &UseList = Uses[*Alias];
- for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
- SUnit *UseSU = UseList[i];
- if (UseSU == SU)
- continue;
- const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
- if (!UnitLatencies) {
- ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
- ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
- }
- UseSU->addPred(dep);
- }
- }
-
- // If a def is going to wrap back around to the top of the loop,
- // backschedule it.
- if (!UnitLatencies && DefList.empty()) {
- LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
- if (I != LoopRegs.Deps.end()) {
- const MachineOperand *UseMO = I->second.first;
- unsigned Count = I->second.second;
- const MachineInstr *UseMI = UseMO->getParent();
- unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
- const MCInstrDesc &UseMCID = UseMI->getDesc();
- // TODO: If we knew the total depth of the region here, we could
- // handle the case where the whole loop is inside the region but
- // is large enough that the isScheduleHigh trick isn't needed.
- if (UseMOIdx < UseMCID.getNumOperands()) {
- // Currently, we only support scheduling regions consisting of
- // single basic blocks. Check to see if the instruction is in
- // the same region by checking to see if it has the same parent.
- if (UseMI->getParent() != MI->getParent()) {
- unsigned Latency = SU->Latency;
- if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
- Latency += SpecialAddressLatency;
- // This is a wild guess as to the portion of the latency which
- // will be overlapped by work done outside the current
- // scheduling region.
- Latency -= std::min(Latency, Count);
- // Add the artificial edge.
- ExitSU.addPred(SDep(SU, SDep::Order, Latency,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
- } else if (SpecialAddressLatency > 0 &&
- UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
- // The entire loop body is within the current scheduling region
- // and the latency of this operation is assumed to be greater
- // than the latency of the loop.
- // TODO: Recursively mark data-edge predecessors as
- // isScheduleHigh too.
- SU->isScheduleHigh = true;
- }
- }
- LoopRegs.Deps.erase(I);
- }
- }
-
- UseList.clear();
- if (!MO.isDead())
- DefList.clear();
-
- // Calls will not be reordered because of chain dependencies (see
- // below). Since call operands are dead, calls may continue to be added
- // to the DefList making dependence checking quadratic in the size of
- // the block. Instead, we leave only one call at the back of the
- // DefList.
- if (SU->isCall) {
- while (!DefList.empty() && DefList.back()->isCall)
- DefList.pop_back();
- }
- DefList.push_back(SU);
- } else {
- UseList.push_back(SU);
+ if (TRI->isPhysicalRegister(Reg))
+ addPhysRegDeps(SU, j);
+ else {
+ assert(!IsPostRA && "Virtual register encountered!");
+ if (MO.isDef())
+ addVRegDefDeps(SU, j);
+ else if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(SU, j);
}
}
@@ -419,9 +589,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
unsigned TrueMemOrderLatency = 0;
- if (MCID.isCall() || MI->hasUnmodeledSideEffects() ||
+ if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasVolatileMemoryRef() &&
- (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+ (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
for (std::map<const Value *, SUnit *>::iterator I =
@@ -460,7 +630,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
PendingLoads.clear();
AliasMemDefs.clear();
AliasMemUses.clear();
- } else if (MCID.mayStore()) {
+ } else if (MI->mayStore()) {
bool MayAlias = true;
TrueMemOrderLatency = STORE_LOAD_LATENCY;
if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
@@ -516,7 +686,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
/*Reg=*/0, /*isNormalMemory=*/false,
/*isMustAlias=*/false,
/*isArtificial=*/true));
- } else if (MCID.mayLoad()) {
+ } else if (MI->mayLoad()) {
bool MayAlias = true;
TrueMemOrderLatency = 0;
if (MI->isInvariantLoad(AA)) {
@@ -558,32 +728,27 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
if (PrevMI)
FirstDbgValue = PrevMI;
- for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
- Defs[i].clear();
- Uses[i].clear();
- }
+ Defs.clear();
+ Uses.clear();
+ VRegDefs.clear();
PendingLoads.clear();
}
-void ScheduleDAGInstrs::FinishBlock() {
- // Nothing to do.
-}
-
-void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
+void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
// Compute the latency for the node.
if (!InstrItins || InstrItins->isEmpty()) {
SU->Latency = 1;
// Simplistic target-independent heuristic: assume that loads take
// extra time.
- if (SU->getInstr()->getDesc().mayLoad())
+ if (SU->getInstr()->mayLoad())
SU->Latency += 2;
} else {
SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
}
}
-void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
+void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
SDep& dep) const {
if (!InstrItins || InstrItins->isEmpty())
return;
@@ -608,7 +773,9 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
// %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
// What we want is to compute latency between def of %D6/%D7 and use of
// %Q3 instead.
- DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ if (DefMI->getOperand(Op2).isReg())
+ DefIdx = Op2;
}
MachineInstr *UseMI = Use->getInstr();
// For all uses of the register, calculate the maxmimum latency
@@ -656,43 +823,8 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
return oss.str();
}
-// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
- // For MachineInstr-based scheduling, we're rescheduling the instructions in
- // the block, so start by removing them from the block.
- while (Begin != InsertPos) {
- MachineBasicBlock::iterator I = Begin;
- ++Begin;
- BB->remove(I);
- }
-
- // If first instruction was a DBG_VALUE then put it back.
- if (FirstDbgValue)
- BB->insert(InsertPos, FirstDbgValue);
-
- // Then re-insert them according to the given schedule.
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- if (SUnit *SU = Sequence[i])
- BB->insert(InsertPos, SU->getInstr());
- else
- // Null SUnit* is a noop.
- EmitNoop();
- }
-
- // Update the Begin iterator, as the first instruction in the block
- // may have been scheduled later.
- if (!Sequence.empty())
- Begin = Sequence[0]->getInstr();
-
- // Reinsert any remaining debug_values.
- for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
- DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
- std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
- MachineInstr *DbgValue = P.first;
- MachineInstr *OrigPrivMI = P.second;
- BB->insertAfter(OrigPrivMI, DbgValue);
- }
- DbgValues.clear();
- FirstDbgValue = NULL;
- return BB;
+/// Return the basic block label. It is not necessarily unique because a block
+/// may contain multiple scheduling regions. But it is fine for visualization.
+std::string ScheduleDAGInstrs::getDAGName() const {
+ return "dag." + BB->getFullName();
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
deleted file mode 100644
index 666bdf548c71..000000000000
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ /dev/null
@@ -1,212 +0,0 @@
-//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the ScheduleDAGInstrs class, which implements
-// scheduling for a MachineInstr-based dependency graph.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SCHEDULEDAGINSTRS_H
-#define SCHEDULEDAGINSTRS_H
-
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include <map>
-
-namespace llvm {
- class MachineLoopInfo;
- class MachineDominatorTree;
-
- /// LoopDependencies - This class analyzes loop-oriented register
- /// dependencies, which are used to guide scheduling decisions.
- /// For example, loop induction variable increments should be
- /// scheduled as soon as possible after the variable's last use.
- ///
- class LLVM_LIBRARY_VISIBILITY LoopDependencies {
- const MachineLoopInfo &MLI;
- const MachineDominatorTree &MDT;
-
- public:
- typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> >
- LoopDeps;
- LoopDeps Deps;
-
- LoopDependencies(const MachineLoopInfo &mli,
- const MachineDominatorTree &mdt) :
- MLI(mli), MDT(mdt) {}
-
- /// VisitLoop - Clear out any previous state and analyze the given loop.
- ///
- void VisitLoop(const MachineLoop *Loop) {
- assert(Deps.empty() && "stale loop dependencies");
-
- MachineBasicBlock *Header = Loop->getHeader();
- SmallSet<unsigned, 8> LoopLiveIns;
- for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
- LE = Header->livein_end(); LI != LE; ++LI)
- LoopLiveIns.insert(*LI);
-
- const MachineDomTreeNode *Node = MDT.getNode(Header);
- const MachineBasicBlock *MBB = Node->getBlock();
- assert(Loop->contains(MBB) &&
- "Loop does not contain header!");
- VisitRegion(Node, MBB, Loop, LoopLiveIns);
- }
-
- private:
- void VisitRegion(const MachineDomTreeNode *Node,
- const MachineBasicBlock *MBB,
- const MachineLoop *Loop,
- const SmallSet<unsigned, 8> &LoopLiveIns) {
- unsigned Count = 0;
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- const MachineInstr *MI = I;
- if (MI->isDebugValue())
- continue;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
- continue;
- unsigned MOReg = MO.getReg();
- if (LoopLiveIns.count(MOReg))
- Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
- }
- ++Count; // Not every iteration due to dbg_value above.
- }
-
- const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
- for (std::vector<MachineDomTreeNode*>::const_iterator I =
- Children.begin(), E = Children.end(); I != E; ++I) {
- const MachineDomTreeNode *ChildNode = *I;
- MachineBasicBlock *ChildBlock = ChildNode->getBlock();
- if (Loop->contains(ChildBlock))
- VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns);
- }
- }
- };
-
- /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
- /// MachineInstrs.
- class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG {
- const MachineLoopInfo &MLI;
- const MachineDominatorTree &MDT;
- const MachineFrameInfo *MFI;
- const InstrItineraryData *InstrItins;
-
- /// Defs, Uses - Remember where defs and uses of each physical register
- /// are as we iterate upward through the instructions. This is allocated
- /// here instead of inside BuildSchedGraph to avoid the need for it to be
- /// initialized and destructed for each block.
- std::vector<std::vector<SUnit *> > Defs;
- std::vector<std::vector<SUnit *> > Uses;
-
- /// PendingLoads - Remember where unknown loads are after the most recent
- /// unknown store, as we iterate. As with Defs and Uses, this is here
- /// to minimize construction/destruction.
- std::vector<SUnit *> PendingLoads;
-
- /// LoopRegs - Track which registers are used for loop-carried dependencies.
- ///
- LoopDependencies LoopRegs;
-
- /// LoopLiveInRegs - Track which regs are live into a loop, to help guide
- /// back-edge-aware scheduling.
- ///
- SmallSet<unsigned, 8> LoopLiveInRegs;
-
- protected:
-
- /// DbgValues - Remember instruction that preceeds DBG_VALUE.
- typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
- DbgValueVector;
- DbgValueVector DbgValues;
- MachineInstr *FirstDbgValue;
-
- public:
- MachineBasicBlock::iterator Begin; // The beginning of the range to
- // be scheduled. The range extends
- // to InsertPos.
- unsigned InsertPosIndex; // The index in BB of InsertPos.
-
- explicit ScheduleDAGInstrs(MachineFunction &mf,
- const MachineLoopInfo &mli,
- const MachineDominatorTree &mdt);
-
- virtual ~ScheduleDAGInstrs() {}
-
- /// NewSUnit - Creates a new SUnit and return a ptr to it.
- ///
- SUnit *NewSUnit(MachineInstr *MI) {
-#ifndef NDEBUG
- const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
-#endif
- SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
- assert((Addr == 0 || Addr == &SUnits[0]) &&
- "SUnits std::vector reallocated on the fly!");
- SUnits.back().OrigNode = &SUnits.back();
- return &SUnits.back();
- }
-
- /// Run - perform scheduling.
- ///
- void Run(MachineBasicBlock *bb,
- MachineBasicBlock::iterator begin,
- MachineBasicBlock::iterator end,
- unsigned endindex);
-
- /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are
- /// input.
- virtual void BuildSchedGraph(AliasAnalysis *AA);
-
- /// AddSchedBarrierDeps - Add dependencies from instructions in the current
- /// list of instructions being scheduled to scheduling barrier. We want to
- /// make sure instructions which define registers that are either used by
- /// the terminator or are live-out are properly scheduled. This is
- /// especially important when the definition latency of the return value(s)
- /// are too high to be hidden by the branch or when the liveout registers
- /// used by instructions in the fallthrough block.
- void AddSchedBarrierDeps();
-
- /// ComputeLatency - Compute node latency.
- ///
- virtual void ComputeLatency(SUnit *SU);
-
- /// ComputeOperandLatency - Override dependence edge latency using
- /// operand use/def information
- ///
- virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
- SDep& dep) const;
-
- virtual MachineBasicBlock *EmitSchedule();
-
- /// StartBlock - Prepare to perform scheduling in the given block.
- ///
- virtual void StartBlock(MachineBasicBlock *BB);
-
- /// Schedule - Order nodes according to selected style, filling
- /// in the Sequence member.
- ///
- virtual void Schedule() = 0;
-
- /// FinishBlock - Clean up after scheduling in the given block.
- ///
- virtual void FinishBlock();
-
- virtual void dumpNode(const SUnit *SU) const;
-
- virtual std::string getGraphNodeLabel(const SUnit *SU) const;
- };
-}
-
-#endif
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 4b55a2284f85..38feee95a58e 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -25,7 +25,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Config/config.h"
#include <fstream>
using namespace llvm;
@@ -42,12 +41,12 @@ namespace llvm {
static bool renderGraphFromBottomUp() {
return true;
}
-
+
static bool hasNodeAddressLabel(const SUnit *Node,
const ScheduleDAG *Graph) {
return true;
}
-
+
/// If you want to override the dot attributes printed for a particular
/// edge, override this method.
static std::string getEdgeAttributes(const SUnit *Node,
@@ -59,7 +58,7 @@ namespace llvm {
return "color=blue,style=dashed";
return "";
}
-
+
std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph);
static std::string getNodeAttributes(const SUnit *N,
@@ -82,18 +81,17 @@ std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
/// rendered using 'dot'.
///
-void ScheduleDAG::viewGraph() {
-// This code is only for debugging!
+void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) {
+ // This code is only for debugging!
#ifndef NDEBUG
- if (BB->getBasicBlock())
- ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false,
- "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() +
- ":" + BB->getBasicBlock()->getNameStr());
- else
- ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false,
- "Scheduling-Units Graph for " + MF.getFunction()->getNameStr());
+ ViewGraph(this, Name, false, Title);
#else
errs() << "ScheduleDAG::viewGraph is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
#endif // NDEBUG
}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAG::viewGraph() {
+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
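
A usage sketch for the new overload; it assumes a debug build with Graphviz available and that these methods are reachable from the calling pass, and the suffix and title strings are made up:

#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/ScheduleDAG.h"
using namespace llvm;

static void dumpRegionGraph(ScheduleDAG &DAG) {
  DAG.viewGraph(DAG.getDAGName() + ".before", "SUnits before scheduling");
  DAG.viewGraph();  // no-argument overload; convenient to call by hand from gdb
}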
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index b80c01ed58b9..3d22035974da 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -140,8 +140,6 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
unsigned freeUnits = IS->getUnits();
switch (IS->getReservationKind()) {
- default:
- assert(0 && "Invalid FU reservation");
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
freeUnits &= ~ReservedScoreboard[StageCycle];
@@ -194,8 +192,6 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
unsigned freeUnits = IS->getUnits();
switch (IS->getReservationKind()) {
- default:
- assert(0 && "Invalid FU reservation");
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
freeUnits &= ~ReservedScoreboard[cycle + i];
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 2282f0e6eb83..a6bdc3be32e0 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -10,24 +10,16 @@ add_llvm_library(LLVMSelectionDAG
LegalizeTypesGeneric.cpp
LegalizeVectorOps.cpp
LegalizeVectorTypes.cpp
+ ResourcePriorityQueue.cpp
ScheduleDAGFast.cpp
- ScheduleDAGList.cpp
ScheduleDAGRRList.cpp
ScheduleDAGSDNodes.cpp
SelectionDAG.cpp
SelectionDAGBuilder.cpp
+ SelectionDAGDumper.cpp
SelectionDAGISel.cpp
SelectionDAGPrinter.cpp
+ ScheduleDAGVLIW.cpp
TargetLowering.cpp
TargetSelectionDAGInfo.cpp
)
-
-add_llvm_library_dependencies(LLVMSelectionDAG
- LLVMAnalysis
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7b878688df63..d1b998f8d840 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22,7 +22,6 @@
#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
@@ -64,7 +63,24 @@ namespace {
bool LegalTypes;
// Worklist of all of the nodes that need to be simplified.
- std::vector<SDNode*> WorkList;
+ //
+ // This has the semantics that when adding to the worklist,
+ // the item added must be next to be processed. It should
+ // also only appear once. The naive approach to this takes
+ // linear time.
+ //
+ // To reduce the insert/remove time to logarithmic, we use
+ // a set and a vector to maintain our worklist.
+ //
+ // The set contains the items on the worklist, but does not
+ // maintain the order they should be visited.
+ //
+ // The vector maintains the order nodes should be visited, but may
+ // contain duplicate or removed nodes. When choosing a node to
+ // visit, we pop off the order stack until we find an item that is
+ // also in the contents set. All operations are O(log N).
+ SmallPtrSet<SDNode*, 64> WorkListContents;
+ SmallVector<SDNode*, 64> WorkListOrder;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA;
@@ -84,18 +100,17 @@ namespace {
SDValue visit(SDNode *N);
public:
- /// AddToWorkList - Add to the work list making sure it's instance is at the
- /// the back (next to be processed.)
+ /// AddToWorkList - Add to the work list making sure its instance is at the
+    /// back (next to be processed).
void AddToWorkList(SDNode *N) {
- removeFromWorkList(N);
- WorkList.push_back(N);
+ WorkListContents.insert(N);
+ WorkListOrder.push_back(N);
}
/// removeFromWorkList - remove all instances of N from the worklist.
///
void removeFromWorkList(SDNode *N) {
- WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
- WorkList.end());
+ WorkListContents.erase(N);
}
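
The two-container worklist is worth spelling out, since the order vector deliberately tolerates stale entries. A minimal, self-contained version of the pattern (standard containers rather than SmallPtrSet/SmallVector, and ints in place of SDNode pointers):

#include <cstdio>
#include <unordered_set>
#include <vector>

struct Worklist {
  std::unordered_set<int> Contents;  // what the worklist *should* contain
  std::vector<int> Order;            // visit order; may hold stale duplicates
  void add(int N) { Contents.insert(N); Order.push_back(N); }
  void remove(int N) { Contents.erase(N); }  // no O(n) scan of Order needed
  bool empty() const { return Contents.empty(); }
  int pop() {
    int N;
    do {                             // skip entries that were removed or re-added
      N = Order.back();
      Order.pop_back();
    } while (!Contents.erase(N));
    return N;
  }
};

int main() {
  Worklist WL;
  WL.add(1); WL.add(2); WL.add(1);   // re-adding 1 moves it to the back
  WL.remove(2);                      // 2 is now stale in Order
  while (!WL.empty())
    std::printf("%d\n", WL.pop());   // prints only 1
  return 0;
}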
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
@@ -159,7 +174,9 @@ namespace {
SDValue visitADD(SDNode *N);
SDValue visitSUB(SDNode *N);
SDValue visitADDC(SDNode *N);
+ SDValue visitSUBC(SDNode *N);
SDValue visitADDE(SDNode *N);
+ SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitUDIV(SDNode *N);
@@ -181,7 +198,9 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
@@ -279,7 +298,7 @@ namespace {
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
- : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted),
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
/// Run - runs the dag combiner on all nodes in the work list
@@ -362,6 +381,8 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
/// specified expression for the same cost as the expression itself, or 2 if we
/// can compute the negated form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ const TargetLowering &TLI,
+ const TargetOptions *Options,
unsigned Depth = 0) {
// No compile time optimizations on this type.
if (Op.getValueType() == MVT::ppcf128)
@@ -384,34 +405,44 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
return LegalOperations ? 0 : 1;
case ISD::FADD:
// FIXME: determine better conditions for this xform.
- if (!UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath) return 0;
+
+ // After operation legalization, it might not be legal to create new FSUBs.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
+ return 0;
// fold (fsub (fadd A, B)) -> (fsub (fneg A), B)
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
return V;
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+ Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath) return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
return 1;
case ISD::FMUL:
case ISD::FDIV:
- if (HonorSignDependentRoundingFPMath()) return 0;
+ if (Options->HonorSignDependentRoundingFPMath()) return 0;
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
return V;
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+ Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FSIN:
- return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
+ Depth + 1);
}
}
@@ -435,10 +466,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
- assert(UnsafeFPMath);
+ assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options, Depth+1))
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -450,7 +483,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
Op.getOperand(0));
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- assert(UnsafeFPMath);
+ assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
@@ -463,10 +496,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::FMUL:
case ISD::FDIV:
- assert(!HonorSignDependentRoundingFPMath());
+ assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options, Depth+1))
return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -944,14 +979,13 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
void DAGCombiner::Run(CombineLevel AtLevel) {
// set the instance variables, so that the various visit routines may use it.
Level = AtLevel;
- LegalOperations = Level >= NoIllegalOperations;
- LegalTypes = Level >= NoIllegalTypes;
+ LegalOperations = Level >= AfterLegalizeVectorOps;
+ LegalTypes = Level >= AfterLegalizeTypes;
// Add all the dag nodes to the worklist.
- WorkList.reserve(DAG.allnodes_size());
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
- WorkList.push_back(I);
+ AddToWorkList(I);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
@@ -962,11 +996,17 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// done. Set it to null to avoid confusion.
DAG.setRoot(SDValue());
- // while the worklist isn't empty, inspect the node on the end of it and
+ // while the worklist isn't empty, find a node and
// try and combine it.
- while (!WorkList.empty()) {
- SDNode *N = WorkList.back();
- WorkList.pop_back();
+ while (!WorkListContents.empty()) {
+ SDNode *N;
+ // The WorkListOrder holds the SDNodes in order, but it may contain duplicates.
+ // In order to avoid a linear scan, we use a set (O(log N)) to hold what the
+ // worklist *should* contain, and check that the node we want to visit
+ // should actually be visited.
+ do {
+ N = WorkListOrder.pop_back_val();
+ } while (!WorkListContents.erase(N));
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
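// A minimal standalone sketch of the two-structure worklist described in the
// hunk above (plain C++, not the DAGCombiner members): an ordered vector that
// may hold duplicates plus a set of the nodes that still need visiting, so
// stale entries are skipped without a linear scan.
#include <cstdio>
#include <set>
#include <vector>

int main() {
  std::vector<int> WorkListOrder = {1, 2, 3, 2, 1};  // may contain duplicates
  std::set<int> WorkListContents = {1, 2, 3};        // what should be visited
  while (!WorkListContents.empty()) {
    int N;
    do {
      N = WorkListOrder.back();
      WorkListOrder.pop_back();
    } while (!WorkListContents.erase(N));            // skip stale duplicates
    std::printf("visit %d\n", N);                    // each node visited once
  }
  return 0;
}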
@@ -1050,7 +1090,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
case ISD::ADDC: return visitADDC(N);
+ case ISD::SUBC: return visitSUBC(N);
case ISD::ADDE: return visitADDE(N);
+ case ISD::SUBE: return visitSUBE(N);
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
@@ -1071,7 +1113,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRA: return visitSRA(N);
case ISD::SRL: return visitSRL(N);
case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
@@ -1408,16 +1452,14 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (VT.isInteger() && !VT.isVector()) {
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+ DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+ DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
- (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
}
}
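// A small standalone check of the identity this fold relies on (plain C++):
// when no bit position can be set in both addends, the addition produces no
// carries and is equivalent to a bitwise OR.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xFF00F000, B = 0x00FF0F0F;  // possibly-set bits are disjoint
  assert((A & B) == 0);
  assert(A + B == (A | B));                 // carry-free add is an OR
  return 0;
}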
@@ -1486,8 +1528,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
- if (N->hasNUsesOfValue(0, 1))
- return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
N->getDebugLoc(), MVT::Glue));
@@ -1503,16 +1545,14 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+ DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+ DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
- (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
N->getDebugLoc(), MVT::Glue));
@@ -1535,7 +1575,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
// fold (adde x, y, false) -> (addc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
- return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1);
return SDValue();
}
@@ -1645,6 +1685,51 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSUBC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into a SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // fold (subc x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, VT),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // fold (subc x, 0) -> x + no borrow
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (N0C && N0C->isAllOnesValue())
+ return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (sube x, y, false) -> (subc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
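// The integer identities behind the new SUBC folds, checked exhaustively on
// 8-bit values in a standalone sketch (plain C++, not SelectionDAG); none of
// the three cases produces a borrow.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x != 256; ++x) {
    uint8_t X = static_cast<uint8_t>(x);
    assert(static_cast<uint8_t>(X - X) == 0);    // (subc x, x) -> 0
    assert(static_cast<uint8_t>(X - 0) == X);    // (subc x, 0) -> x
    assert(static_cast<uint8_t>(0xFF - X) ==
           static_cast<uint8_t>(~X));            // (subc -1, x) -> ~x
  }
  return 0;
}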
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1756,7 +1841,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
- if (N1C && N1C->getSExtValue() == 1LL)
+ if (N1C && N1C->getAPIntValue() == 1LL)
return N0;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
@@ -1770,17 +1855,15 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
N0, N1);
}
// fold (sdiv X, pow2) -> simple ops after legalize
- if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
- (isPowerOf2_64(N1C->getSExtValue()) ||
- isPowerOf2_64(-N1C->getSExtValue()))) {
+ if (N1C && !N1C->isNullValue() &&
+ (N1C->getAPIntValue().isPowerOf2() ||
+ (-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
// fold.
if (TLI.isPow2DivCheap())
return SDValue();
- int64_t pow2 = N1C->getSExtValue();
- int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
- unsigned lg2 = Log2_64(abs2);
+ unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
// Splat the sign bit into the register
SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
@@ -1800,7 +1883,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
- if (pow2 > 0)
+ if (N1C->getAPIntValue().isNonNegative())
return SRA;
AddToWorkList(SRA.getNode());
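// A standalone sketch of the shift sequence this code emits for a signed
// divide by +/-2^lg2, checked against C's truncating division (assumes the
// usual arithmetic right shift of signed values on the host).
#include <cassert>
#include <cstdint>

static int32_t sdiv_pow2(int32_t X, int32_t Divisor, unsigned lg2) {
  int32_t Sgn = X >> 31;                              // splat the sign bit
  uint32_t Bias = static_cast<uint32_t>(Sgn) >> (32 - lg2);
  int32_t Sra = (X + static_cast<int32_t>(Bias)) >> lg2;
  return Divisor > 0 ? Sra : -Sra;                    // negate for -2^lg2
}

int main() {
  for (int32_t x : {-123, -8, -7, -1, 0, 1, 7, 8, 123}) {
    assert(sdiv_pow2(x, 8, 3) == x / 8);
    assert(sdiv_pow2(x, -8, 3) == x / -8);
  }
  return 0;
}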
@@ -1810,8 +1893,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// if integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence.
- if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) &&
- !TLI.isIntDivCheap()) {
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
SDValue Op = BuildSDIV(N);
if (Op.getNode()) return Op;
}
@@ -2250,6 +2332,67 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
ORNode, N0.getOperand(1));
}
+ // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+ // Only perform this optimization after type legalization and before
+ // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
+ // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+ // we don't want to undo this promotion.
+ // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+ // on scalars.
+ if ((N0.getOpcode() == ISD::BITCAST ||
+ N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
+ Level == AfterLegalizeVectorOps) {
+ SDValue In0 = N0.getOperand(0);
+ SDValue In1 = N1.getOperand(0);
+ EVT In0Ty = In0.getValueType();
+ EVT In1Ty = In1.getValueType();
+ // If both incoming values are integers, and the original types are the same.
+ if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
+ SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
+ SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
+ return BC;
+ }
+ }
+
+ // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
+ // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
+ // If both shuffles use the same mask, and both shuffle within a single
+ // vector, then it is worthwhile to move the swizzle after the operation.
+ // The type-legalizer generates this pattern when loading illegal
+ // vector types from memory. In many cases this allows additional shuffle
+ // optimizations.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N0.getOperand(1).getOpcode() == ISD::UNDEF &&
+ N1.getOperand(1).getOpcode() == ISD::UNDEF) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
+
+ assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
+ "Inputs to shuffles are not the same type");
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Check that both shuffles use the same mask. The masks are known to be of
+ // the same length because the result vector type is the same.
+ bool SameMask = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx0 = SVN0->getMaskElt(i);
+ int Idx1 = SVN1->getMaskElt(i);
+ if (Idx0 != Idx1) {
+ SameMask = false;
+ break;
+ }
+ }
+
+ if (SameMask) {
+ SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(Op.getNode());
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
+ DAG.getUNDEF(VT), &SVN0->getMask()[0]);
+ }
+ }
+
return SDValue();
}
@@ -2312,6 +2455,88 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
+ // Similarly, fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
+ // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
+ // already be zero by virtue of the width of the base type of the load.
+ //
+ // The 'X' node here can either be nothing or an extract_vector_elt to catch
+ // more cases.
+ if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD) ||
+ N0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
+ N0 : N0.getOperand(0) );
+
+ // Get the constant (if applicable) the zero'th operand is being ANDed with.
+ // This can be a pure constant or a vector splat, in which case we treat the
+ // vector as a scalar and use the splat value.
+ APInt Constant = APInt::getNullValue(1);
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ Constant = C->getAPIntValue();
+ } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
+ SplatBitSize, HasAnyUndefs);
+ if (IsSplat) {
+ // Undef bits can contribute to a possible optimisation if set, so
+ // set them.
+ SplatValue |= SplatUndef;
+
+ // The splat value may be something like "0x00FFFFFF", which means 0 for
+ // the first vector value and FF for the rest, repeating. We need a mask
+ // that will apply equally to all members of the vector, so AND all the
+ // lanes of the constant together.
+ EVT VT = Vector->getValueType(0);
+ unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
+ Constant = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i)
+ Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ }
+ }
+
+ // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
+ // actually legal and isn't going to get expanded, else this is a false
+ // optimisation.
+ bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
+ Load->getMemoryVT());
+
+ // Resize the constant to the same size as the original memory access before
+ // extension. If it is still the AllOnesValue then this AND is completely
+ // unneeded.
+ Constant =
+ Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
+
+ bool B;
+ switch (Load->getExtensionType()) {
+ default: B = false; break;
+ case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
+ case ISD::ZEXTLOAD:
+ case ISD::NON_EXTLOAD: B = true; break;
+ }
+
+ if (B && Constant.isAllOnesValue()) {
+ // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
+ // preserve semantics once we get rid of the AND.
+ SDValue NewLoad(Load, 0);
+ if (Load->getExtensionType() == ISD::EXTLOAD) {
+ NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
+ Load->getValueType(0), Load->getDebugLoc(),
+ Load->getChain(), Load->getBasePtr(),
+ Load->getOffset(), Load->getMemoryVT(),
+ Load->getMemOperand());
+ // Replace uses of the EXTLOAD with the new ZEXTLOAD.
+ CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
+
+ // Fold the AND away, taking care not to fold to the old load node if we
+ // replaced it.
+ CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
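// A standalone sketch of the lane-folding loop above (plain C++ instead of
// APInt): a repeating 32-bit splat pattern such as 0x00FFFFFF does not mask
// every i8 lane identically, so the lanes are ANDed into one element mask.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t SplatValue = 0x00FFFFFF;  // repeating pattern over v4i8 lanes
  unsigned BitWidth = 8, NumLanes = 4;
  uint32_t Constant = 0xFF;          // all-ones at the element width
  for (unsigned i = 0; i != NumLanes; ++i)
    Constant &= (SplatValue >> (i * BitWidth)) & 0xFF;  // lanes FF,FF,FF,00
  std::printf("per-element mask = 0x%02X\n", Constant); // prints 0x00
  return 0;
}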
// fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
@@ -3323,7 +3548,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
// (and (srl x, (sub c1, c2), MASK)
- if (N1C && N0.getOpcode() == ISD::SRL &&
+ // Only fold this if the inner shift has no other uses -- if it does, folding
+ // this will increase the total number of instructions.
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
if (c1 < VT.getSizeInBits()) {
@@ -3603,8 +3830,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
@@ -3612,7 +3838,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// If all of the bits input to the ctlz node are known to be zero, then
// the result of the ctlz is "32" and the result of the shift is one.
- APInt UnknownBits = ~KnownZero & Mask;
+ APInt UnknownBits = ~KnownZero;
if (UnknownBits == 0) return DAG.getConstant(1, VT);
// Otherwise, check to see if there is exactly one bit input to the ctlz.
@@ -3713,6 +3939,16 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -3723,6 +3959,16 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4108,12 +4354,17 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations) {
EVT N0VT = N0.getOperand(0).getValueType();
- // We know that the # elements of the results is the same as the
- // # elements of the compare (and the # elements of the compare result
- // for that matter). Check to see that they are the same size. If so,
- // we know that the element size of the sext'd result matches the
- // element size of the compare operands.
- if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ // On some architectures (such as SSE/NEON/etc) the SETCC result type is
+ // of the same size as the compared operands. Only optimize sext(setcc())
+ // if this is the case.
+ EVT SVT = TLI.getSetCCResultType(N0VT);
+
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == SVT.getSizeInBits())
return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
@@ -4127,11 +4378,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT MatchingVectorType =
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
- SDValue VsetCC =
- DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+
+ if (SVT == MatchingVectorType) {
+ SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
}
}
@@ -4162,6 +4415,44 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return SDValue();
}
+// isTruncateOf - If N is a truncate of some other value, return true, record
+// the value being truncated in Op and which of Op's bits are zero in KnownZero.
+// This function computes KnownZero to avoid a duplicated call to
+// ComputeMaskedBits in the caller.
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
+ APInt &KnownZero) {
+ APInt KnownOne;
+ if (N->getOpcode() == ISD::TRUNCATE) {
+ Op = N->getOperand(0);
+ DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+ return true;
+ }
+
+ if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
+ cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
+ return false;
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(Op0.getValueType() == Op1.getValueType());
+
+ ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
+ ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
+ if (COp0 && COp0->isNullValue())
+ Op = Op1;
+ else if (COp1 && COp1->isNullValue())
+ Op = Op0;
+ else
+ return false;
+
+ DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+
+ if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
+ return false;
+
+ return true;
+}
+
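// A standalone sketch of the condition isTruncateOf feeds into the zext fold
// below: zext(trunc x) is x exactly when the bits removed by the truncate are
// already zero (plain C++ casts stand in for TRUNCATE/ZERO_EXTEND).
#include <cassert>
#include <cstdint>

int main() {
  uint32_t Small = 0x00001234;  // truncated (upper 16) bits already zero
  uint32_t Big   = 0xABCD1234;  // truncated bits are not zero
  assert(static_cast<uint32_t>(static_cast<uint16_t>(Small)) == Small);
  assert(static_cast<uint32_t>(static_cast<uint16_t>(Big)) != Big);
  return 0;
}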
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4175,6 +4466,30 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
N0.getOperand(0));
+ // fold (zext (truncate x)) -> (zext x) or
+ // (zext (truncate x)) -> (truncate x)
+ // This is valid when the truncated bits of x are already zero.
+ // FIXME: We should extend this to work for vectors too.
+ SDValue Op;
+ APInt KnownZero;
+ if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
+ APInt TruncatedBits =
+ (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
+ APInt(Op.getValueSizeInBits(), 0) :
+ APInt::getBitsSet(Op.getValueSizeInBits(),
+ N0.getValueSizeInBits(),
+ std::min(Op.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (TruncatedBits == (KnownZero & TruncatedBits)) {
+ if (VT.bitsGT(Op.getValueType()))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op);
+ if (VT.bitsLT(Op.getValueType()))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+
+ return Op;
+ }
+ }
+
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
@@ -4567,6 +4882,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
switch (V.getOpcode()) {
default: break;
+ case ISD::Constant: {
+ const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+ assert(CV != 0 && "Const value should be ConstSDNode.");
+ const APInt &CVal = CV->getAPIntValue();
+ APInt NewVal = CVal & Mask;
+ if (NewVal != CVal) {
+ return DAG.getConstant(NewVal, V.getValueType());
+ }
+ break;
+ }
case ISD::OR:
case ISD::XOR:
// If the LHS or RHS don't contribute bits to the or, drop them.
@@ -4705,7 +5030,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (ExtType == ISD::NON_EXTLOAD)
Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(), NewAlign);
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), NewAlign);
else
Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
@@ -4844,6 +5170,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ bool isLE = TLI.isLittleEndian();
// noop truncate
if (N0.getValueType() == N->getValueType(0))
@@ -4871,6 +5198,44 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return N0.getOperand(0);
}
+ // Fold extract-and-trunc into a narrow extract. For example:
+ // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
+ // i32 y = TRUNCATE(i64 x)
+ // -- becomes --
+ // v16i8 b = BITCAST (v2i64 val)
+ // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
+ //
+ // Note: We only run this optimization after type legalization (which often
+ // creates this pattern) and before operation legalization after which
+ // we need to be more careful about the vector instructions that we generate.
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ LegalTypes && !LegalOperations && N0->hasOneUse()) {
+
+ EVT VecTy = N0.getOperand(0).getValueType();
+ EVT ExTy = N0.getValueType();
+ EVT TrTy = N->getValueType(0);
+
+ unsigned NumElem = VecTy.getVectorNumElements();
+ unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+ assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
+
+ SDValue EltNo = N0->getOperand(1);
+ if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+
+ int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
+
+ SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ NVT, N0.getOperand(0));
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ N->getDebugLoc(), TrTy, V,
+ DAG.getConstant(Index, MVT::i32));
+ }
+ }
+
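// A standalone sketch of the index arithmetic used above, on a little-endian
// host: truncating element 1 of a v2i64 to i32 selects lane Elt*SizeRatio of
// the same bytes viewed as v4i32 (memcpy stands in for the BITCAST).
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint64_t Vec[2] = {0x1111111122222222ULL, 0x3333333344444444ULL};
  uint32_t Lanes[4];
  std::memcpy(Lanes, Vec, sizeof(Vec));       // "bitcast" v2i64 -> v4i32
  unsigned Elt = 1, SizeRatio = 64 / 32;
  unsigned Index = Elt * SizeRatio;           // little-endian lane index: 2
  assert(Lanes[Index] == static_cast<uint32_t>(Vec[Elt]));  // 0x44444444
  return 0;
}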
// See if we can simplify the input to this truncate through knowledge that
// only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" // -> trunc y
@@ -4934,7 +5299,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
LD1->getBasePtr(), LD1->getPointerInfo(),
- false, false, Align);
+ false, false, false, Align);
}
return SDValue();
@@ -5004,7 +5369,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
LN0->getBasePtr(), LN0->getPointerInfo(),
LN0->isVolatile(), LN0->isNonTemporal(),
- OrigAlign);
+ LN0->isInvariant(), OrigAlign);
AddToWorkList(N);
CombineTo(N0.getNode(),
DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
@@ -5017,7 +5382,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
// This often reduces constant pool loads.
- if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
+ if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
+ (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
N0.getOperand(0));
@@ -5247,20 +5613,24 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
// fold (fadd A, 0) -> A
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N1CFP->getValueAPF().isZero())
return N0;
// fold (fadd A, (fneg B)) -> (fsub A, B)
- if (isNegatibleForFree(N1, LegalOperations) == 2)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// fold (fadd (fneg A), B) -> (fsub B, A)
- if (isNegatibleForFree(N0, LegalOperations) == 2)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
// If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
- if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
- N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
@@ -5285,20 +5655,39 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
// fold (fsub A, 0) -> A
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
return N0;
// fold (fsub 0, B) -> -B
- if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
- if (isNegatibleForFree(N1, LegalOperations))
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations))
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
+ // If 'unsafe math' is enabled, fold
+ // (fsub x, (fadd x, y)) -> (fneg y) &
+ // (fsub x, (fadd y, x)) -> (fneg y)
+ if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (N1.getOpcode() == ISD::FADD) {
+ SDValue N10 = N1->getOperand(0);
+ SDValue N11 = N1->getOperand(1);
+
+ if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N11, DAG, LegalOperations);
+ else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N10, DAG, LegalOperations);
+ }
+ }
+
return SDValue();
}
@@ -5308,6 +5697,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// fold vector ops
if (VT.isVector()) {
@@ -5322,10 +5712,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
// fold (fmul A, 0) -> 0
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
return N1;
// fold (fmul A, 0) -> 0, vector edition.
- if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ ISD::isBuildVectorAllZeros(N1.getNode()))
return N1;
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
@@ -5336,8 +5728,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -5348,7 +5742,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
// If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
- if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N0.getOpcode() == ISD::FMUL &&
N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
@@ -5363,6 +5758,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// fold vector ops
if (VT.isVector()) {
@@ -5374,10 +5770,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+ // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
+ if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) {
+ // Compute the reciprocal 1.0 / c2.
+ APFloat N1APF = N1CFP->getValueAPF();
+ APFloat Recip(N1APF.getSemantics(), 1); // 1.0
+ APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+ // Only do the transform if the reciprocal is a legal fp immediate that
+ // isn't too nasty (eg NaN, denormal, ...).
+ if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
+ (!LegalOperations ||
+ // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+ // backend)... we should handle this gracefully after Legalize.
+ // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(Recip, VT)))
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0,
+ DAG.getConstantFP(Recip, VT));
+ }
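// A standalone illustration of why this transform is gated on UnsafeFPMath:
// the reciprocal of 4.0 is exact, so multiplying matches dividing, while the
// reciprocal of 3.0 is rounded and the product can differ by an ulp.
#include <cstdio>

int main() {
  double x = 10.0;
  std::printf("x*(1/4.0) == x/4.0: %d\n", x * (1.0 / 4.0) == x / 4.0); // 1
  std::printf("x*(1/3.0) == x/3.0: %d\n", x * (1.0 / 3.0) == x / 3.0); // often 0
  return 0;
}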
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -5463,7 +5879,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
// fold (sint_to_fp c1) -> c1fp
if (N0C && OpVT != MVT::ppcf128 &&
// ...but only if the target supports immediate floating-point values
- (Level == llvm::Unrestricted ||
+ (!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
@@ -5488,7 +5904,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
// fold (uint_to_fp c1) -> c1fp
if (N0C && OpVT != MVT::ppcf128 &&
// ...but only if the target supports immediate floating-point values
- (Level == llvm::Unrestricted ||
+ (!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
@@ -5630,12 +6046,13 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (isNegatibleForFree(N0, LegalOperations))
+ if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options))
return GetNegatedExpression(N0, DAG, LegalOperations);
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BITCAST &&
+ if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
!VT.isVector() &&
N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger()) {
@@ -5671,7 +6088,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
// Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
+ if (!TLI.isFAbsFree(VT) &&
+ N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger() &&
!N0.getOperand(0).getValueType().isVector()) {
SDValue Int = N0.getOperand(0);
@@ -5860,6 +6278,47 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
return SDValue();
}
+/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
+/// uses N as its base pointer and that N may be folded in the load / store
+/// addressing mode.
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = Use->getValueType(0);
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getValue().getValueType();
+ } else
+ return false;
+
+ TargetLowering::AddrMode AM;
+ if (N->getOpcode() == ISD::ADD) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else if (N->getOpcode() == ISD::SUB) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = -Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else
+ return false;
+
+ return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
+}
+
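// A standalone sketch of the classification canFoldInAddressingMode performs,
// using a stand-in struct rather than TargetLowering::AddrMode: a constant
// offset becomes [reg +/- imm] (negated for SUB), anything else [reg + reg].
#include <cstdint>
#include <cstdio>

struct AddrMode { int64_t BaseOffs = 0; int64_t Scale = 0; };

static AddrMode classify(bool IsSub, const int64_t *ConstOffset) {
  AddrMode AM;
  if (ConstOffset)
    AM.BaseOffs = IsSub ? -*ConstOffset : *ConstOffset;  // [reg +/- imm]
  else
    AM.Scale = 1;                                        // [reg +/- reg]
  return AM;
}

int main() {
  int64_t Off = 16;
  std::printf("sub imm: BaseOffs = %lld\n",
              (long long)classify(true, &Off).BaseOffs);   // -16
  std::printf("add reg: Scale    = %lld\n",
              (long long)classify(false, nullptr).Scale);  // 1
  return 0;
}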
/// CombineToPreIndexedLoadStore - Try turning a load / store into a
/// pre-indexed load / store when the base pointer is an add or subtract
/// and it has other uses besides the load / store. After the
@@ -5867,7 +6326,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
/// the add / subtract in and all of its other uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
- if (!LegalOperations)
+ if (Level < AfterLegalizeDAG)
return false;
bool isLoad = true;
@@ -5946,10 +6405,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (N->hasPredecessorHelper(Use, Visited, Worklist))
return false;
- if (!((Use->getOpcode() == ISD::LOAD &&
- cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
- (Use->getOpcode() == ISD::STORE &&
- cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+ // If Ptr may be folded into the addressing mode of another use, then it's
+ // not profitable to do this transformation.
+ if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
RealUse = true;
}
@@ -5999,7 +6457,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
/// load / store effectively and all of its uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
- if (!LegalOperations)
+ if (Level < AfterLegalizeDAG)
return false;
bool isLoad = true;
@@ -6046,7 +6504,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
continue;
// Try turning it into a post-indexed load / store except when
- // 1) All uses are load / store ops that use it as base ptr.
+ // 1) All uses are load / store ops that use it as base ptr (and
+ // it may be folded into the addressing mode).
// 2) Op must be independent of N, i.e. Op is neither a predecessor
// nor a successor of N. Otherwise, if Op is folded that would
// create a cycle.
@@ -6069,10 +6528,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
for (SDNode::use_iterator III = Use->use_begin(),
EEE = Use->use_end(); III != EEE; ++III) {
SDNode *UseUse = *III;
- if (!((UseUse->getOpcode() == ISD::LOAD &&
- cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
- (UseUse->getOpcode() == ISD::STORE &&
- cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+ if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
RealUse = true;
}
@@ -6139,7 +6595,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (!LD->isVolatile()) {
if (N->getValueType(1) == MVT::Other) {
// Unindexed loads.
- if (N->hasNUsesOfValue(0, 0)) {
+ if (!N->hasAnyUseOfValue(0)) {
// It's not safe to use the two value CombineTo variant here. e.g.
// v1, chain2 = load chain1, loc
// v2, chain3 = load chain2, loc
@@ -6164,7 +6620,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
- if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+ if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
@@ -6222,7 +6678,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
BetterChain, Ptr, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ LD->isInvariant(), LD->getAlignment());
} else {
ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
LD->getValueType(0),
@@ -6486,7 +6942,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff),
LD->isVolatile(), LD->isNonTemporal(),
- NewAlign);
+ LD->isInvariant(), NewAlign);
SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
@@ -6546,7 +7002,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
LD->getChain(), LD->getBasePtr(),
LD->getPointerInfo(),
- false, false, LDAlign);
+ false, false, false, LDAlign);
SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
NewLD, ST->getBasePtr(),
@@ -6823,13 +7279,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
SDValue InVec = N->getOperand(0);
+ EVT VT = InVec.getValueType();
+ EVT NVT = N->getValueType(0);
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
SDValue InOp = InVec.getOperand(0);
- EVT NVT = N->getValueType(0);
if (InOp.getValueType() != NVT) {
assert(InOp.getValueType().isInteger() && NVT.isInteger());
return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
@@ -6837,6 +7294,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
return InOp;
}
+ SDValue EltNo = N->getOperand(1);
+ bool ConstEltNo = isa<ConstantSDNode>(EltNo);
+
+ // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
+ // We only perform this optimization before the op legalization phase because
+ // we may introduce new vector instructions which are not backed by TD patterns.
+ // For example on AVX, extracting elements from a wide vector without using
+ // extract_subvector.
+ if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE && ConstEltNo &&
+ !LegalOperations) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int NumElem = VT.getVectorNumElements();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
+ // Find the new index to extract from.
+ int OrigElt = SVOp->getMaskElt(Elt);
+
+ // Extracting an undef index is undef.
+ if (OrigElt == -1)
+ return DAG.getUNDEF(NVT);
+
+ // Select the right vector half to extract from.
+ if (OrigElt < NumElem) {
+ InVec = InVec->getOperand(0);
+ } else {
+ InVec = InVec->getOperand(1);
+ OrigElt -= NumElem;
+ }
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
+ InVec, DAG.getConstant(OrigElt, MVT::i32));
+ }
+
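// A standalone sketch of the mask lookup above: the extract index is mapped
// through the shuffle mask, the left or right input is chosen, and the index
// is rebased into that input (plain C++ vectors stand in for the DAG nodes).
#include <cassert>
#include <vector>

int main() {
  // result = shuffle(LHS, RHS, <5, 1, 7, 2>) over v4i32 inputs.
  std::vector<int> LHS = {10, 11, 12, 13}, RHS = {20, 21, 22, 23};
  std::vector<int> Mask = {5, 1, 7, 2};
  int Elt = 0, NumElem = 4;
  int OrigElt = Mask[Elt];                          // 5: comes from RHS
  const std::vector<int> &Src = OrigElt < NumElem ? LHS : RHS;
  if (OrigElt >= NumElem)
    OrigElt -= NumElem;                             // rebase into that half
  assert(Src[OrigElt] == 21);                       // == extracting lane 0
  return 0;
}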
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
if (!LegalOperations) return SDValue();
@@ -6844,17 +7333,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
- SDValue EltNo = N->getOperand(1);
- if (isa<ConstantSDNode>(EltNo)) {
+ if (ConstEltNo) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
bool NewLoad = false;
bool BCNumEltsChanged = false;
- EVT VT = InVec.getValueType();
EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
+ // If the result of the load has to be truncated, then it's not necessarily
+ // profitable.
+ if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ return SDValue();
+
if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
EVT BCVT = InVec.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
@@ -6872,12 +7368,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
} else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
InVec.getOperand(0).getValueType() == ExtVT &&
ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
LN0 = cast<LoadSDNode>(InVec.getOperand(0));
} else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
// (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
// =>
// (load $addr+1*size)
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
// If the bit convert changed the number of elements, it is unsafe
// to examine the mask.
if (BCNumEltsChanged)
@@ -6888,14 +7392,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
- if (InVec.getOpcode() == ISD::BITCAST)
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
InVec = InVec.getOperand(0);
+ }
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
}
}
+ // Make sure we found a non-volatile load and the extractelement is
+ // the only use.
if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
return SDValue();
@@ -6929,9 +7440,45 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
DAG.getConstant(PtrOff, PtrType));
}
- return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(), Align);
+ // The replacement we need to do here is a little tricky: we need to
+ // replace an extractelement of a load with a load.
+ // Use ReplaceAllUsesOfValuesWith to do the replacement.
+ // Note that this replacement assumes that the extractelement is the only
+ // use of the load; that's okay because we don't want to perform this
+ // transformation in other cases anyway.
+ SDValue Load;
+ SDValue Chain;
+ if (NVT.bitsGT(LVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
+ ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(),
+ NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
+ LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align);
+ Chain = Load.getValue(1);
+ } else {
+ Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), Align);
+ Chain = Load.getValue(1);
+ if (NVT.bitsLT(LVT))
+ Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load);
+ else
+ Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load);
+ }
+ WorkListRemover DeadNodes(*this);
+ SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
+ SDValue To[] = { Load, Chain };
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes);
+ // Since we're explicitly calling ReplaceAllUses, add the new node to the
+ // worklist explicitly as well.
+ AddToWorkList(Load.getNode());
+ AddUsersToWorkList(Load.getNode()); // Add users too
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorkList(N);
+ return SDValue(N, 0);
}
return SDValue();
@@ -6939,11 +7486,122 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
+ // Check to see if this is a BUILD_VECTOR of a bunch of values
+ // which come from any_extend or zero_extend nodes. If so, we can create
+ // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
+ // optimizations. We do not handle sign-extend because we can't fill the sign
+ // using shuffles.
+ EVT SourceType = MVT::Other;
+ bool AllAnyExt = true;
+ bool AllUndef = true;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ // Ignore undef inputs.
+ if (In.getOpcode() == ISD::UNDEF) continue;
+ AllUndef = false;
+
+ bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
+ bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
+
+ // Abort if the element is not an extension.
+ if (!ZeroExt && !AnyExt) {
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // The input is a ZeroExt or AnyExt. Check the original type.
+ EVT InTy = In.getOperand(0).getValueType();
+
+ // Check that all of the widened source types are the same.
+ if (SourceType == MVT::Other)
+ // First time.
+ SourceType = InTy;
+ else if (InTy != SourceType) {
+ // Multiple incoming types. Abort.
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // Check if all of the extends are ANY_EXTENDs.
+ AllAnyExt &= AnyExt;
+ }
+
+ if (AllUndef)
+ return DAG.getUNDEF(VT);
+
+ // In order to have valid types, all of the inputs must be extended from the
+ // same source type and all of the inputs must be any or zero extend.
+ // Scalar sizes must be a power of two.
+ EVT OutScalarTy = N->getValueType(0).getScalarType();
+ bool ValidTypes = SourceType != MVT::Other &&
+ isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
+ isPowerOf2_32(SourceType.getSizeInBits());
+
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+ // Performing this optimization earlier may create bit-casts which
+ // will be type-legalized to complex code sequences.
+ // We perform this optimization only before the operation legalizer because we
+ // may introduce illegal operations.
+ // Create a new simpler BUILD_VECTOR sequence which other optimizations can
+ // turn into a single shuffle instruction.
+ if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) &&
+ ValidTypes) {
+ bool isLE = TLI.isLittleEndian();
+ unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+ assert(ElemRatio > 1 && "Invalid element size ratio");
+ SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+ DAG.getConstant(0, SourceType);
+
+ unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+ // Populate the new build_vector
+ for (unsigned i=0; i < N->getNumOperands(); ++i) {
+ SDValue Cast = N->getOperand(i);
+ assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+ Cast.getOpcode() == ISD::ZERO_EXTEND ||
+ Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
+ SDValue In;
+ if (Cast.getOpcode() == ISD::UNDEF)
+ In = DAG.getUNDEF(SourceType);
+ else
+ In = Cast->getOperand(0);
+ unsigned Index = isLE ? (i * ElemRatio) :
+ (i * ElemRatio + (ElemRatio - 1));
+
+ assert(Index < Ops.size() && "Invalid index");
+ Ops[Index] = In;
+ }
+
+ // The type of the new BUILD_VECTOR node.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+ assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() &&
+ "Invalid vector size");
+ // Check if the new vector type is legal.
+ if (!isTypeLegal(VecVT)) return SDValue();
+
+ // Make the new BUILD_VECTOR.
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ VecVT, &Ops[0], Ops.size());
+
+ // The new BUILD_VECTOR node has the potential to be further optimized.
+ AddToWorkList(BV.getNode());
+ // Bitcast to the desired type.
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV);
+ }
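// A standalone sketch of the lane placement used above, on a little-endian
// host: a build_vector of i8 values zero-extended to i32 becomes a v16i8
// build_vector with zero fillers, and the bitcast back yields the zext values
// (ElemRatio = 32/8 = 4, so source element i lands at index i*4).
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t Src[4] = {0x11, 0x22, 0x33, 0x44};
  uint8_t Wide[16] = {};                  // zero filler == zero-extension
  unsigned ElemRatio = 32 / 8;
  for (unsigned i = 0; i != 4; ++i)
    Wide[i * ElemRatio] = Src[i];         // little-endian lane placement
  uint32_t Out[4];
  std::memcpy(Out, Wide, sizeof(Wide));   // the final bitcast
  for (unsigned i = 0; i != 4; ++i)
    assert(Out[i] == Src[i]);             // each lane equals zext(Src[i])
  return 0;
}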
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
// at most two distinct vectors, turn this into a shuffle node.
+
+ // May only combine to shuffle after legalize if shuffle is legal.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
+ return SDValue();
+
SDValue VecIn1, VecIn2;
for (unsigned i = 0; i != NumInScalars; ++i) {
// Ignore undef inputs.
@@ -6957,15 +7615,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
break;
}
- // If the input vector type disagrees with the result of the build_vector,
- // we can't make a shuffle.
+ // We allow up to two distinct input vectors.
SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
- if (ExtractedFromVec.getValueType() != VT) {
- VecIn1 = VecIn2 = SDValue(0, 0);
- break;
- }
-
- // Otherwise, remember this. We allow up to two distinct input vectors.
if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
continue;
@@ -6980,7 +7631,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
}
}
- // If everything is good, we can make a shuffle operation.
+ // If everything is good, we can make a shuffle operation.
if (VecIn1.getNode()) {
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumInScalars; ++i) {
@@ -7006,14 +7657,39 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
Mask.push_back(Idx+NumInScalars);
}
- // Add count and size info.
+ // We can't generate a shuffle node with mismatched input and output types.
+ // Attempt to transform a single input vector to the correct type.
+ if (VT != VecIn1.getValueType()) {
+ // We don't support shuffling between two values of different types.
+ if (VecIn2.getNode() != 0)
+ return SDValue();
+
+ // We only support widening of vectors which are half the size of the
+ // output registers. For example XMM->YMM widening on X86 with AVX.
+ if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
+ return SDValue();
+
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ }
+
+ // If VecIn2 is unused then change it to undef.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+
+ // Check that we were able to transform all incoming values to the same type.
+ if (VecIn2.getValueType() != VecIn1.getValueType() ||
+ VecIn1.getValueType() != VT)
+ return SDValue();
+
+ // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
if (!isTypeLegal(VT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.
SDValue Ops[2];
Ops[0] = VecIn1;
- Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ Ops[1] = VecIn2;
return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
}
@@ -7045,19 +7721,23 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
return SDValue();
- // Combine:
- // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
- // Into:
- // indicies are equal => V1
- // otherwise => (extract_subvec V1, ExtIdx)
- //
- SDValue InsIdx = N->getOperand(1);
- SDValue ExtIdx = V->getOperand(2);
+ // Only handle cases where both indexes are constants with the same type.
+ ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
- if (InsIdx == ExtIdx)
- return V->getOperand(1);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
- V->getOperand(0), N->getOperand(1));
+ if (InsIdx && ExtIdx &&
+ InsIdx->getValueType(0).getSizeInBits() <= 64 &&
+ ExtIdx->getValueType(0).getSizeInBits() <= 64) {
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal => V1
+ // otherwise => (extract_subvec V1, ExtIdx)
+ if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
+ return V->getOperand(1);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
+ V->getOperand(0), N->getOperand(1));
+ }
}
return SDValue();
@@ -7068,15 +7748,63 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
- assert(N0.getValueType().getVectorNumElements() == NumElts &&
- "Vector shuffle must be normalized in DAG");
+ // Canonicalize shuffle v, v -> v, undef
+ if (N0 == N1) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) Idx -= NumElts;
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT),
+ &NewMask[0]);
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N0.getOpcode() == ISD::UNDEF) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= 0) {
+ if (Idx < (int)NumElts)
+ Idx += NumElts;
+ else
+ Idx -= NumElts;
+ }
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT),
+ &NewMask[0]);
+ }
- // FIXME: implement canonicalizations from DAG.getVectorShuffle()
+ // Remove references to rhs if it is undef
+ if (N1.getOpcode() == ISD::UNDEF) {
+ bool Changed = false;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) {
+ Idx = -1;
+ Changed = true;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]);
+ }
// If it is a splat, check if the argument vector is another splat or a
// build_vector with all scalar elements the same.
- ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
SDNode *V = N0.getNode();
@@ -7115,6 +7843,40 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return N0;
}
}
+
+ // If this shuffle node is simply a swizzle of another shuffle node,
+ // and it reverses the swizzle of the previous shuffle then we can
+ // optimize shuffle(shuffle(x, undef), undef) -> x.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N1.getOpcode() == ISD::UNDEF) {
+
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+ // Shuffle nodes can only reverse shuffles with a single non-undef value.
+ if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
+ return SDValue();
+
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ assert(Idx < (int)NumElts && "Index references undef operand");
+ // Next, this index comes from the first value, which is the incoming
+ // shuffle. Adopt the incoming index.
+ if (Idx >= 0)
+ Idx = OtherSV->getMaskElt(Idx);
+
+ // The combined shuffle must map each index to itself.
+ if (Idx >= 0 && (unsigned)Idx != i)
+ return SDValue();
+ }
+
+ return OtherSV->getOperand(0);
+ }
+
return SDValue();
}
@@ -7190,7 +7952,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
SDValue Elt = RHS.getOperand(i);
if (!isa<ConstantSDNode>(Elt))
return SDValue();
- else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+
+ if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
Indices.push_back(i);
else if (cast<ConstantSDNode>(Elt)->isNullValue())
Indices.push_back(NumElts);
@@ -7261,8 +8024,19 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
EVT VT = LHSOp.getValueType();
- assert(RHSOp.getValueType() == VT &&
- "SimplifyVBinOp with different BUILD_VECTOR element types");
+ EVT RVT = RHSOp.getValueType();
+ if (RVT != VT) {
+ // Integer BUILD_VECTOR operands may have types larger than the element
+ // size (e.g., when the element type is not legal). Prior to type
+ // legalization, the types may not match between the two BUILD_VECTORS.
+ // Truncate one of the operands to make them match.
+ if (RVT.getSizeInBits() > VT.getSizeInBits()) {
+ RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp);
+ } else {
+ LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp);
+ VT = RVT;
+ }
+ }
SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
LHSOp, RHSOp);
if (FoldOp.getOpcode() != ISD::UNDEF &&
@@ -7374,8 +8148,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if ((LLD->hasAnyUseOfValue(1) &&
(LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
- (LLD->hasAnyUseOfValue(1) &&
- (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))))
+ (RLD->hasAnyUseOfValue(1) &&
+ (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
return false;
Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
@@ -7393,7 +8167,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// FIXME: Discards pointer info.
LLD->getChain(), Addr, MachinePointerInfo(),
LLD->isVolatile(), LLD->isNonTemporal(),
- LLD->getAlignment());
+ LLD->isInvariant(), LLD->getAlignment());
} else {
Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
RLD->getExtensionType() : LLD->getExtensionType(),
@@ -7509,7 +8283,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
AddToWorkList(CPIdx.getNode());
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(), false,
- false, Alignment);
+ false, false, Alignment);
}
}
@@ -7517,8 +8291,6 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
// Check to see if we can perform the "gzip trick", transforming
// (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
- N0.getValueType().isInteger() &&
- N2.getValueType().isInteger() &&
(N1C->isNullValue() || // (a < 0) ? b : 0
(N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
EVT XType = N0.getValueType();
@@ -7720,7 +8492,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
std::vector<SDNode*> Built;
- SDValue S = TLI.BuildSDIV(N, DAG, &Built);
+ SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);
for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
ii != ee; ++ii)
@@ -7734,7 +8506,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
std::vector<SDNode*> Built;
- SDValue S = TLI.BuildUDIV(N, DAG, &Built);
+ SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);
for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
ii != ee; ++ii)
@@ -7856,30 +8628,20 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
- SDValue &Ptr, int64_t &Size,
- const Value *&SrcValue,
- int &SrcValueOffset,
- unsigned &SrcValueAlign,
- const MDNode *&TBAAInfo) const {
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- Ptr = LD->getBasePtr();
- Size = LD->getMemoryVT().getSizeInBits() >> 3;
- SrcValue = LD->getSrcValue();
- SrcValueOffset = LD->getSrcValueOffset();
- SrcValueAlign = LD->getOriginalAlignment();
- TBAAInfo = LD->getTBAAInfo();
- return true;
- }
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
- Ptr = ST->getBasePtr();
- Size = ST->getMemoryVT().getSizeInBits() >> 3;
- SrcValue = ST->getSrcValue();
- SrcValueOffset = ST->getSrcValueOffset();
- SrcValueAlign = ST->getOriginalAlignment();
- TBAAInfo = ST->getTBAAInfo();
- return false;
- }
- llvm_unreachable("FindAliasInfo expected a memory operand");
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue,
+ int &SrcValueOffset,
+ unsigned &SrcValueAlign,
+ const MDNode *&TBAAInfo) const {
+ LSBaseSDNode *LS = cast<LSBaseSDNode>(N);
+
+ Ptr = LS->getBasePtr();
+ Size = LS->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = LS->getSrcValue();
+ SrcValueOffset = LS->getSrcValueOffset();
+ SrcValueAlign = LS->getOriginalAlignment();
+ TBAAInfo = LS->getTBAAInfo();
+ return isa<LoadSDNode>(LS);
}
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index e8f8c73d6883..0c1ac6982d2a 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -39,6 +39,7 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "isel"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
@@ -58,8 +59,15 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
using namespace llvm;
+STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
+ "target-independent selector");
+STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
+ "target-specific selector");
+STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
+
/// startNewBlock - Set the current block to which generated machine
/// instructions will be appended, and clear the local CSE map.
///
@@ -96,6 +104,11 @@ bool FastISel::hasTrivialKill(const Value *V) const {
!hasTrivialKill(Cast->getOperand(0)))
return false;
+ // GEPs with all zero indices are trivially coalesced by fast-isel.
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
+ return false;
+
// Only instructions with a single use in the same basic block are considered
// to have trivial kills.
return I->hasOneUse() &&
@@ -123,15 +136,8 @@ unsigned FastISel::getRegForValue(const Value *V) {
return 0;
}
- // Look up the value to see if we already have a register for it. We
- // cache values defined by Instructions across blocks, and other values
- // only locally. This is because Instructions already have the SSA
- // def-dominates-use requirement enforced.
- DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
- if (I != FuncInfo.ValueMap.end())
- return I->second;
-
- unsigned Reg = LocalValueMap[V];
+ // Look up the value to see if we already have a register for it.
+ unsigned Reg = lookUpRegForValue(V);
if (Reg != 0)
return Reg;
@@ -186,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
uint32_t IntBitWidth = IntVT.getSizeInBits();
bool isExact;
(void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
- APFloat::rmTowardZero, &isExact);
+ APFloat::rmTowardZero, &isExact);
if (isExact) {
APInt IntVal(IntBitWidth, x);
@@ -297,6 +303,18 @@ void FastISel::recomputeInsertPt() {
++FuncInfo.InsertPt;
}
+void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!");
+ while (I != E) {
+ MachineInstr *Dead = &*I;
+ ++I;
+ Dead->eraseFromParent();
+ ++NumFastIselDead;
+ }
+ recomputeInsertPt();
+}
+
FastISel::SavePoint FastISel::enterLocalValueArea() {
MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
DebugLoc OldDL = DL;
@@ -377,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
ISDOpcode = ISD::SRA;
}
+ // Transform "urem x, pow2" -> "and x, pow2-1".
+ if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+ isPowerOf2_64(Imm)) {
+ --Imm;
+ ISDOpcode = ISD::AND;
+ }
+
unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
Op0IsKill, Imm, VT.getSimpleVT());
if (ResultReg == 0) return false;
@@ -427,6 +452,11 @@ bool FastISel::SelectGetElementPtr(const User *I) {
bool NIsKill = hasTrivialKill(I->getOperand(0));
+ // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+ // into a single N = N + TotalOffset.
+ uint64_t TotalOffs = 0;
+ // FIXME: What's a good SWAG number for MaxOffs?
+ uint64_t MaxOffs = 2048;
Type *Ty = I->getOperand(0)->getType();
MVT VT = TLI.getPointerTy();
for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
@@ -436,14 +466,15 @@ bool FastISel::SelectGetElementPtr(const User *I) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
- uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
- // FIXME: This can be optimized by combining the add with a
- // subsequent one.
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
- return false;
- NIsKill = true;
+ TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field);
+ if (TotalOffs >= MaxOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
}
Ty = StTy->getElementType(Field);
} else {
@@ -452,14 +483,26 @@ bool FastISel::SelectGetElementPtr(const User *I) {
// If this is a constant subscript, handle it quickly.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
- uint64_t Offs =
+ // N = N + Offset
+ TotalOffs +=
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
+ if (TotalOffs >= MaxOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+ continue;
+ }
+ if (TotalOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
if (N == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
NIsKill = true;
- continue;
+ TotalOffs = 0;
}
// N = N + Idx * ElementSize;
@@ -484,6 +527,12 @@ bool FastISel::SelectGetElementPtr(const User *I) {
return false;
}
}
+ if (TotalOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
// We successfully emitted code for the given LLVM Instruction.
UpdateValueMap(I, N);
@@ -512,21 +561,32 @@ bool FastISel::SelectCall(const User *I) {
return true;
}
+ MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
+ ComputeUsesVAFloatArgument(*Call, &MMI);
+
const Function *F = Call->getCalledFunction();
if (!F) return false;
// Handle selected intrinsic function calls.
switch (F->getIntrinsicID()) {
default: break;
+ // At -O0 we don't care about the lifetime intrinsics.
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return true;
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
if (!DIVariable(DI->getVariable()).Verify() ||
- !FuncInfo.MF->getMMI().hasDebugInfo())
+ !FuncInfo.MF->getMMI().hasDebugInfo()) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
+ }
const Value *Address = DI->getAddress();
- if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
+ if (!Address || isa<UndefValue>(Address)) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
+ }
unsigned Reg = 0;
unsigned Offset = 0;
@@ -534,16 +594,36 @@ bool FastISel::SelectCall(const User *I) {
// Some arguments' frame index is recorded during argument lowering.
Offset = FuncInfo.getArgumentFrameIndex(Arg);
if (Offset)
- Reg = TRI.getFrameRegister(*FuncInfo.MF);
+ Reg = TRI.getFrameRegister(*FuncInfo.MF);
}
if (!Reg)
- Reg = getRegForValue(Address);
+ Reg = lookUpRegForValue(Address);
+
+ // If we have a VLA that has a "use" in a metadata node that's then used
+ // here but it has no other uses, then we have a problem. E.g.,
+ //
+ // int foo (const int *x) {
+ // char a[*x];
+ // return 0;
+ // }
+ //
+ // If we assign 'a' a vreg and fast isel later on has to use the selection
+ // DAG isel, it will want to copy the value to the vreg. However, there are
+ // no uses, which goes counter to what selection DAG isel expects.
+ if (!Reg && !Address->use_empty() && isa<Instruction>(Address) &&
+ (!isa<AllocaInst>(Address) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
+ Reg = FuncInfo.InitializeRegForValue(Address);
if (Reg)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::DBG_VALUE))
.addReg(Reg, RegState::Debug).addImm(Offset)
.addMetadata(DI->getVariable());
+ else
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
return true;
}
case Intrinsic::dbg_value: {
@@ -581,60 +661,6 @@ bool FastISel::SelectCall(const User *I) {
}
return true;
}
- case Intrinsic::eh_exception: {
- EVT VT = TLI.getValueType(Call->getType());
- if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
- break;
-
- assert(FuncInfo.MBB->isLandingPad() &&
- "Call to eh.exception not in landing pad!");
- unsigned Reg = TLI.getExceptionAddressRegister();
- const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Reg);
- UpdateValueMap(Call, ResultReg);
- return true;
- }
- case Intrinsic::eh_selector: {
- EVT VT = TLI.getValueType(Call->getType());
- if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
- break;
- if (FuncInfo.MBB->isLandingPad())
- AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB);
- else {
-#ifndef NDEBUG
- FuncInfo.CatchInfoLost.insert(Call);
-#endif
- // FIXME: Mark exception selector register as live in. Hack for PR1508.
- unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo.MBB->addLiveIn(Reg);
- }
-
- unsigned Reg = TLI.getExceptionSelectorRegister();
- EVT SrcVT = TLI.getPointerTy();
- const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Reg);
-
- bool ResultRegIsKill = hasTrivialKill(Call);
-
- // Cast the register to the type of the selector.
- if (SrcVT.bitsGT(MVT::i32))
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
- ResultReg, ResultRegIsKill);
- else if (SrcVT.bitsLT(MVT::i32))
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
- ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
- if (ResultReg == 0)
- // Unhandled operand. Halt "fast" selection and bail.
- return false;
-
- UpdateValueMap(Call, ResultReg);
-
- return true;
- }
case Intrinsic::objectsize: {
ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
unsigned long long Res = CI->isZero() ? -1ULL : 0;
@@ -726,8 +752,8 @@ bool FastISel::SelectBitCast(const User *I) {
// First, try to perform the bitcast by inserting a reg-reg copy.
unsigned ResultReg = 0;
if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
- TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
- TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+ const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
// Don't attempt a cross-class copy. It will likely fail.
if (SrcClass == DstClass) {
ResultReg = createResultReg(DstClass);
@@ -758,17 +784,33 @@ FastISel::SelectInstruction(const Instruction *I) {
DL = I->getDebugLoc();
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
+
// First, try doing target-independent selection.
if (SelectOperator(I, I->getOpcode())) {
+ ++NumFastIselSuccessIndependent;
DL = DebugLoc();
return true;
}
+ // Remove dead code. However, ignore call instructions since we've flushed
+ // the local value map and recomputed the insert point.
+ if (!isa<CallInst>(I)) {
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+ }
// Next, try calling the target to attempt to handle the instruction.
+ SavedInsertPt = FuncInfo.InsertPt;
if (TargetSelectInstruction(I)) {
+ ++NumFastIselSuccessTarget;
DL = DebugLoc();
return true;
}
+ // Check for dead code and remove as necessary.
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
DL = DebugLoc();
return false;
@@ -779,8 +821,11 @@ FastISel::SelectInstruction(const Instruction *I) {
/// the CFG.
void
FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
- if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
- // The unconditional fall-through case, which needs no instructions.
+
+ if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+ // If this is the only instruction in the block, emit the branch anyway so
+ // the line information is more accurate; otherwise this is the unconditional
+ // fall-through case, which needs no instructions.
} else {
// The unconditional branch case.
TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
@@ -1354,8 +1399,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// exactly one register for each non-void instruction.
EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
- // Promote MVT::i1.
- if (VT == MVT::i1)
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
else {
FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index b052740a1abe..8dde919079d9 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "function-lowering-info"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -68,7 +69,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
GetReturnInfo(Fn->getReturnType(),
Fn->getAttributes().getRetAttributes(), Outs, TLI);
CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
- Fn->isVarArg(),
+ Fn->isVarArg(),
Outs, Fn->getContext());
// Initialize the mapping of values to registers. This is only set up for
@@ -92,14 +93,16 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
// candidate. I.e., it would trigger the creation of a stack protector.
bool MayNeedSP =
(AI->isArrayAllocation() ||
- (TySize > 8 && isa<ArrayType>(Ty) &&
+ (TySize >= 8 && isa<ArrayType>(Ty) &&
cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP);
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false,
+ MayNeedSP);
}
for (; BB != EB; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
// Mark values used outside their block as exported, by allocating
// a virtual register for them.
if (isUsedOutsideOfDefiningBlock(I))
@@ -355,7 +358,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
/// argument. This overrides previous frame index entry for this argument,
/// if any.
void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
- int FI) {
+ int FI) {
ByValArgFrameIndexMap[A] = FI;
}
@@ -367,10 +370,34 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
ByValArgFrameIndexMap.find(A);
if (I != ByValArgFrameIndexMap.end())
return I->second;
- DEBUG(dbgs() << "Argument does not have assigned frame index!");
+ DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
return 0;
}
+/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
+/// being passed to this variadic function, and set the MachineModuleInfo's
+/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
+/// reference to _fltused on Windows, which will link in MSVCRT's
+/// floating-point support.
+void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
+ MachineModuleInfo *MMI)
+{
+ FunctionType *FT = cast<FunctionType>(
+ I.getCalledValue()->getType()->getContainedType(0));
+ if (FT->isVarArg() && !MMI->usesVAFloatArgument()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ Type* T = I.getArgOperand(i)->getType();
+ for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
+ i != e; ++i) {
+ if (i->isFloatingPointTy()) {
+ MMI->setUsesVAFloatArgument(true);
+ return;
+ }
+ }
+ }
+ }
+}
+
/// AddCatchInfo - Extract the personality and type infos from an eh.selector
/// call, and add them to the specified machine basic block.
void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
@@ -425,34 +452,6 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
}
}
-void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad,
- MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
- SmallPtrSet<const BasicBlock*, 4> Visited;
-
- // The 'eh.selector' call may not be in the direct successor of a basic block,
- // but could be several successors deeper. If we don't find it, try going one
- // level further. <rdar://problem/8824861>
- while (Visited.insert(SuccBB)) {
- for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end();
- I != E; ++I)
- if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
- // Apply the catch info to LPad.
- AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]);
-#ifndef NDEBUG
- if (!FLI.MBBMap[SuccBB]->isLandingPad())
- FLI.CatchInfoFound.insert(EHSel);
-#endif
- return;
- }
-
- const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator());
- if (Br && Br->isUnconditional())
- SuccBB = Br->getSuccessor(0);
- else
- break;
- }
-}
-
/// AddLandingPadInfo - Extract the exception handling information from the
/// landingpad instruction and add them to the specified machine module info.
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 2ff66f8f8715..1467d887789c 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -294,7 +294,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
const TargetRegisterClass *DstRC = 0;
if (IIOpNum < II->getNumOperands())
DstRC = TII->getRegClass(*II, IIOpNum, TRI);
- assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
+ assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
"Don't have operand info for this instruction!");
if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
@@ -351,6 +351,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
MI->addOperand(MachineOperand::CreateFPImm(CFP));
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+ } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask()));
} else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
TGA->getTargetFlags()));
@@ -574,14 +576,19 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
for (unsigned i = 1; i != NumOps; ++i) {
SDValue Op = Node->getOperand(i);
if ((i & 1) == 0) {
- unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
- unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
- const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
- const TargetRegisterClass *SRC =
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
+ // Skip physical registers as they don't have a vreg to get and we'll
+ // insert copies for them in TwoAddressInstructionPass anyway.
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+ unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
- if (SRC && SRC != RC) {
- MRI->setRegClass(NewVReg, SRC);
- RC = SRC;
+ if (SRC && SRC != RC) {
+ MRI->setRegClass(NewVReg, SRC);
+ RC = SRC;
+ }
}
}
AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
@@ -700,33 +707,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Create the new machine instruction.
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
- // The MachineInstr constructor adds implicit-def operands. Scan through
- // these to determine which are dead.
- if (MI->getNumOperands() != 0 &&
- Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
- // First, collect all used registers.
- SmallVector<unsigned, 8> UsedRegs;
- for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
- if (F->getOpcode() == ISD::CopyFromReg)
- UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
- else {
- // Collect declared implicit uses.
- const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
- UsedRegs.append(MCID.getImplicitUses(),
- MCID.getImplicitUses() + MCID.getNumImplicitUses());
- // In addition to declared implicit uses, we must also check for
- // direct RegisterSDNode operands.
- for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
- if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
- unsigned Reg = R->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- UsedRegs.push_back(Reg);
- }
- }
- // Then mark unused registers as dead.
- MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
- }
-
// Add result register values for things that are defined by this
// instruction.
if (NumResults)
@@ -751,30 +731,63 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// hook knows where in the block to insert the replacement code.
MBB->insert(InsertPos, MI);
+ // The MachineInstr may also define physregs instead of virtregs. These
+ // physreg values can reach other instructions in different ways:
+ //
+ // 1. When there is a use of a Node value beyond the explicitly defined
+ // virtual registers, we emit a CopyFromReg for one of the implicitly
+ // defined physregs. This only happens when HasPhysRegOuts is true.
+ //
+ // 2. A CopyFromReg reading a physreg may be glued to this instruction.
+ //
+ // 3. A glued instruction may implicitly use a physreg.
+ //
+ // 4. A glued instruction may use a RegisterSDNode operand.
+ //
+ // Collect all the used physreg defs, and make sure that any unused physreg
+ // defs are marked as dead.
+ SmallVector<unsigned, 8> UsedRegs;
+
// Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
- if (Node->hasAnyUseOfValue(i))
- EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
- // If there are no uses, mark the register as dead now, so that
- // MachineLICM/Sink can see that it's dead. Don't do this if the
- // node has a Glue value, for the benefit of targets still using
- // Glue for values in physregs.
- else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
- MI->addRegisterDead(Reg, TRI);
+ if (!Node->hasAnyUseOfValue(i))
+ continue;
+ // This implicitly defined physreg has a use.
+ UsedRegs.push_back(Reg);
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
}
}
- // If the instruction has implicit defs and the node doesn't, mark the
- // implicit def as dead. If the node has any glue outputs, we don't do this
- // because we don't know what implicit defs are being used by glued nodes.
- if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
- if (const unsigned *IDList = II.getImplicitDefs()) {
- for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
- i != e; ++i)
- MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI);
+ // Scan the glue chain for any used physregs.
+ if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
+ if (F->getOpcode() == ISD::CopyFromReg) {
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ continue;
+ } else if (F->getOpcode() == ISD::CopyToReg) {
+ // Skip CopyToReg nodes that are internal to the glue chain.
+ continue;
+ }
+ // Collect declared implicit uses.
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(MCID.getImplicitUses(),
+ MCID.getImplicitUses() + MCID.getNumImplicitUses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ unsigned Reg = R->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ UsedRegs.push_back(Reg);
+ }
}
+ }
+
+ // Finally mark unused registers as dead.
+ if (!UsedRegs.empty() || II.getImplicitDefs())
+ MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
// Run post-isel target hook to adjust this instruction if needed.
#ifdef NDEBUG
@@ -794,10 +807,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
Node->dump();
#endif
llvm_unreachable("This target-independent node should have been selected!");
- break;
case ISD::EntryToken:
llvm_unreachable("EntryToken should have been excluded from the schedule!");
- break;
case ISD::MERGE_VALUES:
case ISD::TokenFactor: // fall thru
break;
diff --git a/lib/CodeGen/SelectionDAG/LLVMBuild.txt b/lib/CodeGen/SelectionDAG/LLVMBuild.txt
new file mode 100644
index 000000000000..81d2e000a2e8
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/CodeGen/SelectionDAG/LLVMBuild.txt -----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = SelectionDAG
+parent = CodeGen
+required_libraries = Analysis CodeGen Core MC Support Target TransformUtils
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 63255ae2ebd9..a96a99781f4e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -46,37 +46,18 @@ using namespace llvm;
/// will attempt merge setcc and brc instructions into brcc's.
///
namespace {
-class SelectionDAGLegalize {
+class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener {
const TargetMachine &TM;
const TargetLowering &TLI;
SelectionDAG &DAG;
- // Libcall insertion helpers.
-
- /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
- /// legalized. We use this to ensure that calls are properly serialized
- /// against each other, including inserted libcalls.
- SDValue LastCALLSEQ_END;
-
- /// IsLegalizingCall - This member is used *only* for purposes of providing
- /// helpful assertions that a libcall isn't created while another call is
- /// being legalized (which could lead to non-serialized call sequences).
- bool IsLegalizingCall;
-
- /// LegalizedNodes - For nodes that are of legal width, and that have more
- /// than one use, this map indicates what regularized operand to use. This
- /// allows us to avoid legalizing the same thing more than once.
- DenseMap<SDValue, SDValue> LegalizedNodes;
+ /// LegalizePosition - The iterator for walking through the node list.
+ SelectionDAG::allnodes_iterator LegalizePosition;
- void AddLegalizedOperand(SDValue From, SDValue To) {
- LegalizedNodes.insert(std::make_pair(From, To));
- // If someone requests legalization of the new node, return itself.
- if (From != To)
- LegalizedNodes.insert(std::make_pair(To, To));
+ /// LegalizedNodes - The set of nodes which have already been legalized.
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
- // Transfer SDDbgValues.
- DAG.TransferDbgValues(From, To);
- }
+ // Libcall insertion helpers.
public:
explicit SelectionDAGLegalize(SelectionDAG &DAG);
@@ -84,9 +65,8 @@ public:
void LegalizeDAG();
private:
- /// LegalizeOp - Return a legal replacement for the given operation, with
- /// all legal operands.
- SDValue LegalizeOp(SDValue O);
+ /// LegalizeOp - Legalizes the given operation.
+ void LegalizeOp(SDNode *Node);
SDValue OptimizeFloatStore(StoreSDNode *ST);
@@ -105,10 +85,7 @@ private:
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
- SmallVectorImpl<int> &Mask) const;
-
- bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
- SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+ ArrayRef<int> Mask) const;
void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
DebugLoc dl);
@@ -150,10 +127,46 @@ private:
SDValue ExpandInsertToVectorThroughStack(SDValue Op);
SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+ SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP);
+
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
- void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
- void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandNode(SDNode *Node);
+ void PromoteNode(SDNode *Node);
+
+ void ForgetNode(SDNode *N) {
+ LegalizedNodes.erase(N);
+ if (LegalizePosition == SelectionDAG::allnodes_iterator(N))
+ ++LegalizePosition;
+ }
+
+public:
+ // DAGUpdateListener implementation.
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ ForgetNode(N);
+ }
+ virtual void NodeUpdated(SDNode *N) {}
+
+ // Node replacement helpers
+ void ReplacedNode(SDNode *N) {
+ if (N->use_empty()) {
+ DAG.RemoveDeadNode(N, this);
+ } else {
+ ForgetNode(N);
+ }
+ }
+ void ReplaceNode(SDNode *Old, SDNode *New) {
+ DAG.ReplaceAllUsesWith(Old, New, this);
+ ReplacedNode(Old);
+ }
+ void ReplaceNode(SDValue Old, SDValue New) {
+ DAG.ReplaceAllUsesWith(Old, New, this);
+ ReplacedNode(Old.getNode());
+ }
+ void ReplaceNode(SDNode *Old, const SDValue *New) {
+ DAG.ReplaceAllUsesWith(Old, New, this);
+ ReplacedNode(Old);
+ }
};
}
@@ -164,7 +177,7 @@ private:
SDValue
SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
- SmallVectorImpl<int> &Mask) const {
+ ArrayRef<int> Mask) const {
unsigned NumMaskElts = VT.getVectorNumElements();
unsigned NumDestElts = NVT.getVectorNumElements();
unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
@@ -195,145 +208,37 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
}
void SelectionDAGLegalize::LegalizeDAG() {
- LastCALLSEQ_END = DAG.getEntryNode();
- IsLegalizingCall = false;
-
- // The legalize process is inherently a bottom-up recursive process (users
- // legalize their uses before themselves). Given infinite stack space, we
- // could just start legalizing on the root and traverse the whole graph. In
- // practice however, this causes us to run out of stack space on large basic
- // blocks. To avoid this problem, compute an ordering of the nodes where each
- // node is only legalized after all of its operands are legalized.
DAG.AssignTopologicalOrder();
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
- LegalizeOp(SDValue(I, 0));
- // Finally, it's possible the root changed. Get the new root.
- SDValue OldRoot = DAG.getRoot();
- assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
- DAG.setRoot(LegalizedNodes[OldRoot]);
-
- LegalizedNodes.clear();
-
- // Remove dead nodes now.
- DAG.RemoveDeadNodes();
-}
-
-
-/// FindCallEndFromCallStart - Given a chained node that is part of a call
-/// sequence, find the CALLSEQ_END node that terminates the call sequence.
-static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
- // Nested CALLSEQ_START/END constructs aren't yet legal,
- // but we can DTRT and handle them correctly here.
- if (Node->getOpcode() == ISD::CALLSEQ_START)
- depth++;
- else if (Node->getOpcode() == ISD::CALLSEQ_END) {
- depth--;
- if (depth == 0)
- return Node;
- }
- if (Node->use_empty())
- return 0; // No CallSeqEnd
-
- // The chain is usually at the end.
- SDValue TheChain(Node, Node->getNumValues()-1);
- if (TheChain.getValueType() != MVT::Other) {
- // Sometimes it's at the beginning.
- TheChain = SDValue(Node, 0);
- if (TheChain.getValueType() != MVT::Other) {
- // Otherwise, hunt for it.
- for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
- if (Node->getValueType(i) == MVT::Other) {
- TheChain = SDValue(Node, i);
- break;
- }
-
- // Otherwise, we walked into a node without a chain.
- if (TheChain.getValueType() != MVT::Other)
- return 0;
+ // Visit all the nodes. We start in topological order, so that we see
+ // nodes with their original operands intact. Legalization can produce
+ // new nodes which may themselves need to be legalized. Iterate until all
+ // nodes have been legalized.
+ for (;;) {
+ bool AnyLegalized = false;
+ for (LegalizePosition = DAG.allnodes_end();
+ LegalizePosition != DAG.allnodes_begin(); ) {
+ --LegalizePosition;
+
+ SDNode *N = LegalizePosition;
+ if (LegalizedNodes.insert(N)) {
+ AnyLegalized = true;
+ LegalizeOp(N);
+ }
}
- }
-
- for (SDNode::use_iterator UI = Node->use_begin(),
- E = Node->use_end(); UI != E; ++UI) {
-
- // Make sure to only follow users of our token chain.
- SDNode *User = *UI;
- for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
- if (User->getOperand(i) == TheChain)
- if (SDNode *Result = FindCallEndFromCallStart(User, depth))
- return Result;
- }
- return 0;
-}
-
-/// FindCallStartFromCallEnd - Given a chained node that is part of a call
-/// sequence, find the CALLSEQ_START node that initiates the call sequence.
-static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
- int nested = 0;
- assert(Node && "Didn't find callseq_start for a call??");
- while (Node->getOpcode() != ISD::CALLSEQ_START || nested) {
- Node = Node->getOperand(0).getNode();
- assert(Node->getOperand(0).getValueType() == MVT::Other &&
- "Node doesn't have a token chain argument!");
- switch (Node->getOpcode()) {
- default:
+ if (!AnyLegalized)
break;
- case ISD::CALLSEQ_START:
- if (!nested)
- return Node;
- nested--;
- break;
- case ISD::CALLSEQ_END:
- nested++;
- break;
- }
- }
- return 0;
-}
-
-/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
-/// see if any uses can reach Dest. If no dest operands can get to dest,
-/// legalize them, legalize ourself, and return false, otherwise, return true.
-///
-/// Keep track of the nodes we fine that actually do lead to Dest in
-/// NodesLeadingTo. This avoids retraversing them exponential number of times.
-///
-bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
- SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
- if (N == Dest) return true; // N certainly leads to Dest :)
-
- // If we've already processed this node and it does lead to Dest, there is no
- // need to reprocess it.
- if (NodesLeadingTo.count(N)) return true;
-
- // If the first result of this node has been already legalized, then it cannot
- // reach N.
- if (LegalizedNodes.count(SDValue(N, 0))) return false;
-
- // Okay, this node has not already been legalized. Check and legalize all
- // operands. If none lead to Dest, then we can legalize this node.
- bool OperandsLeadToDest = false;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- OperandsLeadToDest |= // If an operand leads to Dest, so do we.
- LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest,
- NodesLeadingTo);
- if (OperandsLeadToDest) {
- NodesLeadingTo.insert(N);
- return true;
}
- // Okay, this node looks safe, legalize it and return false.
- LegalizeOp(SDValue(N, 0));
- return false;
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
}
/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
/// a load from the constant pool.
-static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
- SelectionDAG &DAG, const TargetLowering &TLI) {
+SDValue
+SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
bool Extend = false;
DebugLoc dl = CFP->getDebugLoc();
@@ -369,20 +274,27 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- if (Extend)
- return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
- DAG.getEntryNode(),
- CPIdx, MachinePointerInfo::getConstantPool(),
- VT, false, false, Alignment);
- return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(), false, false,
- Alignment);
+ if (Extend) {
+ SDValue Result =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
+ DAG.getEntryNode(),
+ CPIdx, MachinePointerInfo::getConstantPool(),
+ VT, false, false, Alignment);
+ return Result;
+ }
+ SDValue Result =
+ DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(), false, false, false,
+ Alignment);
+ return Result;
}
/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
-static
-SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ SelectionDAGLegalize *DAGLegalize) {
+ assert(ST->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed stores not implemented!");
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
SDValue Val = ST->getValue();
@@ -397,8 +309,10 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// same size, then a (misaligned) int store.
// FIXME: Does not handle truncating floating point stores!
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
- return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
- ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
+ return;
}
// Do a (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
@@ -427,7 +341,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Load one integer register's worth from the stack slot.
SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset),
@@ -458,8 +372,11 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
ST->isNonTemporal(),
MinAlign(ST->getAlignment(), Offset)));
// The order of the stores doesn't matter - say it with a TokenFactor.
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
- Stores.size());
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
+ return;
}
assert(ST->getMemoryVT().isInteger() &&
!ST->getMemoryVT().isVector() &&
@@ -488,13 +405,18 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
Alignment);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
}
/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
-static
-SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+static void
+ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ SDValue &ValResult, SDValue &ChainResult) {
+ assert(LD->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed loads not implemented!");
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
EVT VT = LD->getValueType(0);
@@ -507,13 +429,15 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// then bitconvert to floating point or vector.
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
if (VT.isFloatingPoint() && LoadedVT != VT)
Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
- SDValue Ops[] = { Result, Chain };
- return DAG.getMergeValues(Ops, 2, dl);
+ ValResult = Result;
+ ChainResult = Chain;
+ return;
}
// Copy the value to a (aligned) stack slot using (unaligned) integer
@@ -537,6 +461,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(Offset),
LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(),
MinAlign(LD->getAlignment(), Offset));
// Follow the load with a store to the stack slot. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
@@ -572,8 +497,9 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
MachinePointerInfo(), LoadedVT, false, false, 0);
// Callers expect a MERGE_VALUES node.
- SDValue Ops[] = { Load, TF };
- return DAG.getMergeValues(Ops, 2, dl);
+ ValResult = Load;
+ ChainResult = TF;
+ return;
}
assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
"Unaligned load of unsupported type.");
@@ -626,8 +552,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- SDValue Ops[] = { Result, TF };
- return DAG.getMergeValues(Ops, 2, dl);
+ ValResult = Result;
+ ChainResult = TF;
}
/// PerformInsertVectorEltInMemory - Some target cannot handle a variable
@@ -672,7 +598,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
false, false, 0);
// Load the updated vector.
return DAG.getLoad(VT, dl, Ch, StackPtr,
- MachinePointerInfo::getFixedStack(SPFI), false, false, 0);
+ MachinePointerInfo::getFixedStack(SPFI), false, false,
+ false, 0);
}
@@ -763,11 +690,10 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
/// LegalizeOp - Return a legal replacement for the given operation, with
/// all legal operands.
-SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
- if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
- return Op;
+void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
+ if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+ return;
- SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
@@ -782,13 +708,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
"Unexpected illegal type!");
- // Note that LegalizeOp may be reentered even from single-use nodes, which
- // means that we always must cache transformed nodes.
- DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
- if (I != LegalizedNodes.end()) return I->second;
-
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
- SDValue Result = Op;
bool isCustom = false;
// Figure out the correct action; the way to query this varies by opcode
@@ -798,10 +718,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID:
- case ISD::VAARG:
case ISD::STACKSAVE:
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
+ case ISD::VAARG:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ if (Action != TargetLowering::Promote)
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::EXTRACT_VECTOR_ELT:
@@ -865,7 +790,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::FRAME_TO_ARGS_OFFSET:
case ISD::EH_SJLJ_SETJMP:
case ISD::EH_SJLJ_LONGJMP:
- case ISD::EH_SJLJ_DISPATCHSETUP:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -882,17 +806,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Custom;
break;
- case ISD::BUILD_VECTOR:
- // A weird case: legalization for BUILD_VECTOR never legalizes the
- // operands!
- // FIXME: This really sucks... changing it isn't semantically incorrect,
- // but it massively pessimizes the code for floating-point BUILD_VECTORs
- // because ConstantFP operands get legalized into constant pool loads
- // before the BUILD_VECTOR code can see them. It doesn't usually bite,
- // though, because BUILD_VECTORS usually get lowered into other nodes
- // which get legalized properly.
- SimpleFinishLegalizing = false;
- break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -903,22 +816,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
if (SimpleFinishLegalizing) {
- SmallVector<SDValue, 8> Ops, ResultVals;
+ SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
- Ops.push_back(LegalizeOp(Node->getOperand(i)));
+ Ops.push_back(Node->getOperand(i));
switch (Node->getOpcode()) {
default: break;
- case ISD::BR:
- case ISD::BRIND:
- case ISD::BR_JT:
- case ISD::BR_CC:
- case ISD::BRCOND:
- // Branches tweak the chain to include LastCALLSEQ_END
- Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
- LastCALLSEQ_END);
- Ops[0] = LegalizeOp(Ops[0]);
- LastCALLSEQ_END = DAG.getEntryNode();
- break;
case ISD::SHL:
case ISD::SRL:
case ISD::SRA:
@@ -926,57 +828,66 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::ROTR:
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Ops[1].getValueType().isVector())
- Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
- Ops[1]));
+ if (!Ops[1].getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]);
+ HandleSDNode Handle(SAO);
+ LegalizeOp(SAO.getNode());
+ Ops[1] = Handle.getValue();
+ }
break;
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
case ISD::SHL_PARTS:
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Ops[2].getValueType().isVector())
- Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
- Ops[2]));
+ if (!Ops[2].getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]);
+ HandleSDNode Handle(SAO);
+ LegalizeOp(SAO.getNode());
+ Ops[2] = Handle.getValue();
+ }
break;
}
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(),
- Ops.size()), 0);
+ SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
+ if (NewNode != Node) {
+ DAG.ReplaceAllUsesWith(Node, NewNode, this);
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i));
+ ReplacedNode(Node);
+ Node = NewNode;
+ }
switch (Action) {
case TargetLowering::Legal:
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- ResultVals.push_back(Result.getValue(i));
- break;
+ return;
case TargetLowering::Custom:
// FIXME: The handling for custom lowering with multiple results is
// a complete mess.
- Tmp1 = TLI.LowerOperation(Result, DAG);
+ Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Tmp1.getNode()) {
+ SmallVector<SDValue, 8> ResultVals;
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
if (e == 1)
ResultVals.push_back(Tmp1);
else
ResultVals.push_back(Tmp1.getValue(i));
}
- break;
+ if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) {
+ DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this);
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]);
+ ReplacedNode(Node);
+ }
+ return;
}
// FALL THROUGH
case TargetLowering::Expand:
- ExpandNode(Result.getNode(), ResultVals);
- break;
+ ExpandNode(Node);
+ return;
case TargetLowering::Promote:
- PromoteNode(Result.getNode(), ResultVals);
- break;
- }
- if (!ResultVals.empty()) {
- for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) {
- if (ResultVals[i] != SDValue(Node, i))
- ResultVals[i] = LegalizeOp(ResultVals[i]);
- AddLegalizedOperand(SDValue(Node, i), ResultVals[i]);
- }
- return ResultVals[Op.getResNo()];
+ PromoteNode(Node);
+ return;
}
}
@@ -987,160 +898,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Node->dump( &DAG);
dbgs() << "\n";
#endif
- assert(0 && "Do not know how to legalize this operator!");
+ llvm_unreachable("Do not know how to legalize this operator!");
- case ISD::SRA:
- case ISD::SRL:
- case ISD::SHL: {
- // Scalarize vector SRA/SRL/SHL.
- EVT VT = Node->getValueType(0);
- assert(VT.isVector() && "Unable to legalize non-vector shift");
- assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal");
- unsigned NumElem = VT.getVectorNumElements();
-
- SmallVector<SDValue, 8> Scalars;
- for (unsigned Idx = 0; Idx < NumElem; Idx++) {
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getScalarType(),
- Node->getOperand(0), DAG.getIntPtrConstant(Idx));
- SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getScalarType(),
- Node->getOperand(1), DAG.getIntPtrConstant(Idx));
- Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
- VT.getScalarType(), Ex, Sh));
- }
- Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
- &Scalars[0], Scalars.size());
- break;
- }
-
- case ISD::BUILD_VECTOR:
- switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
- default: assert(0 && "This action is not supported yet!");
- case TargetLowering::Custom:
- Tmp3 = TLI.LowerOperation(Result, DAG);
- if (Tmp3.getNode()) {
- Result = Tmp3;
- break;
- }
- // FALLTHROUGH
- case TargetLowering::Expand:
- Result = ExpandBUILD_VECTOR(Result.getNode());
- break;
- }
- break;
- case ISD::CALLSEQ_START: {
- SDNode *CallEnd = FindCallEndFromCallStart(Node);
-
- // Recursively Legalize all of the inputs of the call end that do not lead
- // to this call start. This ensures that any libcalls that need be inserted
- // are inserted *before* the CALLSEQ_START.
- {SmallPtrSet<SDNode*, 32> NodesLeadingTo;
- for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
- LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node,
- NodesLeadingTo);
- }
-
- // Now that we have legalized all of the inputs (which may have inserted
- // libcalls), create the new CALLSEQ_START node.
- Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
-
- // Merge in the last call to ensure that this call starts after the last
- // call ended.
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
- Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- Tmp1, LastCALLSEQ_END);
- Tmp1 = LegalizeOp(Tmp1);
- }
-
- // Do not try to legalize the target-specific arguments (#1+).
- if (Tmp1 != Node->getOperand(0)) {
- SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
- Ops[0] = Tmp1;
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0],
- Ops.size()), Result.getResNo());
- }
-
- // Remember that the CALLSEQ_START is legalized.
- AddLegalizedOperand(Op.getValue(0), Result);
- if (Node->getNumValues() == 2) // If this has a flag result, remember it.
- AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
-
- // Now that the callseq_start and all of the non-call nodes above this call
- // sequence have been legalized, legalize the call itself. During this
- // process, no libcalls can/will be inserted, guaranteeing that no calls
- // can overlap.
- assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
- // Note that we are selecting this call!
- LastCALLSEQ_END = SDValue(CallEnd, 0);
- IsLegalizingCall = true;
-
- // Legalize the call, starting from the CALLSEQ_END.
- LegalizeOp(LastCALLSEQ_END);
- assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
- return Result;
- }
+ case ISD::CALLSEQ_START:
case ISD::CALLSEQ_END:
- // If the CALLSEQ_START node hasn't been legalized first, legalize it. This
- // will cause this node to be legalized as well as handling libcalls right.
- if (LastCALLSEQ_END.getNode() != Node) {
- LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
- DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
- assert(I != LegalizedNodes.end() &&
- "Legalizing the call start should have legalized this node!");
- return I->second;
- }
-
- // Otherwise, the call start has been legalized and everything is going
- // according to plan. Just legalize ourselves normally here.
- Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
- // Do not try to legalize the target-specific arguments (#1+), except for
- // an optional flag input.
- if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){
- if (Tmp1 != Node->getOperand(0)) {
- SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
- Ops[0] = Tmp1;
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- &Ops[0], Ops.size()),
- Result.getResNo());
- }
- } else {
- Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
- if (Tmp1 != Node->getOperand(0) ||
- Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
- SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
- Ops[0] = Tmp1;
- Ops.back() = Tmp2;
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- &Ops[0], Ops.size()),
- Result.getResNo());
- }
- }
- assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
- // This finishes up call legalization.
- IsLegalizingCall = false;
-
- // If the CALLSEQ_END node has a flag, remember that we legalized it.
- AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
- if (Node->getNumValues() == 2)
- AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1));
- return Result.getValue(Op.getResNo());
+ break;
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Node);
- Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain.
- Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+ Tmp1 = LD->getChain(); // Legalize the chain.
+ Tmp2 = LD->getBasePtr(); // Legalize the base pointer.
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD) {
EVT VT = Node->getValueType(0);
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp2, LD->getOffset()),
- Result.getResNo());
- Tmp3 = Result.getValue(0);
- Tmp4 = Result.getValue(1);
+ Tmp3 = SDValue(Node, 0);
+ Tmp4 = SDValue(Node, 1);
switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned load and the target doesn't support it,
// expand it.
@@ -1148,20 +923,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
- DAG, TLI);
- Tmp3 = Result.getOperand(0);
- Tmp4 = Result.getOperand(1);
- Tmp3 = LegalizeOp(Tmp3);
- Tmp4 = LegalizeOp(Tmp4);
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, Tmp3, Tmp4);
}
}
break;
case TargetLowering::Custom:
Tmp1 = TLI.LowerOperation(Tmp3, DAG);
if (Tmp1.getNode()) {
- Tmp3 = LegalizeOp(Tmp1);
- Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ Tmp3 = Tmp1;
+ Tmp4 = Tmp1.getValue(1);
}
break;
case TargetLowering::Promote: {
@@ -1172,17 +943,19 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1));
- Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ LD->isInvariant(), LD->getAlignment());
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1);
+ Tmp4 = Tmp1.getValue(1);
break;
}
}
- // Since loads produce two values, make sure to remember that we
- // legalized both of them.
- AddLegalizedOperand(SDValue(Node, 0), Tmp3);
- AddLegalizedOperand(SDValue(Node, 1), Tmp4);
- return Op.getResNo() ? Tmp4 : Tmp3;
+ if (Tmp4.getNode() != Node) {
+ assert(Tmp3.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4);
+ ReplacedNode(Node);
+ }
+ return;
}
EVT SrcVT = LD->getMemoryVT();
@@ -1213,9 +986,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
ISD::LoadExtType NewExtType =
ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
- Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
- Tmp1, Tmp2, LD->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
Ch = Result.getValue(1); // The chain.
@@ -1230,8 +1004,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result.getValueType(), Result,
DAG.getValueType(SrcVT));
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
+ Tmp1 = Result;
+ Tmp2 = Ch;
} else if (SrcWidth & (SrcWidth - 1)) {
// If not loading a power-of-2 number of bits, expand as two loads.
assert(!SrcVT.isVector() && "Unsupported extload!");
@@ -1274,7 +1048,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
} else {
// Big endian - avoid unaligned loads.
// EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
@@ -1304,29 +1078,25 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
}
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
+ Tmp2 = Ch;
} else {
switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
// FALLTHROUGH
case TargetLowering::Legal:
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp2, LD->getOffset()),
- Result.getResNo());
- Tmp1 = Result.getValue(0);
- Tmp2 = Result.getValue(1);
+ Tmp1 = SDValue(Node, 0);
+ Tmp2 = SDValue(Node, 1);
if (isCustom) {
- Tmp3 = TLI.LowerOperation(Result, DAG);
+ Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Tmp3.getNode()) {
- Tmp1 = LegalizeOp(Tmp3);
- Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ Tmp1 = Tmp3;
+ Tmp2 = Tmp3.getValue(1);
}
} else {
// If this is an unaligned load and the target doesn't support it,
@@ -1337,12 +1107,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
unsigned ABIAlignment =
TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
- DAG, TLI);
- Tmp1 = Result.getOperand(0);
- Tmp2 = Result.getOperand(1);
- Tmp1 = LegalizeOp(Tmp1);
- Tmp2 = LegalizeOp(Tmp2);
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, Tmp1, Tmp2);
}
}
}
@@ -1352,7 +1118,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ LD->isInvariant(), LD->getAlignment());
unsigned ExtendOp;
switch (ExtType) {
case ISD::EXTLOAD:
@@ -1363,95 +1129,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
default: llvm_unreachable("Unexpected extend load type!");
}
- Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
- Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Load.getValue(1));
+ Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Tmp2 = Load.getValue(1);
break;
}
- // If this is a promoted vector load, and the vector element types are
- // legal, then scalarize it.
- if (ExtType == ISD::EXTLOAD && SrcVT.isVector() &&
- TLI.isTypeLegal(Node->getValueType(0).getScalarType())) {
- SmallVector<SDValue, 8> LoadVals;
- SmallVector<SDValue, 8> LoadChains;
- unsigned NumElem = SrcVT.getVectorNumElements();
- unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
-
- for (unsigned Idx=0; Idx<NumElem; Idx++) {
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
- SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
- Node->getValueType(0).getScalarType(),
- Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
- SrcVT.getScalarType(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
-
- LoadVals.push_back(ScalarLoad.getValue(0));
- LoadChains.push_back(ScalarLoad.getValue(1));
- }
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &LoadChains[0], LoadChains.size());
- SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
- Node->getValueType(0), &LoadVals[0], LoadVals.size());
-
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
- break;
- }
-
- // If this is a promoted vector load, and the vector element types are
- // illegal, create the promoted vector from bitcasted segments.
- if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) {
- EVT MemElemTy = Node->getValueType(0).getScalarType();
- EVT SrcSclrTy = SrcVT.getScalarType();
- unsigned SizeRatio =
- (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits());
-
- SmallVector<SDValue, 8> LoadVals;
- SmallVector<SDValue, 8> LoadChains;
- unsigned NumElem = SrcVT.getVectorNumElements();
- unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
-
- for (unsigned Idx=0; Idx<NumElem; Idx++) {
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
- SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
- SrcVT.getScalarType(),
- Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
- SrcVT.getScalarType(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- if (TLI.isBigEndian()) {
- // MSB (which is garbage, comes first)
- LoadVals.push_back(ScalarLoad.getValue(0));
- for (unsigned i = 0; i<SizeRatio-1; ++i)
- LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
- } else {
- // LSB (which is data, comes first)
- for (unsigned i = 0; i<SizeRatio-1; ++i)
- LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
- LoadVals.push_back(ScalarLoad.getValue(0));
- }
- LoadChains.push_back(ScalarLoad.getValue(1));
- }
-
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &LoadChains[0], LoadChains.size());
- EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(),
- SrcVT.getScalarType(), NumElem*SizeRatio);
- SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
- TempWideVector, &LoadVals[0], LoadVals.size());
-
- // Cast to the correct type
- ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes);
-
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
- break;
-
- }
+ assert(!SrcVT.isVector() &&
+ "Vector Loads are handled in LegalizeVectorOps");
// FIXME: This does not work for vectors on most targets. Sign- and
// zero-extend operations are currently folded into extending loads,
@@ -1461,10 +1145,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
"EXTLOAD should always be supported!");
// Turn the unsupported load into an EXTLOAD followed by an explicit
// zero/sign extend inreg.
- Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
- Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
SDValue ValRes;
if (ExtType == ISD::SEXTLOAD)
ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
@@ -1472,42 +1156,41 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result, DAG.getValueType(SrcVT));
else
ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ Tmp1 = ValRes;
+ Tmp2 = Result.getValue(1);
break;
}
}
// Since loads produce two values, make sure to remember that we legalized
// both of them.
- AddLegalizedOperand(SDValue(Node, 0), Tmp1);
- AddLegalizedOperand(SDValue(Node, 1), Tmp2);
- return Op.getResNo() ? Tmp2 : Tmp1;
+ if (Tmp2.getNode() != Node) {
+ assert(Tmp1.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2);
+ ReplacedNode(Node);
+ }
+ break;
}
case ISD::STORE: {
StoreSDNode *ST = cast<StoreSDNode>(Node);
- Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain.
- Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer.
+ Tmp1 = ST->getChain();
+ Tmp2 = ST->getBasePtr();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
if (!ST->isTruncatingStore()) {
if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
- Result = SDValue(OptStore, 0);
+ ReplaceNode(ST, OptStore);
break;
}
{
- Tmp3 = LegalizeOp(ST->getValue());
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp3, Tmp2,
- ST->getOffset()),
- Result.getResNo());
-
+ Tmp3 = ST->getValue();
EVT VT = Tmp3.getValueType();
switch (TLI.getOperationAction(ISD::STORE, VT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
@@ -1515,27 +1198,31 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
- Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
- DAG, TLI);
+ ExpandUnalignedStore(cast<StoreSDNode>(Node),
+ DAG, TLI, this);
}
break;
case TargetLowering::Custom:
- Tmp1 = TLI.LowerOperation(Result, DAG);
- if (Tmp1.getNode()) Result = Tmp1;
+ Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Tmp1.getNode())
+ ReplaceNode(SDValue(Node, 0), Tmp1);
break;
- case TargetLowering::Promote:
+ case TargetLowering::Promote: {
assert(VT.isVector() && "Unknown legal promote case!");
Tmp3 = DAG.getNode(ISD::BITCAST, dl,
TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
- Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
break;
}
+ }
break;
}
} else {
- Tmp3 = LegalizeOp(ST->getValue());
+ Tmp3 = ST->getValue();
EVT StVT = ST->getMemoryVT();
unsigned StWidth = StVT.getSizeInBits();
@@ -1547,8 +1234,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
StVT.getStoreSizeInBits());
Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
- Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
@@ -1602,17 +1291,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
// The order of the stores doesn't matter.
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ ReplaceNode(SDValue(Node, 0), Result);
} else {
- if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
- Tmp2 != ST->getBasePtr())
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp3, Tmp2,
- ST->getOffset()),
- Result.getResNo());
-
switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
@@ -1620,120 +1303,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
- Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
- DAG, TLI);
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
}
break;
case TargetLowering::Custom:
- Result = TLI.LowerOperation(Result, DAG);
+ ReplaceNode(SDValue(Node, 0),
+ TLI.LowerOperation(SDValue(Node, 0), DAG));
break;
case TargetLowering::Expand:
-
- EVT WideScalarVT = Tmp3.getValueType().getScalarType();
- EVT NarrowScalarVT = StVT.getScalarType();
-
- if (StVT.isVector()) {
- unsigned NumElem = StVT.getVectorNumElements();
- // The type of the data we want to save
- EVT RegVT = Tmp3.getValueType();
- EVT RegSclVT = RegVT.getScalarType();
- // The type of data as saved in memory.
- EVT MemSclVT = StVT.getScalarType();
-
- bool RegScalarLegal = TLI.isTypeLegal(RegSclVT);
- bool MemScalarLegal = TLI.isTypeLegal(MemSclVT);
-
- // We need to expand this store. If the register element type
- // is legal then we can scalarize the vector and use
- // truncating stores.
- if (RegScalarLegal) {
- // Cast floats into integers
- unsigned ScalarSize = MemSclVT.getSizeInBits();
- EVT EltVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
-
- // Round odd types to the next pow of two.
- if (!isPowerOf2_32(ScalarSize))
- ScalarSize = NextPowerOf2(ScalarSize);
-
- // Store Stride in bytes
- unsigned Stride = ScalarSize/8;
- // Extract each of the elements from the original vector
- // and save them into memory individually.
- SmallVector<SDValue, 8> Stores;
- for (unsigned Idx = 0; Idx < NumElem; Idx++) {
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- RegSclVT, Tmp3, DAG.getIntPtrConstant(Idx));
-
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
-
- // This scalar TruncStore may be illegal, but we legalize it
- // later.
- SDValue Store = DAG.getTruncStore(Tmp1, dl, Ex, Tmp2,
- ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
- isVolatile, isNonTemporal, Alignment);
-
- Stores.push_back(Store);
- }
-
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Stores[0], Stores.size());
- break;
- }
-
- // The scalar register type is illegal.
- // For example saving <2 x i64> -> <2 x i32> on a x86.
- // In here we bitcast the value into a vector of smaller parts and
- // save it using smaller scalars.
- if (!RegScalarLegal && MemScalarLegal) {
- // Store Stride in bytes
- unsigned Stride = MemSclVT.getSizeInBits()/8;
-
- unsigned SizeRatio =
- (RegSclVT.getSizeInBits() / MemSclVT.getSizeInBits());
-
- EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(),
- MemSclVT,
- SizeRatio * NumElem);
-
- // Cast the wide elem vector to wider vec with smaller elem type.
- // Example <2 x i64> -> <4 x i32>
- Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
-
- SmallVector<SDValue, 8> Stores;
- for (unsigned Idx=0; Idx < NumElem * SizeRatio; Idx++) {
- // Extract the Ith element.
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
- // Bump pointer.
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
-
- // Store if, this element is:
- // - First element on big endian, or
- // - Last element on little endian
- if (( TLI.isBigEndian() && (Idx % SizeRatio == 0)) ||
- ((!TLI.isBigEndian() && (Idx % SizeRatio == SizeRatio-1)))) {
- SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
- ST->getPointerInfo().getWithOffset(Idx*Stride),
- isVolatile, isNonTemporal, Alignment);
- Stores.push_back(Store);
- }
- }
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Stores[0], Stores.size());
- break;
- }
-
- assert(false && "Unable to legalize the vector trunc store!");
- }// is vector
-
+ assert(!StVT.isVector() &&
+ "Vector Stores are handled in LegalizeVectorOps");
// TRUNCSTORE:i16 i32 -> STORE i16
assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!");
Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
- Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
break;
}
}
@@ -1741,17 +1328,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
break;
}
}
- assert(Result.getValueType() == Op.getValueType() &&
- "Bad legalization!");
-
- // Make sure that the generated code is itself legal.
- if (Result != Op)
- Result = LegalizeOp(Result);
-
- // Note that LegalizeOp may be reentered even from single-use nodes, which
- // means that we always must cache transformed nodes.
- AddLegalizedOperand(Op, Result);
- return Result;
}
SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
@@ -1778,7 +1354,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
if (Op.getValueType().isVector())
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
MachinePointerInfo(),
Vec.getValueType().getVectorElementType(),
@@ -1826,7 +1402,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
// Finally, load the updated vector.
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
- false, false, 0);
+ false, false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
@@ -1876,7 +1452,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
StoreChain = DAG.getEntryNode();
// Result is a load from the stack slot.
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0);
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo,
+ false, false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
@@ -1905,7 +1482,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
assert(FloatVT.isByteSized() && "Unsupported floating point type!");
// Load out a legal integer with the same sign bit as the float.
SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
} else { // Little endian
SDValue LoadPtr = StackPtr;
// The float may be wider than the integer we are going to load. Advance
@@ -1916,7 +1493,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
LoadPtr, DAG.getIntPtrConstant(ByteOffset));
// Load a legal integer containing the sign bit.
SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Move the sign bit to the top bit of the loaded integer.
unsigned BitShift = LoadTy.getSizeInBits() -
(FloatVT.getSizeInBits() - 8 * ByteOffset);
@@ -1984,7 +1561,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
EVT OpVT = LHS.getValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
- default: assert(0 && "Unknown condition code action!");
+ default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
// Nothing to do.
break;
@@ -1992,7 +1569,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
- default: assert(0 && "Don't know how to expand this condition!");
+ default: llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
@@ -2058,7 +1635,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
// Result is a load from the stack slot.
if (SlotSize == DestSize)
return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
- false, false, DestAlign);
+ false, false, false, DestAlign);
assert(SlotSize < DestSize && "Unknown extension!");
return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
@@ -2081,7 +1658,7 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
false, false, 0);
return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
MachinePointerInfo::getFixedStack(SPFI),
- false, false, 0);
+ false, false, false, 0);
}
@@ -2127,7 +1704,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// If all elements are constants, create a load from the constant pool.
if (isConstant) {
- std::vector<Constant*> CV;
+ SmallVector<Constant*, 16> CV;
for (unsigned i = 0, e = NumElems; i != e; ++i) {
if (ConstantFPSDNode *V =
dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
@@ -2155,7 +1732,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, Alignment);
+ false, false, false, Alignment);
}
if (!MoreThanTwoValues) {
@@ -2190,12 +1767,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// and leave the Hi part unset.
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
- // The input chain to this libcall is the entry node of the function.
- // Legalizing the call will automatically add the previous call to the
- // dependence.
- SDValue InChain = DAG.getEntryNode();
-
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
@@ -2209,26 +1780,31 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ // By default, the input chain to this libcall is the entry node of the
+ // function. If the libcall is going to be emitted as a tail call then
+ // TLI.isUsedByReturnOnly will change it to the right chain if the return
+ // node which is being folded has a non-entry input chain.
+ SDValue InChain = DAG.getEntryNode();
+
// isTailCall may be true since the callee does not reference caller stack
// frame. Check if it's in the right position.
- bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
+ SDValue TCChain = InChain;
+ bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI);
+ if (isTailCall)
+ InChain = TCChain;
+
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), isTailCall,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
if (!CallInfo.second.getNode())
// It's a tailcall, return the chain (which is the DAG root).
return DAG.getRoot();
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
return CallInfo.first;
}
@@ -2254,15 +1830,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
std::pair<SDValue,SDValue> CallInfo =
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, TLI.getLibcallCallingConv(LC), false,
- /*isReturnValueUsed=*/true,
+ false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
-
return CallInfo.first;
}
@@ -2272,7 +1843,6 @@ std::pair<SDValue, SDValue>
SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
SDValue InChain = Node->getOperand(0);
TargetLowering::ArgListTy Args;
@@ -2289,18 +1859,13 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
return CallInfo;
}
@@ -2311,7 +1876,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_PPCF128) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = Call_F32; break;
case MVT::f64: LC = Call_F64; break;
case MVT::f80: LC = Call_F80; break;
@@ -2328,7 +1893,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall Call_I128) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC = Call_I8; break;
case MVT::i16: LC = Call_I16; break;
case MVT::i32: LC = Call_I32; break;
@@ -2343,7 +1908,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
const TargetLowering &TLI) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -2388,7 +1953,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -2426,21 +1991,16 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
DebugLoc dl = Node->getDebugLoc();
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true, Callee, Args, DAG, dl);
-
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
// Remainder is loaded back from the stack frame.
- SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr,
- MachinePointerInfo(), false, false, 0);
+ SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr,
+ MachinePointerInfo(), false, false, false, 0);
Results.push_back(CallInfo.first);
Results.push_back(Rem);
}
@@ -2489,7 +2049,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
false, false, 0);
// load the constructed double
SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
// FP constant to bias correct the final result
SDValue Bias = DAG.getConstantFP(isSigned ?
BitsToDouble(0x4330000080000000ULL) :
@@ -2611,7 +2171,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
// offset depending on the data type.
uint64_t FF;
switch (Op0.getValueType().getSimpleVT().SimpleTy) {
- default: assert(0 && "Unsupported integer type!");
+ default: llvm_unreachable("Unsupported integer type!");
case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
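The FF table above (2^8, 2^16, 2^32, ... encoded as float bit patterns) supports the usual unsigned-to-FP correction: convert the value as a signed integer, then add back 2^N when the top bit was set. A rough standalone sketch of that arithmetic, not part of this patch and shown here only for i32 -> double:

#include <cassert>
#include <cstdint>

static double uint32ToDouble(uint32_t x) {
  // Convert as *signed* first (the operation the target is assumed to have).
  double d = static_cast<double>(static_cast<int32_t>(x));
  // Then fudge the result back up by 2^32 if the sign bit was set, which is
  // the role the FF constants play for the various integer widths.
  if (static_cast<int32_t>(x) < 0)
    d += 4294967296.0;
  return d;
}

int main() {
  assert(uint32ToDouble(7u) == 7.0);
  assert(uint32ToDouble(0x80000000u) == 2147483648.0);
  assert(uint32ToDouble(0xFFFFFFFFu) == 4294967295.0);
  return 0;
}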
@@ -2629,13 +2189,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
if (DestVT == MVT::f32)
FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, Alignment);
+ false, false, false, Alignment);
else {
- FudgeInReg =
- LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
- DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- MVT::f32, false, false, Alignment));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+ DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, Alignment);
+ HandleSDNode Handle(Load);
+ LegalizeOp(Load.getNode());
+ FudgeInReg = Handle.getValue();
}
return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
@@ -2731,7 +2293,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
EVT SHVT = TLI.getShiftAmountTy(VT);
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
switch (VT.getSimpleVT().SimpleTy) {
- default: assert(0 && "Unhandled Expand type in BSWAP!");
+ default: llvm_unreachable("Unhandled Expand type in BSWAP!");
case MVT::i16:
Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
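The shift-and-OR sequence ExpandBSWAP builds out of SHL/SRL/OR nodes corresponds to the ordinary byte-swap idiom. A standalone sketch (not part of the patch) of the i16 and i32 cases:

#include <cassert>
#include <cstdint>

static uint16_t bswap16(uint16_t x) {
  // The two-node i16 case above: (x << 8) | (x >> 8).
  return static_cast<uint16_t>((x << 8) | (x >> 8));
}

static uint32_t bswap32(uint32_t x) {
  // The i32 case masks each byte into place with shifts and ORs.
  return ((x & 0x000000FFu) << 24) |
         ((x & 0x0000FF00u) <<  8) |
         ((x & 0x00FF0000u) >>  8) |
         ((x & 0xFF000000u) >> 24);
}

int main() {
  assert(bswap16(0x1234) == 0x3412);
  assert(bswap32(0x12345678u) == 0x78563412u);
  return 0;
}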
@@ -2788,7 +2350,7 @@ static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
DebugLoc dl) {
switch (Opc) {
- default: assert(0 && "Cannot expand this yet!");
+ default: llvm_unreachable("Cannot expand this yet!");
case ISD::CTPOP: {
EVT VT = Op.getValueType();
EVT ShVT = TLI.getShiftAmountTy(VT);
@@ -2831,6 +2393,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
return Op;
}
+ case ISD::CTLZ_ZERO_UNDEF:
+ // This trivially expands to CTLZ.
+ return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
case ISD::CTLZ: {
// for now, we do this:
// x = x | (x >> 1);
@@ -2852,6 +2417,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
Op = DAG.getNOT(dl, Op, VT);
return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
+ case ISD::CTTZ_ZERO_UNDEF:
+ // This trivially expands to CTTZ.
+ return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
case ISD::CTTZ: {
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
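The identities the comments in ExpandBitCount describe can be checked in isolation. A minimal standalone sketch (not part of the patch), using 32-bit values:

#include <cassert>
#include <cstdint>

static unsigned popcount32(uint32_t x) {
  // Parallel bit count, the same shape the CTPOP expansion emits.
  x = x - ((x >> 1) & 0x55555555u);
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);
  x = (x + (x >> 4)) & 0x0F0F0F0Fu;
  return (x * 0x01010101u) >> 24;
}

static unsigned cttz32(uint32_t x) {
  // CTTZ via CTPOP: ~x & (x - 1) has ones exactly in the trailing-zero bits.
  return popcount32(~x & (x - 1));
}

static unsigned ctlz32(uint32_t x) {
  // CTLZ via CTPOP: smear the highest set bit right, complement, then count.
  x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16;
  return popcount32(~x);
}

int main() {
  assert(popcount32(0x50u) == 2);
  assert(cttz32(0x50u) == 4);
  assert(ctlz32(0x50u) == 25);
  return 0;
}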
@@ -2881,7 +2449,6 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
switch (Opc) {
default:
llvm_unreachable("Unhandled atomic intrinsic Expand!");
- break;
case ISD::ATOMIC_SWAP:
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type for atomic!");
@@ -2959,14 +2526,16 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
return ExpandChainLibCall(LC, Node, false);
}
-void SelectionDAGLegalize::ExpandNode(SDNode *Node,
- SmallVectorImpl<SDValue> &Results) {
+void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
DebugLoc dl = Node->getDebugLoc();
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
switch (Node->getOpcode()) {
case ISD::CTPOP:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
Results.push_back(Tmp1);
break;
@@ -2986,7 +2555,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::PREFETCH:
case ISD::VAEND:
case ISD::EH_SJLJ_LONGJMP:
- case ISD::EH_SJLJ_DISPATCHSETUP:
// If the target didn't expand these, there's nothing to do, so just
// preserve the chain and be done.
Results.push_back(Node->getOperand(0));
@@ -3006,7 +2574,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__sync_synchronize",
TLI.getPointerTy()),
Args, DAG, dl);
@@ -3083,7 +2651,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
Args, DAG, dl);
Results.push_back(CallResult.second);
@@ -3166,7 +2734,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
unsigned Align = Node->getConstantOperandVal(3);
SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
- MachinePointerInfo(V), false, false, 0);
+ MachinePointerInfo(V),
+ false, false, false, 0);
SDValue VAList = VAListLoad;
if (Align > TLI.getMinStackArgumentAlignment()) {
@@ -3191,7 +2760,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
MachinePointerInfo(V), false, false, 0);
// Load the actual argument out of the pointer VAList
Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
- false, false, 0));
+ false, false, false, 0));
Results.push_back(Results[0].getValue(1));
break;
}
@@ -3202,7 +2771,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
Node->getOperand(2), MachinePointerInfo(VS),
- false, false, 0);
+ false, false, false, 0);
Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
MachinePointerInfo(VD), false, false, 0);
Results.push_back(Tmp1);
@@ -3236,15 +2805,57 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Node->getOperand(2), dl));
break;
case ISD::VECTOR_SHUFFLE: {
- SmallVector<int, 8> Mask;
- cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+ SmallVector<int, 32> NewMask;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
EVT VT = Node->getValueType(0);
EVT EltVT = VT.getVectorElementType();
- if (!TLI.isTypeLegal(EltVT))
- EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ if (!TLI.isTypeLegal(EltVT)) {
+
+ EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+
+ // BUILD_VECTOR operands are allowed to be wider than the element type.
+ // But if NewEltVT is smaller than EltVT, the BUILD_VECTOR does not accept it
+ if (NewEltVT.bitsLT(EltVT)) {
+
+ // Convert shuffle node.
+ // If original node was v4i64 and the new EltVT is i32,
+ // cast operands to v8i32 and re-build the mask.
+
+ // Calculate new VT, the size of the new VT should be equal to original.
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+ VT.getSizeInBits()/NewEltVT.getSizeInBits());
+ assert(NewVT.bitsEq(VT));
+
+ // cast operands to new VT
+ Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1);
+
+ // Convert the shuffle mask
+ unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements();
+
+ // EltVT gets smaller
+ assert(factor > 0);
+
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+ if (Mask[i] < 0) {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]);
+ }
+ else {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]*factor+fi);
+ }
+ }
+ Mask = NewMask;
+ VT = NewVT;
+ }
+ EltVT = NewEltVT;
+ }
unsigned NumElems = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Ops;
+ SmallVector<SDValue, 16> Ops;
for (unsigned i = 0; i != NumElems; ++i) {
if (Mask[i] < 0) {
Ops.push_back(DAG.getUNDEF(EltVT));
@@ -3253,14 +2864,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
unsigned Idx = Mask[i];
if (Idx < NumElems)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Node->getOperand(0),
+ Op0,
DAG.getIntPtrConstant(Idx)));
else
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Node->getOperand(1),
+ Op1,
DAG.getIntPtrConstant(Idx - NumElems)));
}
+
Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ // We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1);
Results.push_back(Tmp1);
break;
}
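The mask rewrite above (the v4i64-viewed-as-v8i32 example, factor == 2) is pure index arithmetic, so it can be illustrated outside the DAG. A standalone sketch, not part of the patch, with illustrative values only:

#include <cassert>
#include <vector>

static std::vector<int> widenMask(const std::vector<int> &Mask, unsigned Factor) {
  std::vector<int> NewMask;
  for (int M : Mask) {
    if (M < 0) {
      // An undef lane stays undef, once per narrow sub-element.
      NewMask.insert(NewMask.end(), Factor, -1);
    } else {
      // Wide element M becomes Factor consecutive narrow elements.
      for (unsigned f = 0; f != Factor; ++f)
        NewMask.push_back(M * static_cast<int>(Factor) + static_cast<int>(f));
    }
  }
  return NewMask;
}

int main() {
  // <3, -1, 0, 2> on v4i64 becomes <6,7, -1,-1, 0,1, 4,5> on v8i32.
  std::vector<int> Out = widenMask({3, -1, 0, 2}, 2);
  std::vector<int> Expect = {6, 7, -1, -1, 0, 1, 4, 5};
  assert(Out == Expect);
  return 0;
}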
@@ -3408,10 +3022,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
// Check to see if this FP immediate is already legal.
// If this is a legal constant, turn it into a TargetConstantFP node.
- if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
- Results.push_back(SDValue(Node, 0));
- else
- Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+ if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+ Results.push_back(ExpandConstantFP(CFP, true));
break;
}
case ISD::EHSELECTION: {
@@ -3423,13 +3035,23 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
break;
}
case ISD::EXCEPTIONADDR: {
- unsigned Reg = TLI.getExceptionAddressRegister();
+ unsigned Reg = TLI.getExceptionPointerRegister();
assert(Reg && "Can't expand to unknown register!");
Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
Node->getValueType(0)));
Results.push_back(Results[0].getValue(1));
break;
}
+ case ISD::FSUB: {
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
+ "Don't know how to expand this FP subtraction!");
+ Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
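The FSUB expansion above is just the identity a - b == a + (-b), built as an FNEG feeding an FADD. A trivial standalone check of the same identity:

#include <cassert>

static float fsubViaNegAdd(float a, float b) {
  return a + (-b);   // FNEG then FADD
}

int main() {
  assert(fsubViaNegAdd(5.0f, 3.0f) == 2.0f);
  assert(fsubViaNegAdd(1.5f, -2.5f) == 4.0f);
  return 0;
}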
case ISD::SUB: {
EVT VT = Node->getValueType(0);
assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
@@ -3657,6 +3279,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
DAG.getIntPtrConstant(0));
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
DAG.getIntPtrConstant(1));
+ // Ret is a node with an illegal type. Because such things are not
+ // generally permitted during this phase of legalization, delete the
+ // node. The above EXTRACT_ELEMENT nodes should have been folded.
+ DAG.DeleteNode(Ret.getNode());
}
if (isSigned) {
@@ -3797,7 +3423,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
Tmp2, Tmp3, Tmp4, dl);
- LastCALLSEQ_END = DAG.getEntryNode();
assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
@@ -3807,6 +3432,35 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Results.push_back(Tmp1);
break;
}
+ case ISD::BUILD_VECTOR:
+ Results.push_back(ExpandBUILD_VECTOR(Node));
+ break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: {
+ // Scalarize vector SRA/SRL/SHL.
+ EVT VT = Node->getValueType(0);
+ assert(VT.isVector() && "Unable to legalize non-vector shift");
+ assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal");
+ unsigned NumElem = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Scalars;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(0), DAG.getIntPtrConstant(Idx));
+ SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(1), DAG.getIntPtrConstant(Idx));
+ Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
+ VT.getScalarType(), Ex, Sh));
+ }
+ SDValue Result =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
+ &Scalars[0], Scalars.size());
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
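The scalarization above extracts each lane and its per-lane shift amount, performs the scalar shift, and rebuilds the vector. A standalone sketch of the same shape (not part of the patch), using plain arrays in place of DAG nodes:

#include <array>
#include <cassert>
#include <cstdint>

template <size_t N>
static std::array<uint32_t, N> shlVector(const std::array<uint32_t, N> &Val,
                                         const std::array<uint32_t, N> &Amt) {
  std::array<uint32_t, N> Out{};
  for (size_t i = 0; i != N; ++i)
    Out[i] = Val[i] << Amt[i];   // one scalar SHL per extracted element pair
  return Out;
}

int main() {
  std::array<uint32_t, 4> V = {1, 2, 3, 4};
  std::array<uint32_t, 4> A = {0, 1, 2, 3};
  std::array<uint32_t, 4> R = shlVector(V, A);
  assert(R[0] == 1 && R[1] == 4 && R[2] == 12 && R[3] == 32);
  return 0;
}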
case ISD::GLOBAL_OFFSET_TABLE:
case ISD::GlobalAddress:
case ISD::GlobalTLSAddress:
@@ -3817,13 +3471,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID:
// FIXME: Custom lowering for these operations shouldn't return null!
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- Results.push_back(SDValue(Node, i));
break;
}
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty())
+ ReplaceNode(Node, Results.data());
}
-void SelectionDAGLegalize::PromoteNode(SDNode *Node,
- SmallVectorImpl<SDValue> &Results) {
+
+void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
EVT OVT = Node->getValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
Node->getOpcode() == ISD::SINT_TO_FP ||
@@ -3835,20 +3492,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
SDValue Tmp1, Tmp2, Tmp3;
switch (Node->getOpcode()) {
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
// Zero extend the argument.
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
- // Perform the larger operation.
+ // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
+ // already the correct result.
Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
if (Node->getOpcode() == ISD::CTTZ) {
- //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+ // FIXME: This should set a bit in the zero extended value instead.
Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT),
Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
ISD::SETEQ);
Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
- } else if (Node->getOpcode() == ISD::CTLZ) {
+ } else if (Node->getOpcode() == ISD::CTLZ ||
+ Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
// Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
DAG.getConstant(NVT.getSizeInBits() -
@@ -3877,6 +3538,33 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
Node->getOpcode() == ISD::SINT_TO_FP, dl);
Results.push_back(Tmp1);
break;
+ case ISD::VAARG: {
+ SDValue Chain = Node->getOperand(0); // Get the chain.
+ SDValue Ptr = Node->getOperand(1); // Get the pointer.
+
+ unsigned TruncOp;
+ if (OVT.isVector()) {
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger()
+ && "VAARG promotion is supported only for vectors or integer types");
+ TruncOp = ISD::TRUNCATE;
+ }
+
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2),
+ Node->getConstantOperandVal(3));
+ Chain = Tmp1.getValue(1);
+
+ Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1);
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ ReplacedNode(Node);
+ break;
+ }
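The VAARG promotion above mirrors what C code does anyway under the default argument promotions: read the wider type from the va_list, then narrow. A standalone sketch (not part of the patch):

#include <cassert>
#include <cstdarg>

static int sumBytes(int count, ...) {
  va_list ap;
  va_start(ap, count);
  int total = 0;
  for (int i = 0; i < count; ++i) {
    // char is promoted to int when passed through '...', so va_arg reads the
    // wide type and the result is truncated afterwards -- the same shape as
    // the promoted VAARG followed by a TRUNCATE above.
    char c = static_cast<char>(va_arg(ap, int));
    total += c;
  }
  va_end(ap);
  return total;
}

int main() {
  assert(sumBytes(3, 'a', 'b', 'c') == 'a' + 'b' + 'c');
  return 0;
}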
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
@@ -3924,8 +3612,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
break;
}
case ISD::VECTOR_SHUFFLE: {
- SmallVector<int, 8> Mask;
- cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
// Cast the two input vectors.
Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
@@ -3950,7 +3637,31 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
Tmp1, Tmp2, Node->getOperand(2)));
break;
}
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FPOW: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp3, DAG.getIntPtrConstant(0)));
+ break;
}
+ case ISD::FLOG2:
+ case ISD::FEXP2:
+ case ISD::FLOG:
+ case ISD::FEXP: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp2, DAG.getIntPtrConstant(0)));
+ break;
+ }
+ }
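The new FP promotions follow one pattern: FP_EXTEND the operands, perform the operation in the wider type, and FP_ROUND the result back. A rough standalone sketch of that pattern for division (not part of the patch, and ignoring questions of rounding equivalence):

#include <cassert>
#include <cmath>

static float fdivViaDouble(float a, float b) {
  double wide = static_cast<double>(a) / static_cast<double>(b); // extend, op
  return static_cast<float>(wide);                               // round back
}

int main() {
  assert(fdivViaDouble(1.0f, 4.0f) == 0.25f);
  assert(std::fabs(fdivViaDouble(10.0f, 3.0f) - (10.0f / 3.0f)) < 1e-6f);
  return 0;
}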
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty())
+ ReplaceNode(Node, Results.data());
}
// SelectionDAG::Legalize - This is the entry point for the file.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 7c1cc69d6a2f..e3938968b205 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -479,8 +479,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
- L->getPointerInfo(), NVT,
- L->isVolatile(), L->isNonTemporal(), L->getAlignment());
+ L->getPointerInfo(), NVT, L->isVolatile(),
+ L->isNonTemporal(), false, L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -492,7 +492,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
L->getMemoryVT(), dl, L->getChain(),
L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
L->getMemoryVT(), L->isVolatile(),
- L->isNonTemporal(), L->getAlignment());
+ L->isNonTemporal(), false, L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -672,7 +672,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
case ISD::SETUEQ:
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
break;
- default: assert(false && "Do not know how to soften this setcc!");
+ default: llvm_unreachable("Do not know how to soften this setcc!");
}
}
@@ -1212,7 +1212,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
switch (SrcVT.getSimpleVT().SimpleTy) {
default:
- assert(false && "Unsupported UINT_TO_FP!");
+ llvm_unreachable("Unsupported UINT_TO_FP!");
case MVT::i32:
Parts = TwoE32;
break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index a5c4c2ded4c5..95ddb1e0f6fb 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -20,7 +20,6 @@
#include "LegalizeTypes.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -57,8 +56,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
case ISD::CONVERT_RNDSAT:
Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
@@ -211,13 +212,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
- break;
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
- if (NOutVT.bitsEq(NInVT))
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector())
// The input promotes to the same size. Convert the promoted value.
return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
break;
@@ -251,9 +249,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
}
case TargetLowering::TypeWidenVector:
- if (OutVT.bitsEq(NInVT))
- // The input is widened to the same size. Convert to the widened value.
- return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));
+ // The input is widened to the same size. Convert to the widened value.
+ // Make sure that the outgoing value is not a vector, because this would
+ // make us bitcast between two vectors which are legalized in different ways.
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
}
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
@@ -312,7 +312,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
- Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
+ Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Subtract off the extra leading bits in the bigger type.
return DAG.getNode(ISD::SUB, dl, NVT, Op,
DAG.getConstant(NVT.getSizeInBits() -
@@ -330,13 +330,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
DebugLoc dl = N->getDebugLoc();
- // The count is the same in the promoted type except if the original
- // value was zero. This can be handled by setting the bit just off
- // the top of the original type.
- APInt TopBit(NVT.getSizeInBits(), 0);
- TopBit.setBit(OVT.getSizeInBits());
- Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
- return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
+ if (N->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ APInt TopBit(NVT.getSizeInBits(), 0);
+ TopBit.setBit(OVT.getSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
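[Editor's aside - the following sketch is illustrative and not part of the patch.
It shows the CTTZ promotion above on concrete types (i8 promoted to i16); the
helper names are invented for the example.]

#include <cstdint>

// Portable trailing-zero count for the sketch; V is never zero here.
static unsigned cttz16(uint16_t V) {
  unsigned N = 0;
  while (!(V & 1)) { V >>= 1; ++N; }
  return N;
}

// cttz of an i8 value computed in i16: OR in bit 8 (the bit just off the top
// of the original type) so that a zero input yields 8, not 16.
static unsigned cttz8_via_i16(uint8_t X) {
  return cttz16(static_cast<uint16_t>(X) | 0x100u);
}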
@@ -486,7 +488,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
- SDValue Mask = GetPromotedInteger(N->getOperand(0));
+ SDValue Mask = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
+
+ // Promote all the way up to the canonical SetCC type.
+ Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy));
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
return DAG.getNode(ISD::VSELECT, N->getDebugLoc(),
@@ -1098,8 +1104,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
@@ -1171,7 +1179,6 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
switch (Opc) {
default:
llvm_unreachable("Unhandled atomic intrinsic Expand!");
- break;
case ISD::ATOMIC_SWAP:
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type for atomic!");
@@ -1355,7 +1362,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne);
// If we don't know anything about the high bits, exit.
if (((KnownZero|KnownOne) & HighBitMask) == 0)
@@ -1390,15 +1397,15 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
}
}
-#if 0
- // FIXME: This code is broken for shifts with a zero amount!
// If we know that all of the high bits of the shift amount are zero, then we
// can do this as a couple of simple shifts.
if ((KnownZero & HighBitMask) == HighBitMask) {
- // Compute 32-amt.
- SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy,
- DAG.getConstant(NVTBits, ShTy),
- Amt);
+ // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined
+ // shift if x is zero. We can use XOR here because x is known to be smaller
+ // than 32.
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt,
+ DAG.getConstant(NVTBits-1, ShTy));
+
unsigned Op1, Op2;
switch (N->getOpcode()) {
default: llvm_unreachable("Unknown shift");
@@ -1407,13 +1414,23 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
}
- Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt);
- Hi = DAG.getNode(ISD::OR, NVT,
- DAG.getNode(Op1, NVT, InH, Amt),
- DAG.getNode(Op2, NVT, InL, Amt2));
+    // When shifting right, the arithmetic for Lo and Hi is swapped.
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(InL, InH);
+
+ // Use a little trick to get the bits that move from Lo to Hi. First
+ // shift by one bit.
+ SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy));
+ // Then compute the remaining shift with amount-1.
+ SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2);
+
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(Hi, Lo);
return true;
}
-#endif
return false;
}
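[Editor's aside - the following sketch is illustrative and not part of the patch.
It restates the shift expansion above on plain 32-bit halves, assuming the
opcode is SHL and NVT is i32; the function name and signature are invented.]

#include <cassert>
#include <cstdint>

// Shift the 64-bit value (InH:InL) left by Amt, where the high bits of Amt are
// known to be zero (Amt < 32), without ever shifting by a full 32 bits.
static void expandShl64(uint32_t InL, uint32_t InH, unsigned Amt,
                        uint32_t &Lo, uint32_t &Hi) {
  assert(Amt < 32 && "high bits of the shift amount must be known zero");
  unsigned Amt2 = Amt ^ 31;   // Equals 31 - Amt; stays defined when Amt == 0.
  uint32_t Sh1 = InL >> 1;    // First move one bit across ...
  uint32_t Sh2 = Sh1 >> Amt2; // ... then the rest, a total of 32 - Amt bits.
  Lo = InL << Amt;
  Hi = (InH << Amt) | Sh2;    // OR in the bits that crossed from Lo into Hi.
}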
@@ -1493,8 +1510,6 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
return true;
}
-
- return false;
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
@@ -1702,8 +1717,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
DAG.getConstant(0, NVT), ISD::SETNE);
- SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo);
- SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi);
+ SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi);
Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
@@ -1732,8 +1747,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
DAG.getConstant(0, NVT), ISD::SETNE);
- SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo);
- SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi);
+ SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi);
Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ,
DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
@@ -1778,6 +1793,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
+ bool isInvariant = N->isInvariant();
DebugLoc dl = N->getDebugLoc();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
@@ -1808,7 +1824,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
} else if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, isInvariant, Alignment);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -2305,12 +2321,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
- true, Func, Args, DAG, dl);
+ 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Func, Args, DAG, dl);
SplitInteger(CallInfo.first, Lo, Hi);
SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
DAG.getConstant(0, PtrVT),
ISD::SETNE);
@@ -2781,7 +2799,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
else if (SrcVT == MVT::i128)
FF = APInt(32, F32TwoE128);
else
- assert(false && "Unsupported UINT_TO_FP!");
+ llvm_unreachable("Unsupported UINT_TO_FP!");
// Check whether the sign bit is set.
SDValue Lo, Hi;
@@ -2926,38 +2944,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
- SDValue Op0 = N->getOperand(1);
- SDValue Op1 = N->getOperand(1);
- assert(Op0.getValueType() == Op1.getValueType() &&
- "Invalid input vector types");
-
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ EVT InElemTy = OutVT.getVectorElementType();
EVT OutElemTy = NOutVT.getVectorElementType();
- unsigned NumElem0 = Op0.getValueType().getVectorNumElements();
- unsigned NumElem1 = Op1.getValueType().getVectorNumElements();
+ unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
unsigned NumOutElem = NOutVT.getVectorNumElements();
- assert(NumElem0 + NumElem1 == NumOutElem &&
- "Invalid number of incoming elements");
+ unsigned NumOperands = N->getNumOperands();
+ assert(NumElem * NumOperands == NumOutElem &&
+ "Unexpected number of elements");
// Take the elements from the first vector.
SmallVector<SDValue, 8> Ops(NumOutElem);
- for (unsigned i = 0; i < NumElem0; ++i) {
- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- Op0.getValueType().getScalarType(), Op0,
- DAG.getIntPtrConstant(i));
- Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
- }
-
- // Take the elements from the second vector
- for (unsigned i = 0; i < NumElem1; ++i) {
- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- Op1.getValueType().getScalarType(), Op1,
- DAG.getIntPtrConstant(i));
- Ops[i + NumElem0] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ SDValue Op = N->getOperand(i);
+ for (unsigned j = 0; j < NumElem; ++j) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InElemTy, Op, DAG.getIntPtrConstant(j));
+ Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ }
}
return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index a4bb577433cc..439aa4de5cf5 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -222,8 +222,6 @@ bool DAGTypeLegalizer::run() {
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
EVT ResultVT = N->getValueType(i);
switch (getTypeAction(ResultVT)) {
- default:
- assert(false && "Unknown action!");
case TargetLowering::TypeLegal:
break;
// The following calls must take care of *all* of the node's results,
@@ -275,8 +273,6 @@ ScanOperands:
EVT OpVT = N->getOperand(i).getValueType();
switch (getTypeAction(OpVT)) {
- default:
- assert(false && "Unknown action!");
case TargetLowering::TypeLegal:
continue;
// The following calls must either replace all of the node's results
@@ -752,7 +748,11 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
- assert(Result.getValueType() == Op.getValueType().getVectorElementType() &&
+  // Note that in some cases vector operation operands may be wider than the
+  // vector element type. For example, a BUILD_VECTOR of type <1 x i1> may
+  // have a constant i8 operand.
+ assert(Result.getValueType().getSizeInBits() >=
+ Op.getValueType().getVectorElementType().getSizeInBits() &&
"Invalid type for scalarized vector");
AnalyzeNewValue(Result);
@@ -889,7 +889,7 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
MachinePointerInfo(), false, false, 0);
// Result is a load from the stack slot.
return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
/// CustomLowerNode - Replace the node's results with custom code provided
@@ -1056,8 +1056,9 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
std::pair<SDValue,SDValue> CallInfo =
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, TLI.getLibcallCallingConv(LC), false,
- /*isReturnValueUsed=*/true,
+ false, 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
return CallInfo.first;
}
@@ -1084,12 +1085,11 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
return CallInfo;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index abacdac686bc..e8664458e9a6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -521,6 +521,7 @@ private:
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
SDValue ScalarizeVecRes_SETCC(SDNode *N);
@@ -633,6 +634,7 @@ private:
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_Convert(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 8e7e4985e4d0..a8ff7c65abde 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -21,7 +21,6 @@
#include "LegalizeTypes.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -46,8 +45,6 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Handle some special cases efficiently.
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
case TargetLowering::TypeLegal:
case TargetLowering::TypePromoteInteger:
break;
@@ -130,7 +127,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
false, false, 0);
// Load the first half from the stack slot.
- Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0);
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo,
+ false, false, false, 0);
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -140,7 +138,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
PtrInfo.getWithOffset(IncrementSize), false,
- false, MinAlign(Alignment, IncrementSize));
+ false, false, MinAlign(Alignment, IncrementSize));
// Handle endianness of the load.
if (TLI.isBigEndian())
@@ -212,11 +210,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
unsigned Alignment = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, isInvariant, Alignment);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -224,7 +223,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- isVolatile, isNonTemporal,
+ isVolatile, isNonTemporal, isInvariant,
MinAlign(Alignment, IncrementSize));
// Build a factor node to remember that this load is independent of the
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index f815b00db5d6..3ae8345bd198 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -64,6 +64,8 @@ class VectorLegalizer {
// Implement vselect in terms of XOR, AND, OR when blend is not supported
// by the target.
SDValue ExpandVSELECT(SDValue Op);
+ SDValue ExpandLoad(SDValue Op);
+ SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
// Implements vector promotion; this is essentially just bitcasting the
// operands to a different type and bitcasting the result back to the
@@ -124,6 +126,33 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
SDValue Result =
SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
+ if (Op.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
+ if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT()))
+ return TranslateLegalizeResults(Op, Result);
+ Changed = true;
+ return LegalizeOp(ExpandLoad(Op));
+ }
+ } else if (Op.getOpcode() == ISD::STORE) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ EVT StVT = ST->getMemoryVT();
+ EVT ValVT = ST->getValue().getValueType();
+ if (StVT.isVector() && ST->isTruncatingStore())
+ switch (TLI.getTruncStoreAction(ValVT, StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ return TranslateLegalizeResults(Op, Result);
+ case TargetLowering::Custom:
+ Changed = true;
+ return LegalizeOp(TLI.LowerOperation(Result, DAG));
+ case TargetLowering::Expand:
+ Changed = true;
+ return LegalizeOp(ExpandStore(Op));
+ }
+ }
+
bool HasVectorValue = false;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
J != E;
@@ -156,8 +185,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
- case ISD::CTTZ:
case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::SELECT:
case ISD::VSELECT:
@@ -262,6 +293,97 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
+
+SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
+ DebugLoc dl = Op.getDebugLoc();
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ EVT SrcVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
+ Op.getNode()->getValueType(0).getScalarType(),
+ Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(Stride));
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size());
+
+ AddLegalizedOperand(Op.getValue(0), Value);
+ AddLegalizedOperand(Op.getValue(1), NewChain);
+
+ return (Op.getResNo() ? NewChain : Value);
+}
+
+SDValue VectorLegalizer::ExpandStore(SDValue Op) {
+ DebugLoc dl = Op.getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ SDValue Chain = ST->getChain();
+ SDValue BasePTR = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ unsigned NumElem = StVT.getVectorNumElements();
+ // The type of the data we want to save
+ EVT RegVT = Value.getValueType();
+ EVT RegSclVT = RegVT.getScalarType();
+ // The type of data as saved in memory.
+ EVT MemSclVT = StVT.getScalarType();
+
+ // Cast floats into integers
+ unsigned ScalarSize = MemSclVT.getSizeInBits();
+
+  // Round odd types to the next power of two.
+ if (!isPowerOf2_32(ScalarSize))
+ ScalarSize = NextPowerOf2(ScalarSize);
+
+ // Store Stride in bytes
+ unsigned Stride = ScalarSize/8;
+ // Extract each of the elements from the original vector
+ // and save them into memory individually.
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ RegSclVT, Value, DAG.getIntPtrConstant(Idx));
+
+ // This scalar TruncStore may be illegal, but we legalize it later.
+ SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
+ ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(Stride));
+
+ Stores.push_back(Store);
+ }
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ AddLegalizedOperand(Op, TF);
+ return TF;
+}
+
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
@@ -274,10 +396,12 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// If we can't even use the basic vector operations of
// AND,OR,XOR, we will have to scalarize the op.
- if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::XOR, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::OR, VT))
- return DAG.UnrollVectorOp(Op.getNode());
+  // Notice that the operation may be 'promoted', which means that it is
+  // 'bitcast' to another type that is handled.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits()
&& "Invalid mask size");
@@ -301,9 +425,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
DebugLoc DL = Op.getDebugLoc();
// Make sure that the SINT_TO_FP and SRL instructions are available.
- if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::SRL, VT))
- return DAG.UnrollVectorOp(Op.getNode());
+ if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
EVT SVT = VT.getScalarType();
assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 107a42b2951c..5f23f01dafb4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -21,7 +21,6 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -59,6 +58,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
@@ -194,7 +194,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
- N->getOriginalAlignment());
+ N->isInvariant(), N->getOriginalAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -227,6 +227,37 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
return InOp;
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
+ SDValue Cond = GetScalarizedVector(N->getOperand(0));
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false);
+ TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true);
+ if (ScalarBool != VecBool) {
+ EVT CondVT = Cond.getValueType();
+ switch (ScalarBool) {
+ case TargetLowering::UndefinedBooleanContent:
+ break;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent);
+      // Vector true is all ones; the scalar select expects a single 1, so mask.
+ Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT,
+ Cond, DAG.getConstant(1, CondVT));
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrOneBooleanContent);
+      // Vector true is a single 1; the scalar select expects all ones, so sign extend.
+ Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT,
+ Cond, DAG.getValueType(MVT::i1));
+ break;
+ }
+ }
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), Cond, LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(1));
return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
@@ -405,6 +436,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG);
dbgs() << "\n");
SDValue Lo, Hi;
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
switch (N->getOpcode()) {
default:
@@ -442,8 +477,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ANY_EXTEND:
case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
- case ISD::CTPOP:
case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@@ -677,7 +714,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Load the Lo part from the stack slot.
Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
@@ -686,7 +723,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, MinAlign(Alignment, IncrementSize));
+ false, false, false, MinAlign(Alignment, IncrementSize));
}
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
@@ -713,20 +750,21 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
unsigned Alignment = LD->getOriginalAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
EVT LoMemVT, HiMemVT;
GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
- Alignment);
+ isInvariant, Alignment);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo().getWithOffset(IncrementSize),
- HiMemVT, isVolatile, isNonTemporal, Alignment);
+ HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -773,46 +811,18 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
DebugLoc dl = N->getDebugLoc();
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
- // Split the input.
+ // If the input also splits, handle it directly for a compile time speedup.
+ // Otherwise split it by hand.
EVT InVT = N->getOperand(0).getValueType();
- switch (getTypeAction(InVT)) {
- default: llvm_unreachable("Unexpected type action!");
- case TargetLowering::TypeLegal: {
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ } else {
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
LoVT.getVectorNumElements());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
DAG.getIntPtrConstant(0));
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
- case TargetLowering::TypePromoteInteger: {
- SDValue InOp = GetPromotedInteger(N->getOperand(0));
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(),
- InOp.getValueType().getVectorElementType(),
- LoVT.getVectorNumElements());
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(0));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
- case TargetLowering::TypeSplitVector:
- GetSplitVector(N->getOperand(0), Lo, Hi);
- break;
- case TargetLowering::TypeWidenVector: {
- // If the result needs to be split and the input needs to be widened,
- // the two types must have different lengths. Use the widened result
- // and extract from it to do the split.
- SDValue InOp = GetWidenedVector(N->getOperand(0));
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
- LoVT.getVectorNumElements());
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(0));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
}
if (N->getOpcode() == ISD::FP_ROUND) {
@@ -1239,6 +1249,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::VSELECT:
case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
@@ -1590,12 +1601,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
- break;
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
+ // If the incoming type is a vector that is being promoted, then
+ // we know that the elements are arranged differently and that we
+ // must perform the conversion using a stack slot.
+ if (InVT.isVector())
+ break;
+
// If the InOp is promoted to the same size, convert it. Otherwise,
// fall out of the switch and widen the promoted input.
InOp = GetPromotedInteger(InOp);
@@ -1928,7 +1942,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDValue InOp2 = GetWidenedVector(N->getOperand(2));
assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
- return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
WidenVT, Cond1, InOp1, InOp2);
}
@@ -2032,6 +2046,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
@@ -2165,6 +2180,32 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
MVT::Other,&StChain[0],StChain.size());
}
+SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
+ SDValue InOp0 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+
+ // WARNING: In this code we widen the compare instruction with garbage.
+  // This garbage may contain denormal floats, which may be slow. Is this a
+  // real concern? Should we zero the unused lanes if this is a float compare?
+
+ // Get a new SETCC node to compare the newly widened operands.
+ // Only some of the compared elements are legal.
+ EVT SVT = TLI.getSetCCResultType(InOp0.getValueType());
+ SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SVT, InOp0, InOp1, N->getOperand(2));
+
+ // Extract the needed results from the result vector.
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
+ SVT.getVectorElementType(),
+ N->getValueType(0).getVectorNumElements());
+ SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+ ResVT, WideSETCC, DAG.getIntPtrConstant(0));
+
+ return PromoteTargetBoolean(CC, N->getValueType(0));
+}
+
+
//===----------------------------------------------------------------------===//
// Vector Widening Utilities
//===----------------------------------------------------------------------===//
@@ -2276,6 +2317,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth; // Difference
@@ -2285,7 +2327,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
- isVolatile, isNonTemporal, Align);
+ isVolatile, isNonTemporal, isInvariant, Align);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction
@@ -2323,18 +2365,37 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getIntPtrConstant(Increment));
+ SDValue L;
if (LdWidth < NewVTWidth) {
// Our current type we are using is too large, find a better size
NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
NewVTWidth = NewVT.getSizeInBits();
- }
-
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
isVolatile,
- isNonTemporal, MinAlign(Align, Increment));
- LdChain.push_back(LdOp.getValue(1));
- LdOps.push_back(LdOp);
+ isNonTemporal, isInvariant,
+ MinAlign(Align, Increment));
+ LdChain.push_back(L.getValue(1));
+ if (L->getValueType(0).isVector()) {
+ SmallVector<SDValue, 16> Loads;
+ Loads.push_back(L);
+ unsigned size = L->getValueSizeInBits(0);
+ while (size < LdOp->getValueSizeInBits(0)) {
+ Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
+ size += L->getValueSizeInBits(0);
+ }
+ L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0),
+ &Loads[0], Loads.size());
+ }
+ } else {
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+ isNonTemporal, isInvariant, MinAlign(Align, Increment));
+ LdChain.push_back(L.getValue(1));
+ }
+
+ LdOps.push_back(L);
+
LdWidth -= NewVTWidth;
}
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
new file mode 100644
index 000000000000..ff0136e08cd9
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -0,0 +1,657 @@
+//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ResourcePriorityQueue class, which is a
+// SchedulingPriorityQueue that prioritizes instructions using DFA state to
+// reduce the length of the critical path through the basic block
+// on VLIW platforms.
+// The scheduler is basically a top-down adaptable list scheduler with DFA
+// resource tracking added to the cost function.
+// The DFA is queried as a state machine to model "packets/bundles" during
+// scheduling. Currently packets/bundles are discarded at the end of
+// scheduling, affecting only the order of instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLowering.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable use of DFA during scheduling"));
+
+static cl::opt<signed> RegPressureThreshold(
+ "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
+ cl::desc("Track reg pressure and switch priority to in-depth"));
+
+
+ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
+ Picker(this),
+ InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData())
+{
+ TII = IS->getTargetLowering().getTargetMachine().getInstrInfo();
+ TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo();
+ TLI = &IS->getTargetLowering();
+
+ const TargetMachine &tm = (*IS->MF).getTarget();
+ ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL);
+  // This hard requirement could be relaxed, but for now
+  // do not let it proceed.
+ assert (ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
+
+ ParallelLiveRanges = 0;
+ HorizontalVerticalBalance = 0;
+}
+
+unsigned
+ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+
+ SUnit *PredSU = I->getSUnit();
+ const SDNode *ScegN = PredSU->getNode();
+
+ if (!ScegN)
+ continue;
+
+    // If the value comes from a CopyFromReg, it is probably
+    // live into the BB from outside.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: NumberDeps++; break;
+ case ISD::CopyToReg: break;
+ case ISD::INLINEASM: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ EVT VT = ScegN->getValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
+ unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+
+ SUnit *SuccSU = I->getSUnit();
+ const SDNode *ScegN = SuccSU->getNode();
+ if (!ScegN)
+ continue;
+
+    // If the value is passed to CopyToReg, it is probably
+    // live outside the BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: break;
+ case ISD::CopyToReg: NumberDeps++; break;
+ case ISD::INLINEASM: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+static unsigned numberCtrlDepsInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+static unsigned numberCtrlPredInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+///
+/// Initialize nodes.
+///
+void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ initNumRegDefsLeft(SU);
+ SU->NodeQueueId = 0;
+ }
+}
+
+/// This heuristic is used if DFA scheduling is not desired
+/// for some VLIW platform.
+bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+ return OnlyAvailablePred;
+}
+
+void ResourcePriorityQueue::push(SUnit *SU) {
+  // Look at all of the successors of this node. Count the number of nodes for
+  // which this node is the sole unscheduled predecessor.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+ Queue.push_back(SU);
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
+ if (!SU || !SU->getNode())
+ return false;
+
+ // If this is a compound instruction,
+ // it is likely to be a call. Do not delay it.
+ if (SU->getNode()->getGluedNode())
+ return true;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ if (SU->getNode()->isMachineOpcode())
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode())))
+ return false;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+
+  // Now see if there are no other dependencies on
+  // instructions already in the packet.
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
+ E = Packet[i]->Succs.end(); I != E; ++I) {
+      // Since we do not add pseudos to packets, we might as well
+      // ignore order deps.
+ if (I->isCtrl())
+ continue;
+
+ if (I->getSUnit() == SU)
+ return false;
+ }
+
+ return true;
+}
+
+/// Keep track of available resources.
+void ResourcePriorityQueue::reserveResources(SUnit *SU) {
+ // If this SU does not fit in the packet
+ // start a new one.
+ if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ ResourcesModel->reserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode()));
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+ Packet.push_back(SU);
+ }
+ // Forcefully end packet for PseudoOps.
+ else {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ // If packet is now full, reset the state so in the next cycle
+ // we start fresh.
+ if (Packet.size() >= InstrItins->IssueWidth) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+}
+
+signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ // Gen estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
+ EVT VT = SU->getNode()->getValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance += numberRCValSuccInSU(SU, RCId);
+ }
+ // Kill estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = SU->getNode()->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (isa<ConstantSDNode>(Op.getNode()))
+ continue;
+
+ if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance -= numberRCValPredInSU(SU, RCId);
+ }
+ return RegBalance;
+}
+
+/// Estimates change in reg pressure from this SU.
+/// It is achieved by trivial tracking of defined
+/// and used vregs in dependent instructions.
+/// The RawPressure flag makes this function ignore
+/// existing reg file sizes and report the raw def/use
+/// balance.
+signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ if (RawPressure) {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+ else {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ if ((RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) > 0) &&
+ (RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+
+ return RegBalance;
+}
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
+static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 15;
+static const unsigned PriorityFive = 5;
+static const unsigned ScaleOne = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned ScaleThree = 5;
+static const unsigned FactorOne = 2;
+
+/// Returns single number reflecting benefit of scheduling SU
+/// in the current cycle.
+signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
+ // Initial trivial priority.
+ signed ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (SU->isScheduled)
+ return ResCount;
+
+ // Forced priority is high.
+ if (SU->isScheduleHigh)
+ ResCount += PriorityOne;
+
+  // Adaptable scheduling:
+  // a small, but very parallel
+  // region, where reg pressure is an issue.
+ if (HorizontalVerticalBalance > RegPressureThreshold) {
+ // Critical path first
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ // Consider change to reg pressure from scheduling
+ // this SU.
+ ResCount -= (regPressureDelta(SU,true) * ScaleOne);
+ }
+  // Default heuristic: greedy and
+  // critical-path driven.
+ else {
+ // Critical path first.
+ ResCount += (SU->getHeight() * ScaleTwo);
+    // Now see how many instructions are blocked by this SU.
+ ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ ResCount -= (regPressureDelta(SU) * ScaleTwo);
+ }
+
+  // These are platform-specific things.
+  // They will need to go into the back end
+  // and be accessed from here via a hook.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ if (TID.isCall())
+ ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
+ }
+ else
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ ResCount += PriorityFive;
+ break;
+
+ case ISD::INLINEASM:
+ ResCount += PriorityFour;
+ break;
+ }
+ }
+ return ResCount;
+}
+
+
+/// Main resource tracking point.
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
+ // Use NULL entry as an event marker to reset
+ // the DFA state.
+ if (!SU) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ return;
+ }
+
+ const SDNode *ScegN = SU->getNode();
+ // Update reg pressure tracking.
+ // First update current node.
+ if (ScegN->isMachineOpcode()) {
+ // Estimate generated regs.
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ EVT VT = ScegN->getValueType(i);
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC)
+ RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
+ }
+ }
+ // Estimate killed regs.
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC) {
+ if (RegPressure[RC->getID()] >
+ (numberRCValPredInSU(SU, RC->getID())))
+ RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
+ else RegPressure[RC->getID()] = 0;
+ }
+ }
+ }
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
+ continue;
+ --I->getSUnit()->NumRegDefsLeft;
+ }
+ }
+
+ // Reserve resources for this SU.
+ reserveResources(SU);
+
+ // Adjust number of parallel live ranges.
+  // The heuristic is simple: a node with no data successors reduces
+  // the number of live ranges; all others increase it.
+ unsigned NumberNonControlDeps = 0;
+
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ adjustPriorityOfUnscheduledPreds(I->getSUnit());
+ if (!I->isCtrl())
+ NumberNonControlDeps++;
+ }
+
+ if (!NumberNonControlDeps) {
+ if (ParallelLiveRanges >= SU->NumPreds)
+ ParallelLiveRanges -= SU->NumPreds;
+ else
+ ParallelLiveRanges = 0;
+
+ }
+ else
+ ParallelLiveRanges += SU->NumRegDefsLeft;
+
+ // Track parallel live chains.
+ HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
+ HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
+}
+
+void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
+ unsigned NodeNumDefs = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ // No register need be allocated for this.
+ if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ NodeNumDefs = 0;
+ break;
+ }
+ NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
+ }
+ else
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::CopyFromReg:
+ NodeNumDefs++;
+ break;
+ case ISD::INLINEASM:
+ NodeNumDefs++;
+ break;
+ }
+
+ SU->NumRegDefsLeft = NodeNumDefs;
+}
+
+/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable)
+ return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
+
+
+/// Main access point - returns the next instruction
+/// to be placed in the scheduling sequence.
+SUnit *ResourcePriorityQueue::pop() {
+ if (empty())
+ return 0;
+
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ if (!DisableDFASched) {
+ signed BestCost = SUSchedulingCost(*Best);
+ for (std::vector<SUnit *>::iterator I = Queue.begin(),
+ E = Queue.end(); I != E; ++I) {
+ if (*I == *Best)
+ continue;
+
+ if (SUSchedulingCost(*I) > BestCost) {
+ BestCost = SUSchedulingCost(*I);
+ Best = I;
+ }
+ }
+ }
+ // Use default TD scheduling mechanism.
+ else {
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ }
+
+ SUnit *V = *Best;
+ if (Best != prior(Queue.end()))
+ std::swap(*Best, Queue.back());
+
+ Queue.pop_back();
+
+ return V;
+}
+
+
+void ResourcePriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+
+ Queue.pop_back();
+}
+
+
+#ifdef NDEBUG
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {
+ ResourcePriorityQueue q = *this;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ dbgs() << "Height " << su->getHeight() << ": ";
+ su->dump(DAG);
+ }
+}
+#endif
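
The pop() hunk above chooses the unit with the maximal SUSchedulingCost when DFA scheduling is enabled, then removes it from the queue with a swap-to-back. A minimal standalone sketch of that selection pattern follows; it uses a hypothetical Unit struct and cost function standing in for SUnit and SUSchedulingCost, not the real LLVM types.

// Standalone sketch of the max-cost selection used in ResourcePriorityQueue::pop().
// "Unit" and "schedulingCost" are illustrative stand-ins, not LLVM types.
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

struct Unit { int Id; int Cost; };

static int schedulingCost(const Unit &U) { return U.Cost; }

Unit popHighestCost(std::vector<Unit> &Queue) {
  assert(!Queue.empty() && "Queue is empty!");
  // Scan for the entry with the maximal cost, mirroring the DFA-sched path.
  std::vector<Unit>::iterator Best = Queue.begin();
  int BestCost = schedulingCost(*Best);
  for (std::vector<Unit>::iterator I = std::next(Queue.begin()),
       E = Queue.end(); I != E; ++I) {
    if (schedulingCost(*I) > BestCost) {
      BestCost = schedulingCost(*I);
      Best = I;
    }
  }
  // Remove the chosen entry in O(1) by swapping it with the last element.
  Unit V = *Best;
  if (Best != std::prev(Queue.end()))
    std::swap(*Best, Queue.back());
  Queue.pop_back();
  return V;
}
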
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index b275c6321ae4..24da432a47a1 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -43,7 +43,7 @@ namespace {
SmallVector<SUnit *, 16> Queue;
bool empty() const { return Queue.empty(); }
-
+
void push(SUnit *U) {
Queue.push_back(U);
}
@@ -101,8 +101,8 @@ private:
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
void ListScheduleBottomUp();
- /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
- bool ForceUnitLatencies() const { return true; }
+ /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool forceUnitLatencies() const { return true; }
};
} // end anonymous namespace
@@ -112,7 +112,7 @@ void ScheduleDAGFast::Schedule() {
DEBUG(dbgs() << "********** List Scheduling **********\n");
NumLiveRegs = 0;
- LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
LiveRegCycles.resize(TRI->getNumRegs(), 0);
// Build the scheduling graph.
@@ -159,7 +159,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
ReleasePred(SU, &*I);
if (I->isAssignedRegDep()) {
// This is a physical register dependency and it's impossible or
- // expensive to copy the register. Make sure nothing that can
+ // expensive to copy the register. Make sure nothing that can
// clobber the register is scheduled between the predecessor and
// this node.
if (!LiveRegDefs[I->getReg()]) {
@@ -245,10 +245,10 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
SDValue(LoadNode, 1));
- SUnit *NewSU = NewSUnit(N);
+ SUnit *NewSU = newSUnit(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NewSU->NodeNum);
-
+
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
@@ -268,7 +268,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
LoadSU = &SUnits[LoadNode->getNodeId()];
isNewLoad = false;
} else {
- LoadSU = NewSUnit(LoadNode);
+ LoadSU = newSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum);
}
@@ -329,7 +329,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
D.setSUnit(LoadSU);
AddPred(SuccDep, D);
}
- }
+ }
if (isNewLoad) {
AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
}
@@ -381,11 +381,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC,
SmallVector<SUnit*, 2> &Copies) {
- SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
+ SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL));
CopyFromSU->CopySrcRC = SrcRC;
CopyFromSU->CopyDstRC = DestRC;
- SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
+ SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL));
CopyToSU->CopySrcRC = DestRC;
CopyToSU->CopyDstRC = SrcRC;
@@ -425,7 +425,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
unsigned NumRes = MCID.getNumDefs();
- for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -447,7 +447,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
Added = true;
}
}
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
if (RegAdded.insert(*Alias)) {
LRegs.push_back(*Alias);
@@ -508,7 +508,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) {
+ for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
}
@@ -630,7 +630,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
std::reverse(Sequence.begin(), Sequence.end());
#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/true);
+ VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}
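
The ScheduleDAGFast hunks above switch the implicit-def and alias-set walks from const unsigned* to const uint16_t*; in both cases the pointer addresses a zero-terminated register list. A short sketch of that iteration pattern, using a made-up register array rather than a real MCInstrDesc, is:

// Sketch of walking a zero-terminated uint16_t register list, as the
// getImplicitDefs()/getAliasSet() loops above do. The register numbers are
// illustrative only, not data from a real target description.
#include <cstdint>
#include <cstdio>

static const uint16_t ImplicitDefs[] = {5, 7, 12, 0}; // 0 terminates the list.

int main() {
  for (const uint16_t *Reg = ImplicitDefs; *Reg; ++Reg)
    std::printf("implicit def of reg %u\n", unsigned(*Reg));
  return 0;
}
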
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index e757defd3895..2cb5d37d689e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -45,10 +45,6 @@ static RegisterScheduler
"Bottom-up register reduction list scheduling",
createBURRListDAGScheduler);
static RegisterScheduler
- tdrListrDAGScheduler("list-tdrr",
- "Top-down register reduction list scheduling",
- createTDRRListDAGScheduler);
-static RegisterScheduler
sourceListDAGScheduler("source",
"Similar to list-burr but schedules in source "
"order when possible",
@@ -93,6 +89,9 @@ static cl::opt<bool> DisableSchedCriticalPath(
static cl::opt<bool> DisableSchedHeight(
"disable-sched-height", cl::Hidden, cl::init(false),
cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+static cl::opt<bool> Disable2AddrHack(
+ "disable-2addr-hack", cl::Hidden, cl::init(true),
+ cl::desc("Disable scheduler's two-address hack"));
static cl::opt<int> MaxReorderWindow(
"max-sched-reorder", cl::Hidden, cl::init(6),
@@ -103,17 +102,6 @@ static cl::opt<unsigned> AvgIPC(
"sched-avg-ipc", cl::Hidden, cl::init(1),
cl::desc("Average inst/cycle whan no target itinerary exists."));
-#ifndef NDEBUG
-namespace {
- // For sched=list-ilp, Count the number of times each factor comes into play.
- enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
- FactStatic, FactOther, NumFactors };
-}
-static const char *FactorName[NumFactors] =
-{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
-static int FactorCount[NumFactors];
-#endif //!NDEBUG
-
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -121,10 +109,6 @@ namespace {
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
- /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
- /// it is top-down.
- bool isBottomUp;
-
/// NeedLatency - True if the scheduler will make use of latency information.
///
bool NeedLatency;
@@ -162,11 +146,15 @@ private:
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
+ // Hack to keep track of the inverse of FindCallSeqStart without more crazy
+ // DAG crawling.
+ DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
+
public:
ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
SchedulingPriorityQueue *availqueue,
CodeGenOpt::Level OptLevel)
- : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
+ : ScheduleDAGSDNodes(mf),
NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
Topo(SUnits) {
@@ -221,8 +209,6 @@ private:
void ReleasePred(SUnit *SU, const SDep *PredEdge);
void ReleasePredecessors(SUnit *SU);
- void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
- void ReleaseSuccessors(SUnit *SU);
void ReleasePending();
void AdvanceToCycle(unsigned NextCycle);
void AdvancePastStalls(SUnit *SU);
@@ -242,15 +228,11 @@ private:
SUnit *PickNodeToScheduleBottomUp();
void ListScheduleBottomUp();
- void ScheduleNodeTopDown(SUnit*);
- void ListScheduleTopDown();
-
-
/// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
/// Updates the topological ordering if required.
SUnit *CreateNewSUnit(SDNode *N) {
unsigned NumSUnits = SUnits.size();
- SUnit *NewNode = NewSUnit(N);
+ SUnit *NewNode = newSUnit(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
Topo.InitDAGTopologicalSorting();
@@ -268,9 +250,9 @@ private:
return NewNode;
}
- /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
+ /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
/// need actual latency information but the hybrid scheduler does.
- bool ForceUnitLatencies() const {
+ bool forceUnitLatencies() const {
return !NeedLatency;
}
};
@@ -278,7 +260,7 @@ private:
/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
-/// but for untyped values (MVT::untyped) it means inspecting the node's
+/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
const TargetLowering *TLI,
@@ -289,7 +271,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
// Special handling for untyped values. These values can only come from
// the expansion of custom DAG-to-DAG patterns.
- if (VT == MVT::untyped) {
+ if (VT == MVT::Untyped) {
const SDNode *Node = RegDefPos.GetNode();
unsigned Opcode = Node->getMachineOpcode();
@@ -319,18 +301,16 @@ void ScheduleDAGRRList::Schedule() {
DEBUG(dbgs()
<< "********** List Scheduling BB#" << BB->getNumber()
<< " '" << BB->getName() << "' **********\n");
-#ifndef NDEBUG
- for (int i = 0; i < NumFactors; ++i) {
- FactorCount[i] = 0;
- }
-#endif //!NDEBUG
CurCycle = 0;
IssueCount = 0;
MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
NumLiveRegs = 0;
- LiveRegDefs.resize(TRI->getNumRegs(), NULL);
- LiveRegGens.resize(TRI->getNumRegs(), NULL);
+ // Allocate slots for each physical register, plus one for a special register
+ // to track the virtual resource of a calling sequence.
+ LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
+ LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
+ CallSeqEndForStart.clear();
// Build the scheduling graph.
BuildSchedGraph(NULL);
@@ -343,18 +323,16 @@ void ScheduleDAGRRList::Schedule() {
HazardRec->Reset();
- // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
- if (isBottomUp)
- ListScheduleBottomUp();
- else
- ListScheduleTopDown();
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
-#ifndef NDEBUG
- for (int i = 0; i < NumFactors; ++i) {
- DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
- }
-#endif // !NDEBUG
AvailableQueue->releaseState();
+
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
}
//===----------------------------------------------------------------------===//
@@ -376,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
#endif
--PredSU->NumSuccsLeft;
- if (!ForceUnitLatencies()) {
+ if (!forceUnitLatencies()) {
// Updating predecessor's height. This is now the cycle when the
// predecessor can be scheduled without causing a pipeline stall.
PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
@@ -403,6 +381,109 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
}
}
+/// IsChainDependent - Test if Outer is reachable from Inner through
+/// chain dependencies.
+static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
+ unsigned NestLevel,
+ const TargetInstrInfo *TII) {
+ SDNode *N = Outer;
+ for (;;) {
+ if (N == Inner)
+ return true;
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
+ return true;
+ return false;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ } else if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameSetupOpcode()) {
+ if (NestLevel == 0)
+ return false;
+ --NestLevel;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other) {
+ N = N->getOperand(i).getNode();
+ goto found_chain_operand;
+ }
+ return false;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return false;
+ }
+}
+
+/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
+/// the corresponding (lowered) CALLSEQ_BEGIN node.
+///
+/// NestLevel and MaxNest are used in recursion to indicate the current level
+/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
+/// level seen so far.
+///
+/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
+/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
+static SDNode *
+FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
+ const TargetInstrInfo *TII) {
+ for (;;) {
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ SDNode *Best = 0;
+ unsigned BestMaxNest = MaxNest;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ unsigned MyNestLevel = NestLevel;
+ unsigned MyMaxNest = MaxNest;
+ if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
+ MyNestLevel, MyMaxNest, TII))
+ if (!Best || (MyMaxNest > BestMaxNest)) {
+ Best = New;
+ BestMaxNest = MyMaxNest;
+ }
+ }
+ assert(Best);
+ MaxNest = BestMaxNest;
+ return Best;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ MaxNest = std::max(MaxNest, NestLevel);
+ } else if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameSetupOpcode()) {
+ assert(NestLevel != 0);
+ --NestLevel;
+ if (NestLevel == 0)
+ return N;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other) {
+ N = N->getOperand(i).getNode();
+ goto found_chain_operand;
+ }
+ return 0;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return 0;
+ }
+}
+
/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively create one large live range
@@ -440,6 +521,27 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
}
}
}
+
+ // If we're scheduling a lowered CALLSEQ_END, find the corresponding
+ // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
+ // these nodes, to prevent other calls from being interscheduled with them.
+ unsigned CallResource = TRI->getNumRegs();
+ if (!LiveRegDefs[CallResource])
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
+ if (Node->isMachineOpcode() &&
+ Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ unsigned NestLevel = 0;
+ unsigned MaxNest = 0;
+ SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
+
+ SUnit *Def = &SUnits[N->getNodeId()];
+ CallSeqEndForStart[Def] = SU;
+
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = Def;
+ LiveRegGens[CallResource] = SU;
+ break;
+ }
}
/// Check to see if any of the pending instructions are ready to issue. If
@@ -457,8 +559,7 @@ void ScheduleDAGRRList::ReleasePending() {
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
- unsigned ReadyCycle =
- isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
+ unsigned ReadyCycle = PendingQueue[i]->getHeight();
if (ReadyCycle < MinAvailableCycle)
MinAvailableCycle = ReadyCycle;
@@ -487,10 +588,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
}
else {
for (; CurCycle != NextCycle; ++CurCycle) {
- if (isBottomUp)
- HazardRec->RecedeCycle();
- else
- HazardRec->AdvanceCycle();
+ HazardRec->RecedeCycle();
}
}
// FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
@@ -511,7 +609,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
// currently need to treat these nodes like real instructions.
// if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
- unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
+ unsigned ReadyCycle = SU->getHeight();
// Bump CurCycle to account for latency. We assume the latency of other
// available instructions may be hidden by the stall (not a full pipe stall).
@@ -522,7 +620,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
// Calls are scheduled in their preceding cycle, so don't conflict with
// hazards from instructions after the call. EmitNode will reset the
// scoreboard state before emitting the call.
- if (isBottomUp && SU->isCall)
+ if (SU->isCall)
return;
// FIXME: For resource conflicts in very long non-pipelined stages, we
@@ -530,7 +628,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
int Stalls = 0;
while (true) {
ScheduleHazardRecognizer::HazardType HT =
- HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
+ HazardRec->getHazardType(SU, -Stalls);
if (HT == ScheduleHazardRecognizer::NoHazard)
break;
@@ -568,17 +666,13 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
HazardRec->Reset();
return;
}
- if (isBottomUp && SU->isCall) {
+ if (SU->isCall) {
// Calls are scheduled with their preceding instructions. For bottom-up
// scheduling, clear the pipeline state before emitting.
HazardRec->Reset();
}
HazardRec->EmitInstruction(SU);
-
- if (!isBottomUp && SU->isCall) {
- HazardRec->Reset();
- }
}
static void resetVRegCycle(SUnit *SU);
@@ -607,7 +701,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
Sequence.push_back(SU);
- AvailableQueue->ScheduledNode(SU);
+ AvailableQueue->scheduledNode(SU);
// If HazardRec is disabled, and each inst counts as one cycle, then
// advance CurCycle before ReleasePredecessors to avoid useless pushes to
@@ -630,6 +724,20 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
LiveRegGens[I->getReg()] = NULL;
}
}
+ // Release the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = NULL;
+ LiveRegGens[CallResource] = NULL;
+ }
+ }
resetVRegCycle(SU);
@@ -686,15 +794,41 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
}
}
+ // Reclaim the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = SU;
+ LiveRegGens[CallResource] = CallSeqEndForStart[SU];
+ }
+ }
+
+ // Release the special call resource dependence, if this is the end
+ // of a call.
+ if (LiveRegGens[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = NULL;
+ LiveRegGens[CallResource] = NULL;
+ }
+ }
+
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
+ if (!LiveRegDefs[I->getReg()])
+ ++NumLiveRegs;
// This becomes the nearest def. Note that an earlier def may still be
// pending if this is a two-address node.
LiveRegDefs[I->getReg()] = SU;
- if (!LiveRegDefs[I->getReg()]) {
- ++NumLiveRegs;
- }
if (LiveRegGens[I->getReg()] == NULL ||
I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
LiveRegGens[I->getReg()] = I->getSUnit();
@@ -714,7 +848,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
else {
AvailableQueue->push(SU);
}
- AvailableQueue->UnscheduledNode(SU);
+ AvailableQueue->unscheduledNode(SU);
}
/// After backtracking, the hazard checker needs to be restored to a state
@@ -805,6 +939,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return NULL;
+  // Unfolding an x86 DEC64m operation results in store, dec, load, which
+  // can't be handled here, so quit.
+ if (NewNodes.size() == 3)
+ return NULL;
+
DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
@@ -830,7 +969,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
LoadNode->setNodeId(LoadSU->NodeNum);
InitNumRegDefsLeft(LoadSU);
- ComputeLatency(LoadSU);
+ computeLatency(LoadSU);
}
SUnit *NewSU = CreateNewSUnit(N);
@@ -848,7 +987,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
NewSU->isCommutable = true;
InitNumRegDefsLeft(NewSU);
- ComputeLatency(NewSU);
+ computeLatency(NewSU);
// Record all the edges to and from the old SU, by category.
SmallVector<SDep, 4> ChainPreds;
@@ -1027,7 +1166,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
unsigned NumRes = MCID.getNumDefs();
- for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -1042,7 +1181,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
SmallSet<unsigned, 4> &RegAdded,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
- for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+ for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
// Check if Ref is live.
if (!LiveRegDefs[*AliasI]) continue;
@@ -1057,6 +1196,31 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
}
}
+/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
+/// by RegMask, and add them to LRegs.
+static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs) {
+ // Look at all live registers. Skip Reg0 and the special CallResource.
+ for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
+ if (!LiveRegDefs[i]) continue;
+ if (LiveRegDefs[i] == SU) continue;
+ if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
+ if (RegAdded.insert(i))
+ LRegs.push_back(i);
+ }
+}
+
+/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
+static const uint32_t *getNodeRegMask(const SDNode *N) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (const RegisterMaskSDNode *Op =
+ dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
+ return Op->getRegMask();
+ return NULL;
+}
+
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
@@ -1108,10 +1272,27 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
if (!Node->isMachineOpcode())
continue;
+ // If we're in the middle of scheduling a call, don't begin scheduling
+ // another call. Also, don't allow any physical registers to be live across
+ // the call.
+ if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ // Check the special calling-sequence resource.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource]) {
+ SDNode *Gen = LiveRegGens[CallResource]->getNode();
+ while (SDNode *Glued = Gen->getGluedNode())
+ Gen = Glued;
+ if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
+ LRegs.push_back(CallResource);
+ }
+ }
+ if (const uint32_t *RegMask = getNodeRegMask(Node))
+ CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);
+
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
+ for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
@@ -1300,99 +1481,10 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
std::reverse(Sequence.begin(), Sequence.end());
#ifndef NDEBUG
- VerifySchedule(isBottomUp);
-#endif
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Down Scheduling
-//===----------------------------------------------------------------------===//
-
-/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
-/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
-void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
- SUnit *SuccSU = SuccEdge->getSUnit();
-
-#ifndef NDEBUG
- if (SuccSU->NumPredsLeft == 0) {
- dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
- dbgs() << " has been released too many times!\n";
- llvm_unreachable(0);
- }
-#endif
- --SuccSU->NumPredsLeft;
-
- // If all the node's predecessors are scheduled, this node is ready
- // to be scheduled. Ignore the special ExitSU node.
- if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
- SuccSU->isAvailable = true;
- AvailableQueue->push(SuccSU);
- }
-}
-
-void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
- // Top down: release successors
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- assert(!I->isAssignedRegDep() &&
- "The list-tdrr scheduler doesn't yet support physreg dependencies!");
-
- ReleaseSucc(SU, &*I);
- }
-}
-
-/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
-/// count of its successors. If a successor pending count is zero, add it to
-/// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
- DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- DEBUG(SU->dump(this));
-
- assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
- SU->setDepthToAtLeast(CurCycle);
- Sequence.push_back(SU);
-
- ReleaseSuccessors(SU);
- SU->isScheduled = true;
- AvailableQueue->ScheduledNode(SU);
-}
-
-/// ListScheduleTopDown - The main loop of list scheduling for top-down
-/// schedulers.
-void ScheduleDAGRRList::ListScheduleTopDown() {
- AvailableQueue->setCurCycle(CurCycle);
-
- // Release any successors of the special Entry node.
- ReleaseSuccessors(&EntrySU);
-
- // All leaves to Available queue.
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- // It is available if it has no predecessors.
- if (SUnits[i].Preds.empty()) {
- AvailableQueue->push(&SUnits[i]);
- SUnits[i].isAvailable = true;
- }
- }
-
- // While Available queue is not empty, grab the node with the highest
- // priority. If it is not ready put it back. Schedule the node.
- Sequence.reserve(SUnits.size());
- while (!AvailableQueue->empty()) {
- SUnit *CurSU = AvailableQueue->pop();
-
- if (CurSU)
- ScheduleNodeTopDown(CurSU);
- ++CurCycle;
- AvailableQueue->setCurCycle(CurCycle);
- }
-
-#ifndef NDEBUG
- VerifySchedule(isBottomUp);
+ VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}
-
//===----------------------------------------------------------------------===//
// RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
@@ -1437,21 +1529,6 @@ struct bu_ls_rr_sort : public queue_sort {
bool operator()(SUnit* left, SUnit* right) const;
};
-// td_ls_rr_sort - Priority function for top down register pressure reduction
-// scheduler.
-struct td_ls_rr_sort : public queue_sort {
- enum {
- IsBottomUp = false,
- HasReadyFilter = false
- };
-
- RegReductionPQBase *SPQ;
- td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
- td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
-};
-
// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
enum {
@@ -1510,6 +1587,7 @@ protected:
std::vector<SUnit*> Queue;
unsigned CurQueueId;
bool TracksRegPressure;
+ bool SrcOrder;
// SUnits - The SUnits for the current graph.
std::vector<SUnit> *SUnits;
@@ -1535,11 +1613,12 @@ public:
RegReductionPQBase(MachineFunction &mf,
bool hasReadyFilter,
bool tracksrp,
+ bool srcorder,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
const TargetLowering *tli)
: SchedulingPriorityQueue(hasReadyFilter),
- CurQueueId(0), TracksRegPressure(tracksrp),
+ CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
if (TracksRegPressure) {
unsigned NumRC = TRI->getNumRegClasses();
@@ -1610,9 +1689,9 @@ public:
int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
- void ScheduledNode(SUnit *SU);
+ void scheduledNode(SUnit *SU);
- void UnscheduledNode(SUnit *SU);
+ void unscheduledNode(SUnit *SU);
protected:
bool canClobber(const SUnit *SU, const SUnit *Op);
@@ -1654,10 +1733,12 @@ class RegReductionPriorityQueue : public RegReductionPQBase {
public:
RegReductionPriorityQueue(MachineFunction &mf,
bool tracksrp,
+ bool srcorder,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
const TargetLowering *tli)
- : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
+ tii, tri, tli),
Picker(this) {}
bool isBottomUp() const { return SF::IsBottomUp; }
@@ -1680,10 +1761,7 @@ public:
SF DumpPicker = Picker;
while (!DumpQueue.empty()) {
SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
- if (isBottomUp())
- dbgs() << "Height " << SU->getHeight() << ": ";
- else
- dbgs() << "Depth " << SU->getDepth() << ": ";
+ dbgs() << "Height " << SU->getHeight() << ": ";
SU->dump(DAG);
}
}
@@ -1692,9 +1770,6 @@ public:
typedef RegReductionPriorityQueue<bu_ls_rr_sort>
BURegReductionPriorityQueue;
-typedef RegReductionPriorityQueue<td_ls_rr_sort>
-TDRegReductionPriorityQueue;
-
typedef RegReductionPriorityQueue<src_ls_rr_sort>
SrcRegReductionPriorityQueue;
@@ -1919,7 +1994,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
return PDiff;
}
-void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+void RegReductionPQBase::scheduledNode(SUnit *SU) {
if (!TracksRegPressure)
return;
@@ -1988,7 +2063,7 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) {
dumpRegPressure();
}
-void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
+void RegReductionPQBase::unscheduledNode(SUnit *SU) {
if (!TracksRegPressure)
return;
@@ -2235,37 +2310,29 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
int LHeight = (int)left->getHeight() + LPenalty;
int RHeight = (int)right->getHeight() + RPenalty;
- bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
BUHasStall(left, LHeight, SPQ);
- bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
BUHasStall(right, RHeight, SPQ);
   // If scheduling one of the nodes will cause a pipeline stall, delay it.
   // If scheduling either one will cause a pipeline stall, sort them
   // according to their height.
if (LStall) {
- if (!RStall) {
- DEBUG(++FactorCount[FactStall]);
+ if (!RStall)
return 1;
- }
- if (LHeight != RHeight) {
- DEBUG(++FactorCount[FactStall]);
+ if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
- }
- } else if (RStall) {
- DEBUG(++FactorCount[FactStall]);
+ } else if (RStall)
return -1;
- }
// If either node is scheduling for latency, sort them by height/depth
// and latency.
- if (!checkPref || (left->SchedulingPref == Sched::Latency ||
- right->SchedulingPref == Sched::Latency)) {
+ if (!checkPref || (left->SchedulingPref == Sched::ILP ||
+ right->SchedulingPref == Sched::ILP)) {
if (DisableSchedCycles) {
- if (LHeight != RHeight) {
- DEBUG(++FactorCount[FactHeight]);
+ if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
- }
}
else {
// If neither instruction stalls (!LStall && !RStall) then
@@ -2274,17 +2341,14 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
int LDepth = left->getDepth() - LPenalty;
int RDepth = right->getDepth() - RPenalty;
if (LDepth != RDepth) {
- DEBUG(++FactorCount[FactDepth]);
DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
<< ") depth " << RDepth << "\n");
return LDepth < RDepth ? 1 : -1;
}
}
- if (left->Latency != right->Latency) {
- DEBUG(++FactorCount[FactOther]);
+ if (left->Latency != right->Latency)
return left->Latency > right->Latency ? 1 : -1;
- }
}
return 0;
}
@@ -2298,7 +2362,6 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
bool LHasPhysReg = left->hasPhysRegDefs;
bool RHasPhysReg = right->hasPhysRegDefs;
if (LHasPhysReg != RHasPhysReg) {
- DEBUG(++FactorCount[FactRegUses]);
#ifndef NDEBUG
const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
#endif
@@ -2324,10 +2387,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
}
- if (LPriority != RPriority) {
- DEBUG(++FactorCount[FactStatic]);
+ if (LPriority != RPriority)
return LPriority > RPriority;
- }
   // If one or both of the nodes are calls and their Sethi-Ullman numbers are
   // the same, then keep source order.
@@ -2360,18 +2421,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
// This creates more short live intervals.
unsigned LDist = closestSucc(left);
unsigned RDist = closestSucc(right);
- if (LDist != RDist) {
- DEBUG(++FactorCount[FactOther]);
+ if (LDist != RDist)
return LDist < RDist;
- }
   // How many registers become live when the node is scheduled.
unsigned LScratch = calcMaxScratches(left);
unsigned RScratch = calcMaxScratches(right);
- if (LScratch != RScratch) {
- DEBUG(++FactorCount[FactOther]);
+ if (LScratch != RScratch)
return LScratch > RScratch;
- }
// Comparing latency against a call makes little sense unless the node
// is register pressure-neutral.
@@ -2386,20 +2443,15 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
return result > 0;
}
else {
- if (left->getHeight() != right->getHeight()) {
- DEBUG(++FactorCount[FactHeight]);
+ if (left->getHeight() != right->getHeight())
return left->getHeight() > right->getHeight();
- }
- if (left->getDepth() != right->getDepth()) {
- DEBUG(++FactorCount[FactDepth]);
+ if (left->getDepth() != right->getDepth())
return left->getDepth() < right->getDepth();
- }
}
assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
- DEBUG(++FactorCount[FactOther]);
return (left->NodeQueueId > right->NodeQueueId);
}
@@ -2459,13 +2511,11 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
if (LHigh && !RHigh) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
<< right->NodeNum << ")\n");
return true;
}
else if (!LHigh && RHigh) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
<< left->NodeNum << ")\n");
return false;
@@ -2529,7 +2579,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
}
if (!DisableSchedRegPressure && LPDiff != RPDiff) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
<< " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
return LPDiff > RPDiff;
@@ -2538,7 +2587,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
bool LReduce = canEnableCoalescing(left);
bool RReduce = canEnableCoalescing(right);
- DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
if (LReduce && !RReduce) return false;
if (RReduce && !LReduce) return true;
}
@@ -2546,17 +2594,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
<< " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
- DEBUG(++FactorCount[FactRegUses]);
return LLiveUses < RLiveUses;
}
if (!DisableSchedStalls) {
bool LStall = BUHasStall(left, left->getHeight(), SPQ);
bool RStall = BUHasStall(right, right->getHeight(), SPQ);
- if (LStall != RStall) {
- DEBUG(++FactorCount[FactHeight]);
+ if (LStall != RStall)
return left->getHeight() > right->getHeight();
- }
}
if (!DisableSchedCriticalPath) {
@@ -2565,17 +2610,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
<< left->getDepth() << " != SU(" << right->NodeNum << "): "
<< right->getDepth() << "\n");
- DEBUG(++FactorCount[FactDepth]);
return left->getDepth() < right->getDepth();
}
}
if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
int spread = (int)left->getHeight() - (int)right->getHeight();
- if (std::abs(spread) > MaxReorderWindow) {
- DEBUG(++FactorCount[FactHeight]);
+ if (std::abs(spread) > MaxReorderWindow)
return left->getHeight() > right->getHeight();
- }
}
return BURRSort(left, right, SPQ);
@@ -2584,9 +2626,10 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
// Add pseudo dependency edges for two-address nodes.
- AddPseudoTwoAddrDeps();
+ if (!Disable2AddrHack)
+ AddPseudoTwoAddrDeps();
// Reroute edges to nodes with multiple uses.
- if (!TracksRegPressure)
+ if (!TracksRegPressure && !SrcOrder)
PrescheduleNodesWithMultipleUses();
// Calculate node priorities.
CalculateSethiUllmanNumbers();
@@ -2628,9 +2671,10 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
ScheduleDAGRRList *scheduleDAG,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- const unsigned *ImpDefs
+ const uint16_t *ImpDefs
= TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
- if(!ImpDefs)
+ const uint32_t *RegMask = getNodeRegMask(SU->getNode());
+ if(!ImpDefs && !RegMask)
return false;
for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
@@ -2641,14 +2685,18 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
if (!PI->isAssignedRegDep())
continue;
- for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) {
- // Return true if SU clobbers this physical register use and the
- // definition of the register reaches from DepSU. IsReachable queries a
- // topological forward sort of the DAG (following the successors).
- if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
- scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
- return true;
- }
+ if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
+
+ if (ImpDefs)
+ for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+ // Return true if SU clobbers this physical register use and the
+ // definition of the register reaches from DepSU. IsReachable queries
+ // a topological forward sort of the DAG (following the successors).
+ if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
}
}
return false;
@@ -2661,16 +2709,17 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
const TargetRegisterInfo *TRI) {
SDNode *N = SuccSU->getNode();
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
assert(ImpDefs && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (!SUNode->isMachineOpcode())
continue;
- const unsigned *SUImpDefs =
+ const uint16_t *SUImpDefs =
TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
- if (!SUImpDefs)
- return false;
+ const uint32_t *SURegMask = getNodeRegMask(SUNode);
+ if (!SUImpDefs && !SURegMask)
+ continue;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Glue || VT == MVT::Other)
@@ -2678,6 +2727,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
if (!N->hasAnyUseOfValue(i))
continue;
unsigned Reg = ImpDefs[i - NumDefs];
+ if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
+ return true;
+ if (!SUImpDefs)
+ continue;
for (;*SUImpDefs; ++SUImpDefs) {
unsigned SUReg = *SUImpDefs;
if (TRI->regsOverlap(Reg, SUReg))
@@ -2887,69 +2940,6 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
}
}
-/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
-/// predecessors of the successors of the SUnit SU. Stop when the provided
-/// limit is exceeded.
-static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
- unsigned Limit) {
- unsigned Sum = 0;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- const SUnit *SuccSU = I->getSUnit();
- for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
- EE = SuccSU->Preds.end(); II != EE; ++II) {
- SUnit *PredSU = II->getSUnit();
- if (!PredSU->isScheduled)
- if (++Sum > Limit)
- return Sum;
- }
- }
- return Sum;
-}
-
-
-// Top down
-bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
- if (int res = checkSpecialNodes(left, right))
- return res < 0;
-
- unsigned LPriority = SPQ->getNodePriority(left);
- unsigned RPriority = SPQ->getNodePriority(right);
- bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
- bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
- bool LIsFloater = LIsTarget && left->NumPreds == 0;
- bool RIsFloater = RIsTarget && right->NumPreds == 0;
- unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
- unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
-
- if (left->NumSuccs == 0 && right->NumSuccs != 0)
- return false;
- else if (left->NumSuccs != 0 && right->NumSuccs == 0)
- return true;
-
- if (LIsFloater)
- LBonus -= 2;
- if (RIsFloater)
- RBonus -= 2;
- if (left->NumSuccs == 1)
- LBonus += 2;
- if (right->NumSuccs == 1)
- RBonus += 2;
-
- if (LPriority+LBonus != RPriority+RBonus)
- return LPriority+LBonus < RPriority+RBonus;
-
- if (left->getDepth() != right->getDepth())
- return left->getDepth() < right->getDepth();
-
- if (left->NumSuccsLeft != right->NumSuccsLeft)
- return left->NumSuccsLeft > right->NumSuccsLeft;
-
- assert(left->NodeQueueId && right->NodeQueueId &&
- "NodeQueueId cannot be zero");
- return (left->NodeQueueId > right->NodeQueueId);
-}
-
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
@@ -2962,21 +2952,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
BURegReductionPriorityQueue *PQ =
- new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
- PQ->setScheduleDAG(SD);
- return SD;
-}
-
-llvm::ScheduleDAGSDNodes *
-llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel) {
- const TargetMachine &TM = IS->TM;
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
- TDRegReductionPriorityQueue *PQ =
- new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
@@ -2990,7 +2966,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
SrcRegReductionPriorityQueue *PQ =
- new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
@@ -3005,7 +2981,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
const TargetLowering *TLI = &IS->getTargetLowering();
HybridBURRPriorityQueue *PQ =
- new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+ new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
@@ -3021,7 +2997,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
const TargetLowering *TLI = &IS->getTargetLowering();
ILPBURRPriorityQueue *PQ =
- new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+ new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
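
FindCallSeqStart above walks chain operands backward from a lowered CALLSEQ_END, raising a nest level at every nested CALLSEQ_END and lowering it at every CALLSEQ_BEGIN until the level returns to zero at the matching begin. The same matching logic, reduced to a flat array of hypothetical markers instead of SDNodes, is sketched below under that assumption:

// Minimal sketch of the CALLSEQ_BEGIN/CALLSEQ_END matching in FindCallSeqStart,
// on a flat instruction stream instead of a chain of SDNodes. The Kind enum and
// the stream contents are illustrative assumptions only.
#include <cassert>
#include <vector>

enum Kind { Other, CallSeqBegin, CallSeqEnd };

// Walk backward from the CALLSEQ_END at EndIdx and return the index of the
// matching CALLSEQ_BEGIN, skipping over nested call sequences.
static int findCallSeqStart(const std::vector<Kind> &Stream, int EndIdx) {
  assert(Stream[EndIdx] == CallSeqEnd && "expected a CALLSEQ_END");
  unsigned NestLevel = 0;
  for (int i = EndIdx; i >= 0; --i) {
    if (Stream[i] == CallSeqEnd)
      ++NestLevel;        // Another (possibly nested) call sequence ends here.
    else if (Stream[i] == CallSeqBegin) {
      --NestLevel;        // One call sequence begins here.
      if (NestLevel == 0)
        return i;         // This BEGIN matches the original END.
    }
  }
  return -1;              // No match; malformed input.
}

For example, on the stream {CallSeqBegin, Other, CallSeqBegin, CallSeqEnd, CallSeqEnd} a call with EndIdx == 4 returns index 0, stepping over the inner pair just as the nest-level bookkeeping in the patch does.
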
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 71f07d6fa47a..69dd813b24e0 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -17,6 +17,8 @@
#include "ScheduleDAGSDNodes.h"
#include "InstrEmitter.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -44,20 +46,26 @@ static cl::opt<int> HighLatencyCycles(
"instructions take for targets with no itinerary"));
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf),
+ : ScheduleDAG(mf), BB(0), DAG(0),
InstrItins(mf.getTarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
-void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
- MachineBasicBlock::iterator insertPos) {
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
+ BB = bb;
DAG = dag;
- ScheduleDAG::Run(bb, insertPos);
+
+ // Clear the scheduler's SUnit DAG.
+ ScheduleDAG::clearDAG();
+ Sequence.clear();
+
+ // Invoke the target's selection of scheduler.
+ Schedule();
}
/// NewSUnit - Creates a new SUnit and return a ptr to it.
///
-SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
+SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
#ifndef NDEBUG
const SUnit *Addr = 0;
if (!SUnits.empty())
@@ -79,7 +87,7 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
}
SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
- SUnit *SU = NewSUnit(Old->getNode());
+ SUnit *SU = newSUnit(Old->getNode());
SU->OrigNode = Old->OrigNode;
SU->Latency = Old->Latency;
SU->isVRegCycle = Old->isVRegCycle;
@@ -302,7 +310,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// If this node has already been processed, stop now.
if (NI->getNodeId() != -1) continue;
- SUnit *NodeSUnit = NewSUnit(NI);
+ SUnit *NodeSUnit = newSUnit(NI);
// See if anything is glued to this node, if so, add them to glued
// nodes. Nodes can have at most one glue input and one glue output. Glue
@@ -360,7 +368,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
InitNumRegDefsLeft(NodeSUnit);
// Assign the Latency field of NodeSUnit using target-provided information.
- ComputeLatency(NodeSUnit);
+ computeLatency(NodeSUnit);
}
// Find all call operands.
@@ -382,7 +390,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
// Check to see if the scheduler cares about latencies.
- bool UnitLatencies = ForceUnitLatencies();
+ bool UnitLatencies = forceUnitLatencies();
// Pass 2: add the preds, succs, etc.
for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
@@ -448,7 +456,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpLatency, PhysReg);
if (!isChain && !UnitLatencies) {
- ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
+ computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
@@ -541,7 +549,7 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
}
}
-void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
SDNode *N = SU->getNode();
// TokenFactor operands are considered zero latency, and some schedulers
@@ -553,7 +561,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
}
// Check to see if the scheduler cares about latencies.
- if (ForceUnitLatencies()) {
+ if (forceUnitLatencies()) {
SU->Latency = 1;
return;
}
@@ -575,10 +583,10 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
SU->Latency += TII->getInstrLatency(InstrItins, N);
}
-void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const{
// Check to see if the scheduler cares about latencies.
- if (ForceUnitLatencies())
+ if (forceUnitLatencies())
return;
if (dep.getKind() != SDep::Data)
@@ -621,6 +629,30 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
}
}
+void ScheduleDAGSDNodes::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+
+#ifndef NDEBUG
+/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
+/// their state is consistent with the nodes listed in Sequence.
+///
+void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
+ unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif // NDEBUG
+
namespace {
struct OrderSorter {
bool operator()(const std::pair<unsigned, MachineInstr*> &A,
@@ -686,9 +718,48 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}
+void ScheduleDAGSDNodes::
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos) {
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ unsigned Reg = 0;
+ for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+ EE = SU->Succs.end(); II != EE; ++II) {
+        if (II->isCtrl()) continue;  // ignore chain succs
+ if (II->getReg()) {
+ Reg = II->getReg();
+ break;
+ }
+ }
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(VRI->second);
+ } else {
+ // Copy from physical register.
+ assert(I->getReg() && "Unknown physical register!");
+ unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+ .addReg(I->getReg());
+ }
+ break;
+ }
+}
-/// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
+/// EmitSchedule - Emit the machine code in scheduled order. Return the new
+/// InsertPos and MachineBasicBlock that contains this insertion
+/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
+/// not necessarily refer to the returned BB. The emitter may split blocks.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
InstrEmitter Emitter(BB, InsertPos);
DenseMap<SDValue, unsigned> VRBaseMap;
DenseMap<SUnit*, unsigned> CopyVRBaseMap;
@@ -711,7 +782,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
SUnit *SU = Sequence[i];
if (!SU) {
// Null SUnit* is a noop.
- EmitNoop();
+ TII->insertNoop(*Emitter.getBlock(), InsertPos);
continue;
}
@@ -719,7 +790,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
// SDNode and any glued SDNodes and append them to the block.
if (!SU->getNode()) {
// Emit a copy.
- EmitPhysRegCopy(SU, CopyVRBaseMap);
+ EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
continue;
}
@@ -784,19 +855,24 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
}
// Add trailing DbgValue's before the terminator. FIXME: May want to add
// some of them before one or more conditional branches?
+ SmallVector<MachineInstr*, 8> DbgMIs;
while (DI != DE) {
- MachineBasicBlock *InsertBB = Emitter.getBlock();
- MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator();
- if (!(*DI)->isInvalidated()) {
- MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap);
- if (DbgMI)
- InsertBB->insert(Pos, DbgMI);
- }
+ if (!(*DI)->isInvalidated())
+ if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
+ DbgMIs.push_back(DbgMI);
++DI;
}
+
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+ MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
+ InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
}
- BB = Emitter.getBlock();
InsertPos = Emitter.getInsertPos();
- return BB;
+ return Emitter.getBlock();
+}
+
+/// Return the basic block label.
+std::string ScheduleDAGSDNodes::getDAGName() const {
+ return "sunit-dag." + BB->getFullName();
}
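
The new VerifyScheduledSequence only has to discount noop slots: a null entry in Sequence stands for an inserted noop, so the number of scheduled units must equal the sequence length minus those nulls. A standalone restatement of that check, with a placeholder Unit type instead of SUnit, is:

// Sketch of the bookkeeping behind VerifyScheduledSequence: null entries in the
// schedule represent noops and are excluded when comparing against the number
// of scheduled units. "Unit" is a placeholder for SUnit.
#include <cassert>
#include <cstddef>
#include <vector>

struct Unit { int Id; };

void verifyScheduledSequence(const std::vector<Unit *> &Sequence,
                             std::size_t ScheduledUnits) {
  std::size_t Noops = 0;
  for (std::size_t i = 0, e = Sequence.size(); i != e; ++i)
    if (!Sequence[i])
      ++Noops;
  assert(Sequence.size() - Noops == ScheduledUnits &&
         "The number of nodes scheduled doesn't match the expected number!");
  (void)ScheduledUnits; // Only used by the assert in debug builds.
  (void)Noops;
}
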
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 9c27b2ea02ec..75940ec33ddc 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -35,17 +35,20 @@ namespace llvm {
///
class ScheduleDAGSDNodes : public ScheduleDAG {
public:
+ MachineBasicBlock *BB;
SelectionDAG *DAG; // DAG of the current basic block
const InstrItineraryData *InstrItins;
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
explicit ScheduleDAGSDNodes(MachineFunction &mf);
virtual ~ScheduleDAGSDNodes() {}
/// Run - perform scheduling.
///
- void Run(SelectionDAG *dag, MachineBasicBlock *bb,
- MachineBasicBlock::iterator insertPos);
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb);
/// isPassiveNode - Return true if the node is a non-scheduled leaf.
///
@@ -53,6 +56,7 @@ namespace llvm {
if (isa<ConstantSDNode>(Node)) return true;
if (isa<ConstantFPSDNode>(Node)) return true;
if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<RegisterMaskSDNode>(Node)) return true;
if (isa<GlobalAddressSDNode>(Node)) return true;
if (isa<BasicBlockSDNode>(Node)) return true;
if (isa<FrameIndexSDNode>(Node)) return true;
@@ -67,7 +71,7 @@ namespace llvm {
/// NewSUnit - Creates a new SUnit and return a ptr to it.
///
- SUnit *NewSUnit(SDNode *N);
+ SUnit *newSUnit(SDNode *N);
/// Clone - Creates a clone of the specified SUnit. It does not copy the
/// predecessors / successors info nor the temporary scheduling states.
@@ -78,7 +82,7 @@ namespace llvm {
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
- virtual void BuildSchedGraph(AliasAnalysis *AA);
+ void BuildSchedGraph(AliasAnalysis *AA);
/// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
/// CopyToReg and its only active data operands are CopyFromReg within a
@@ -90,30 +94,41 @@ namespace llvm {
///
void InitNumRegDefsLeft(SUnit *SU);
- /// ComputeLatency - Compute node latency.
+ /// computeLatency - Compute node latency.
///
- virtual void ComputeLatency(SUnit *SU);
+ virtual void computeLatency(SUnit *SU);
- /// ComputeOperandLatency - Override dependence edge latency using
+ /// computeOperandLatency - Override dependence edge latency using
/// operand use/def information
///
- virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
SDep& dep) const { }
- virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const;
- virtual MachineBasicBlock *EmitSchedule();
-
/// Schedule - Order nodes according to selected style, filling
/// in the Sequence member.
///
virtual void Schedule() = 0;
+ /// VerifyScheduledSequence - Verify that all SUnits are scheduled and
+ /// consistent with the Sequence of scheduled instructions.
+ void VerifyScheduledSequence(bool isBottomUp);
+
+ /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
+ /// according to the order specified in Sequence.
+ ///
+ MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
virtual void dumpNode(const SUnit *SU) const;
+ void dumpSchedule() const;
+
virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+ virtual std::string getDAGName() const;
+
virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
/// RegDefIter - In place iteration over the values defined by an
@@ -159,6 +174,9 @@ namespace llvm {
/// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
void BuildSchedUnits();
void AddSchedEdges();
+
+ void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos);
};
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 430283d5eff9..c8512914c1e2 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -1,4 +1,4 @@
-//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
+//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -31,6 +31,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include <climits>
using namespace llvm;
@@ -38,15 +39,15 @@ STATISTIC(NumNoops , "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");
static RegisterScheduler
- tdListDAGScheduler("list-td", "Top-down list scheduler",
- createTDListDAGScheduler);
+ VLIWScheduler("vliw-td", "VLIW scheduler",
+ createVLIWDAGScheduler);
namespace {
//===----------------------------------------------------------------------===//
-/// ScheduleDAGList - The actual list scheduler implementation. This supports
-/// top-down scheduling.
+/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This
+/// supports top-down scheduling.
///
-class ScheduleDAGList : public ScheduleDAGSDNodes {
+class ScheduleDAGVLIW : public ScheduleDAGSDNodes {
private:
/// AvailableQueue - The priority queue to use for the available SUnits.
///
@@ -61,16 +62,20 @@ private:
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
public:
- ScheduleDAGList(MachineFunction &mf,
+ ScheduleDAGVLIW(MachineFunction &mf,
+ AliasAnalysis *aa,
SchedulingPriorityQueue *availqueue)
- : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) {
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
const TargetMachine &tm = mf.getTarget();
HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
}
- ~ScheduleDAGList() {
+ ~ScheduleDAGVLIW() {
delete HazardRec;
delete AvailableQueue;
}
@@ -78,23 +83,25 @@ public:
void Schedule();
private:
- void ReleaseSucc(SUnit *SU, const SDep &D);
- void ReleaseSuccessors(SUnit *SU);
- void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
- void ListScheduleTopDown();
+ void releaseSucc(SUnit *SU, const SDep &D);
+ void releaseSuccessors(SUnit *SU);
+ void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void listScheduleTopDown();
};
} // end anonymous namespace
/// Schedule - Schedule the DAG using list scheduling.
-void ScheduleDAGList::Schedule() {
- DEBUG(dbgs() << "********** List Scheduling **********\n");
+void ScheduleDAGVLIW::Schedule() {
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " '" << BB->getName() << "' **********\n");
// Build the scheduling graph.
- BuildSchedGraph(NULL);
+ BuildSchedGraph(AA);
AvailableQueue->initNodes(SUnits);
- ListScheduleTopDown();
+ listScheduleTopDown();
AvailableQueue->releaseState();
}
@@ -103,9 +110,9 @@ void ScheduleDAGList::Schedule() {
// Top-Down Scheduling
//===----------------------------------------------------------------------===//
-/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
-void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
+void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
SUnit *SuccSU = D.getSUnit();
#ifndef NDEBUG
@@ -122,25 +129,26 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
// If all the node's predecessors are scheduled, this node is ready
// to be scheduled. Ignore the special ExitSU node.
- if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
PendingQueue.push_back(SuccSU);
+ }
}
-void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
+void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
// Top down: release successors.
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
assert(!I->isAssignedRegDep() &&
"The list-td scheduler doesn't yet support physreg dependencies!");
- ReleaseSucc(SU, *I);
+ releaseSucc(SU, *I);
}
}
-/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
-void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
@@ -148,20 +156,20 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
SU->setDepthToAtLeast(CurCycle);
- ReleaseSuccessors(SU);
+ releaseSuccessors(SU);
SU->isScheduled = true;
- AvailableQueue->ScheduledNode(SU);
+ AvailableQueue->scheduledNode(SU);
}
-/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// listScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
-void ScheduleDAGList::ListScheduleTopDown() {
+void ScheduleDAGVLIW::listScheduleTopDown() {
unsigned CurCycle = 0;
// Release any successors of the special Entry node.
- ReleaseSuccessors(&EntrySU);
+ releaseSuccessors(&EntrySU);
- // All leaves to Available queue.
+ // All leaves to AvailableQueue.
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
// It is available if it has no predecessors.
if (SUnits[i].Preds.empty()) {
@@ -170,7 +178,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
}
}
- // While Available queue is not empty, grab the node with the highest
+ // While AvailableQueue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
std::vector<SUnit*> NotReady;
Sequence.reserve(SUnits.size());
@@ -184,7 +192,8 @@ void ScheduleDAGList::ListScheduleTopDown() {
PendingQueue[i] = PendingQueue.back();
PendingQueue.pop_back();
--i; --e;
- } else {
+ }
+ else {
assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
}
}
@@ -192,6 +201,8 @@ void ScheduleDAGList::ListScheduleTopDown() {
// If there are no instructions available, don't try to issue anything, and
// don't advance the hazard recognizer.
if (AvailableQueue->empty()) {
+ // Reset DFA state.
+ AvailableQueue->scheduledNode(0);
++CurCycle;
continue;
}
@@ -223,7 +234,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
// If we found a node to schedule, do it now.
if (FoundSUnit) {
- ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ scheduleNodeTopDown(FoundSUnit, CurCycle);
HazardRec->EmitInstruction(FoundSUnit);
// If this is a pseudo-op node, we don't want to increment the current
@@ -250,7 +261,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
}
#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/false);
+ VerifyScheduledSequence(/*isBottomUp=*/false);
#endif
}
@@ -258,8 +269,8 @@ void ScheduleDAGList::ListScheduleTopDown() {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-/// createTDListDAGScheduler - This creates a top-down list scheduler.
+/// createVLIWDAGScheduler - This creates a top-down list scheduler.
ScheduleDAGSDNodes *
-llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
- return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue());
+llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS));
}
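
The top-down loop above is, at its core, the classic list-scheduling skeleton. A standalone sketch (plain C++, not LLVM code) of just that skeleton, for readers who want to see the release-successors mechanism in isolation:

#include <cstdio>
#include <queue>
#include <vector>

// Standalone sketch, not LLVM code: the release-successors skeleton used by
// listScheduleTopDown. A node becomes available once every predecessor has
// been scheduled, i.e. once its pending-predecessor count reaches zero.
int main() {
  // Tiny dependence graph: 0 -> {1, 2}, 1 -> {3}, 2 -> {3}.
  std::vector<std::vector<int> > Succs = {{1, 2}, {3}, {3}, {}};
  std::vector<int> NumPredsLeft = {0, 1, 1, 2};

  std::queue<int> Available;
  for (unsigned i = 0; i != NumPredsLeft.size(); ++i)
    if (NumPredsLeft[i] == 0)
      Available.push(i);

  std::vector<int> Sequence;
  while (!Available.empty()) {
    int SU = Available.front();
    Available.pop();
    Sequence.push_back(SU);
    for (unsigned s = 0; s != Succs[SU].size(); ++s)  // release successors
      if (--NumPredsLeft[Succs[SU][s]] == 0)
        Available.push(Succs[SU][s]);
  }

  for (unsigned i = 0; i != Sequence.size(); ++i)
    std::printf("scheduled SU %d\n", Sequence[i]);
  return 0;
}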
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 20bea8e4c9e9..92671d1678c6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
@@ -63,6 +62,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unknown FP format");
+ case MVT::f16: return &APFloat::IEEEhalf;
case MVT::f32: return &APFloat::IEEEsingle;
case MVT::f64: return &APFloat::IEEEdouble;
case MVT::f80: return &APFloat::x87DoubleExtended;
@@ -125,20 +125,29 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
if (i == e) return false;
// Do not accept build_vectors that aren't all constants or which have non-~0
- // elements.
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target and
+ // a vector of a type may be legal when the base element type is not).
+ // We only want to check enough bits to cover the vector elements, because
+ // we care if the resultant vector is all ones, not whether the individual
+ // constants are.
SDValue NotZero = N->getOperand(i);
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
if (isa<ConstantSDNode>(NotZero)) {
- if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+ if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() <
+ EltSize)
return false;
} else if (isa<ConstantFPSDNode>(NotZero)) {
- if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF().
- bitcastToAPInt().isAllOnesValue())
+ if (cast<ConstantFPSDNode>(NotZero)->getValueAPF()
+ .bitcastToAPInt().countTrailingOnes() < EltSize)
return false;
} else
return false;
// Okay, we have at least one ~0 value, check to see if the rest match or are
- // undefs.
+ // undefs. Even with the above element type twiddling, this should be OK, as
+ // the same type legalization should have applied to all the elements.
for (++i; i != e; ++i)
if (N->getOperand(i) != NotZero &&
N->getOperand(i).getOpcode() != ISD::UNDEF)
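
A standalone sketch (plain C++, not LLVM code) of the check being introduced here: after type legalization an all-ones i8 element may be represented by a wider constant such as 0x000000FF, so the test counts trailing one bits against the element size instead of comparing the whole constant to ~0.

#include <cstdint>
#include <cstdio>

// Standalone illustration, not LLVM code: an element is "all ones" if the
// constant has at least EltSize trailing one bits, regardless of how wide
// the promoted constant itself is.
static unsigned countTrailingOnes(uint64_t V) {
  unsigned N = 0;
  while (V & 1) { ++N; V >>= 1; }
  return N;
}

int main() {
  uint64_t PromotedElt = 0xFF;  // i8 ~0 promoted to a 32-bit constant
  unsigned EltSize = 8;
  std::printf("all-ones element: %s\n",
              countTrailingOnes(PromotedElt) >= EltSize ? "yes" : "no");
  return 0;
}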
@@ -384,7 +393,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::Register:
ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
break;
-
+ case ISD::RegisterMask:
+ ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
+ break;
case ISD::SRCVALUE:
ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
break;
@@ -475,7 +486,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
///
static inline unsigned
encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
- bool isNonTemporal) {
+ bool isNonTemporal, bool isInvariant) {
assert((ConvType & 3) == ConvType &&
"ConvType may not require more than 2 bits!");
assert((AM & 7) == AM &&
@@ -483,7 +494,8 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
return ConvType |
(AM << 2) |
(isVolatile << 5) |
- (isNonTemporal << 6);
+ (isNonTemporal << 6) |
+ (isInvariant << 7);
}
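
For reference, the flag layout that the extra isInvariant operand extends can be reproduced in a few lines of plain C++ (an illustration, not LLVM code): bits 0-1 hold the extension/conversion type, bits 2-4 the addressing mode, bit 5 volatile, bit 6 non-temporal, and the new bit 7 the invariant flag.

#include <cassert>
#include <cstdio>

// Standalone illustration, not LLVM code: the same bit packing as
// encodeMemSDNodeFlags, with the invariant flag in bit 7.
static unsigned encodeFlags(int ConvType, int AM, bool isVolatile,
                            bool isNonTemporal, bool isInvariant) {
  assert((ConvType & 3) == ConvType && "ConvType needs at most 2 bits");
  assert((AM & 7) == AM && "AM needs at most 3 bits");
  return ConvType | (AM << 2) | (isVolatile << 5) | (isNonTemporal << 6) |
         (isInvariant << 7);
}

int main() {
  unsigned Flags = encodeFlags(/*ConvType=*/1, /*AM=*/0, /*isVolatile=*/false,
                               /*isNonTemporal=*/false, /*isInvariant=*/true);
  std::printf("flags = 0x%x, invariant bit = %u\n", Flags, (Flags >> 7) & 1);
  return 0;  // prints flags = 0x81, invariant bit = 1
}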
//===----------------------------------------------------------------------===//
@@ -564,6 +576,12 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
SmallVector<SDNode*, 16> DeadNodes(1, N);
+
+ // Create a dummy node that adds a reference to the root node, preventing
+ // it from being deleted. (This matters if the root is an operand of the
+ // dead node.)
+ HandleSDNode Dummy(getRoot());
+
RemoveDeadNodes(DeadNodes, UpdateListener);
}
@@ -834,9 +852,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
}
// EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(const TargetMachine &tm)
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
- EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
+ OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0) {
AllNodes.push_back(&EntryNode);
Ordering = new SDNodeOrdering();
@@ -1025,16 +1043,14 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
return getConstantFP(APFloat((float)Val), VT, isTarget);
else if (EltVT==MVT::f64)
return getConstantFP(APFloat(Val), VT, isTarget);
- else if (EltVT==MVT::f80 || EltVT==MVT::f128) {
+ else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) {
bool ignored;
APFloat apf = APFloat(Val);
apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
&ignored);
return getConstantFP(apf, VT, isTarget);
- } else {
- assert(0 && "Unsupported type in getConstantFP");
- return SDValue();
- }
+ } else
+ llvm_unreachable("Unsupported type in getConstantFP");
}
SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
@@ -1369,6 +1385,20 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
return SDValue(N, 0);
}
+SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0);
+ ID.AddPointer(RegMask);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
FoldingSetNodeID ID;
SDValue Ops[] = { Root };
@@ -1598,7 +1628,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
unsigned Depth) const {
APInt KnownZero, KnownOne;
- ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
return (KnownZero & Mask) == Mask;
}
@@ -1607,15 +1637,12 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
/// known to be either zero or one and return them in the KnownZero/KnownOne
/// bitsets. This code only analyzes bits in Mask, in order to short-circuit
/// processing.
-void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
- APInt &KnownZero, APInt &KnownOne,
- unsigned Depth) const {
- unsigned BitWidth = Mask.getBitWidth();
- assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() &&
- "Mask size mismatches value type size!");
+void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth) const {
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
- if (Depth == 6 || Mask == 0)
+ if (Depth == 6)
return; // Limit search depth.
APInt KnownZero2, KnownOne2;
@@ -1623,14 +1650,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
switch (Op.getOpcode()) {
case ISD::Constant:
// We know all of the bits for a constant!
- KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask;
- KnownZero = ~KnownOne & Mask;
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+ KnownZero = ~KnownOne;
return;
case ISD::AND:
// If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero,
- KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1640,9 +1666,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownZero |= KnownZero2;
return;
case ISD::OR:
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne,
- KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1652,8 +1677,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownOne |= KnownOne2;
return;
case ISD::XOR: {
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1665,9 +1690,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::MUL: {
- APInt Mask2 = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1686,33 +1710,29 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
LeadZ = std::min(LeadZ, BitWidth);
KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
APInt::getHighBitsSet(BitWidth, LeadZ);
- KnownZero &= Mask;
return;
}
case ISD::UDIV: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(Op.getOperand(0),
- AllOnes, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
KnownOne2.clearAllBits();
KnownZero2.clearAllBits();
- ComputeMaskedBits(Op.getOperand(1),
- AllOnes, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
- KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
return;
}
case ISD::SELECT:
- ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1721,8 +1741,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownZero &= KnownZero2;
return;
case ISD::SELECT_CC:
- ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1754,8 +1774,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (ShAmt >= BitWidth)
return;
- ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt),
- KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero <<= ShAmt;
KnownOne <<= ShAmt;
@@ -1772,13 +1791,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (ShAmt >= BitWidth)
return;
- ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt),
- KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
KnownZero |= HighBits; // High bits known zero.
}
return;
@@ -1790,15 +1808,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (ShAmt >= BitWidth)
return;
- APInt InDemandedMask = (Mask << ShAmt);
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
- if (HighBits.getBoolValue())
- InDemandedMask |= APInt::getSignBit(BitWidth);
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
- ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
- Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
@@ -1820,10 +1834,10 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask;
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits);
APInt InSignBit = APInt::getSignBit(EBits);
- APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits);
+ APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits);
// If the sign extended bits are demanded, we know that the sign
// bit is demanded.
@@ -1831,8 +1845,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (NewBits.getBoolValue())
InputDemandedBits |= InSignBit;
- ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
- KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownOne &= InputDemandedBits;
+ KnownZero &= InputDemandedBits;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
@@ -1850,7 +1865,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP: {
unsigned LowBits = Log2_32(BitWidth)+1;
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
@@ -1858,22 +1875,23 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
if (ISD::isZEXTLoad(Op.getNode())) {
- LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarType().getSizeInBits();
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ } else if (const MDNode *Ranges = LD->getRanges()) {
+ computeMaskedBitsLoad(*Ranges, KnownZero);
}
return;
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask.trunc(InBits);
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
@@ -1883,17 +1901,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt InSignBit = APInt::getSignBit(InBits);
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask.trunc(InBits);
-
- // If any of the sign extended bits are demanded, we know that the sign
- // bit is demanded. Temporarily set this bit in the mask for our callee.
- if (NewBits.getBoolValue())
- InMask |= InSignBit;
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
// Note if the sign bit is known to be zero or one.
bool SignBitKnownZero = KnownZero.isNegative();
@@ -1901,13 +1913,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
assert(!(SignBitKnownZero && SignBitKnownOne) &&
"Sign bit can't be known to be both zero and one!");
- // If the sign bit wasn't actually demanded by our caller, we don't
- // want it set in the KnownZero and KnownOne result values. Reset the
- // mask and reapply it to the result values.
- InMask = Mask.trunc(InBits);
- KnownZero &= InMask;
- KnownOne &= InMask;
-
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
@@ -1921,10 +1926,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask.trunc(InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
return;
@@ -1932,10 +1936,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask.zext(InBits);
KnownZero = KnownZero.zext(InBits);
KnownOne = KnownOne.zext(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.trunc(BitWidth);
KnownOne = KnownOne.trunc(BitWidth);
@@ -1944,9 +1947,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::AssertZext: {
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
- ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
- KnownOne, Depth+1);
- KnownZero |= (~InMask) & Mask;
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero |= (~InMask);
return;
}
case ISD::FGETSIGN:
@@ -1963,8 +1965,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2,
- Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
@@ -1972,7 +1973,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if ((KnownZero2 & MaskV) == MaskV) {
unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
// Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
}
}
}
@@ -1983,13 +1984,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
// low 3 bits clear.
- APInt Mask2 = APInt::getLowBitsSet(BitWidth,
- BitWidth - Mask.countLeadingZeros());
- ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
- ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
KnownZeroOut = std::min(KnownZeroOut,
KnownZero2.countTrailingOnes());
@@ -2013,7 +2012,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -2028,10 +2027,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// the upper bits are all one.
if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
KnownOne |= ~LowBits;
-
- KnownZero &= Mask;
- KnownOne &= Mask;
-
assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
}
}
@@ -2041,9 +2036,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- APInt Mask2 = LowBits & Mask;
- KnownZero |= ~LowBits & Mask;
- ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1);
+ KnownZero |= ~LowBits;
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1);
assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
break;
}
@@ -2051,16 +2045,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne,
- Depth+1);
- ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2,
- Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
KnownOne.clearAllBits();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
return;
}
case ISD::FrameIndex:
@@ -2080,8 +2071,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
// Allow the target to implement this method for its nodes.
- TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this,
- Depth);
+ TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth);
return;
}
}
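
The mechanical change throughout this function is the same: the demanded-bits Mask argument is gone, ComputeMaskedBits now reports known bits for the full value width, and callers that only care about some bits mask the result themselves. A standalone sketch (plain C++, not LLVM code) of the underlying known-bits rule for AND:

#include <cstdint>
#include <cstdio>

// Standalone illustration, not LLVM code: for AND, a result bit is known
// zero if it is known zero in either operand, and known one only if it is
// known one in both. Callers apply their own mask to the result if needed.
struct KnownBits { uint64_t Zero, One; };

static KnownBits knownAnd(KnownBits L, KnownBits R) {
  return { L.Zero | R.Zero, L.One & R.One };
}

int main() {
  // x has its low byte known to be 0xF0, y has its low byte known to be 0xCC.
  KnownBits X = { /*Zero=*/0x0F, /*One=*/0xF0 };
  KnownBits Y = { /*Zero=*/0x33, /*One=*/0xCC };
  KnownBits A = knownAnd(X, Y);
  std::printf("known zero = 0x%llx, known one = 0x%llx\n",
              (unsigned long long)A.Zero, (unsigned long long)A.One);
  // A caller interested only in the low nibble masks the answer itself:
  uint64_t Mask = 0xF;
  std::printf("masked known zero = 0x%llx\n",
              (unsigned long long)(A.Zero & Mask));
  return 0;
}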
@@ -2205,12 +2195,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VTBits);
- ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
return VTBits;
// If we are subtracting one from a positive number, there is no carry
@@ -2221,8 +2210,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
if (Tmp2 == 1) return 1;
- return std::min(Tmp, Tmp2)-1;
- break;
+ return std::min(Tmp, Tmp2)-1;
case ISD::SUB:
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
@@ -2232,11 +2220,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VTBits);
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
return VTBits;
// If the input is known to be positive (the sign bit is known clear),
@@ -2251,8 +2238,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp == 1) return 1; // Early out.
- return std::min(Tmp, Tmp2)-1;
- break;
+ return std::min(Tmp, Tmp2)-1;
case ISD::TRUNCATE:
// FIXME: it's tricky to do anything useful for this, but it is an important
// case for targets like X86.
@@ -2286,9 +2272,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VTBits);
- ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+ APInt Mask;
if (KnownZero.isNegative()) { // sign bit is 0
Mask = KnownZero;
} else if (KnownOne.isNegative()) { // sign bit is 1;
@@ -2328,7 +2314,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
- if (NoNaNsFPMath)
+ if (getTarget().Options.NoNaNsFPMath)
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
@@ -2423,8 +2409,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::CTPOP:
return getConstant(Val.countPopulation(), VT);
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
return getConstant(Val.countLeadingZeros(), VT);
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
return getConstant(Val.countTrailingZeros(), VT);
}
}
@@ -2440,7 +2428,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::FABS:
V.clearSign();
return getConstantFP(V, VT);
- case ISD::FP_ROUND:
case ISD::FP_EXTEND: {
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
@@ -2561,17 +2548,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
"Vector element count mismatch!");
if (OpOpcode == ISD::TRUNCATE)
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
- else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
- OpOpcode == ISD::ANY_EXTEND) {
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
// If the source is smaller than the dest, we still need an extend.
if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
.bitsLT(VT.getScalarType()))
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
- else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+ if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
- else
- return Operand.getNode()->getOperand(0);
+ return Operand.getNode()->getOperand(0);
}
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
break;
case ISD::BITCAST:
// Basic sanity checking.
@@ -2601,7 +2589,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
break;
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if (UnsafeFPMath && OpOpcode == ISD::FSUB)
+ if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
Operand.getNode()->getOperand(0));
if (OpOpcode == ISD::FNEG) // --X -> X
@@ -2736,7 +2724,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (UnsafeFPMath) {
+ if (getTarget().Options.UnsafeFPMath) {
if (Opcode == ISD::FADD) {
// 0+x --> x
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
@@ -3005,6 +2993,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
default: break;
}
}
+
+ if (Opcode == ISD::FP_ROUND) {
+ APFloat V = N1CFP->getValueAPF(); // make copy
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(*EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
}
// Canonicalize an UNDEF to the RHS, even over a constant.
@@ -3059,7 +3057,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (UnsafeFPMath)
+ if (getTarget().Options.UnsafeFPMath)
return N2;
break;
case ISD::MUL:
@@ -3133,16 +3131,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::SELECT:
if (N1C) {
if (N1C->getZExtValue())
- return N2; // select true, X, Y -> X
- else
- return N3; // select false, X, Y -> Y
+ return N2; // select true, X, Y -> X
+ return N3; // select false, X, Y -> Y
}
if (N2 == N3) return N2; // select C, X, X -> X
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
- break;
case ISD::INSERT_SUBVECTOR: {
SDValue Index = N3;
if (VT.isSimple() && N1.getValueType().isSimple()
@@ -3275,8 +3271,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
/// used when a memcpy is turned into a memset when the source is a constant
/// string ptr.
static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
- const TargetLowering &TLI,
- std::string &Str, unsigned Offset) {
+ const TargetLowering &TLI, StringRef Str) {
// Handle vector with all elements zero.
if (Str.empty()) {
if (VT.isInteger())
@@ -3294,15 +3289,18 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
}
assert(!VT.isVector() && "Can't handle vector type here!");
- unsigned NumBits = VT.getSizeInBits();
- unsigned MSB = NumBits / 8;
+ unsigned NumVTBytes = VT.getSizeInBits() / 8;
+ unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
+
uint64_t Val = 0;
- if (TLI.isLittleEndian())
- Offset = Offset + MSB - 1;
- for (unsigned i = 0; i != MSB; ++i) {
- Val = (Val << 8) | (unsigned char)Str[Offset];
- Offset += TLI.isLittleEndian() ? -1 : 1;
+ if (TLI.isLittleEndian()) {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << i*8;
+ } else {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
}
+
return DAG.getConstant(Val, VT);
}
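
A standalone sketch (plain C++, not LLVM code) of the byte packing above, showing how the leading bytes of the string become a single integer constant on little- and big-endian targets:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <string>

// Standalone illustration, not LLVM code: pack the first bytes of a string
// into an integer of NumVTBytes bytes, low byte first on little-endian
// targets and high byte first on big-endian ones.
static uint64_t packBytes(const std::string &Str, unsigned NumVTBytes,
                          bool LittleEndian) {
  unsigned NumBytes = std::min(NumVTBytes, (unsigned)Str.size());
  uint64_t Val = 0;
  for (unsigned i = 0; i != NumBytes; ++i) {
    uint64_t Byte = (unsigned char)Str[i];
    Val |= LittleEndian ? Byte << (i * 8) : Byte << ((NumVTBytes - i - 1) * 8);
  }
  return Val;
}

int main() {
  std::printf("LE: 0x%llx\n", (unsigned long long)packBytes("abcd", 4, true));
  std::printf("BE: 0x%llx\n", (unsigned long long)packBytes("abcd", 4, false));
  return 0;  // prints LE: 0x64636261 and BE: 0x61626364
}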
@@ -3317,7 +3315,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
/// isMemSrcFromString - Returns true if memcpy source is a string constant.
///
-static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
unsigned SrcDelta = 0;
GlobalAddressSDNode *G = NULL;
if (Src.getOpcode() == ISD::GlobalAddress)
@@ -3331,11 +3329,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
if (!G)
return false;
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
- if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
- return true;
-
- return false;
+ return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false);
}
/// FindOptimalMemOpLowering - Determines the optimal series of memory ops
@@ -3345,7 +3339,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe,
+ bool IsZeroVal,
bool MemcpyStrSrc,
SelectionDAG &DAG,
const TargetLowering &TLI) {
@@ -3359,7 +3353,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
// not need to be loaded.
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
- NonScalarIntSafe, MemcpyStrSrc,
+ IsZeroVal, MemcpyStrSrc,
DAG.getMachineFunction());
if (VT == MVT::Other) {
@@ -3438,7 +3432,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
- std::string Str;
+ StringRef Str;
bool CopyFromStr = isMemSrcFromString(Src, Str);
bool isZeroStr = CopyFromStr && Str.empty();
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
@@ -3475,7 +3469,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// We only handle zero vectors here.
// FIXME: Handle other cases where store of vector immediate is done in
// a single instruction.
- Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
DstPtrInfo.getWithOffset(DstOff), isVol,
@@ -3562,7 +3556,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Value = DAG.getLoad(VT, dl, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
SrcPtrInfo.getWithOffset(SrcOff), isVol,
- false, SrcAlign);
+ false, false, SrcAlign);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -3606,11 +3600,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
- bool NonScalarIntSafe =
+ bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
- NonScalarIntSafe, false, DAG, TLI))
+ IsZeroVal, false, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -3717,8 +3711,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMCPY), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
TLI.getPointerTy()),
Args, *this, dl);
@@ -3769,8 +3764,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMMOVE),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
TLI.getPointerTy()),
Args, *this, dl);
@@ -3829,8 +3825,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMSET), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMSET),
+ /*isTailCall=*/false,
+ /*doesNotReturn*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
TLI.getPointerTy()),
Args, *this, dl);
@@ -4138,8 +4135,9 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset,
MachinePointerInfo PtrInfo, EVT MemVT,
- bool isVolatile, bool isNonTemporal,
- unsigned Alignment, const MDNode *TBAAInfo) {
+ bool isVolatile, bool isNonTemporal, bool isInvariant,
+ unsigned Alignment, const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
@@ -4150,6 +4148,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+ if (isInvariant)
+ Flags |= MachineMemOperand::MOInvariant;
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.
@@ -4159,7 +4159,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
- TBAAInfo);
+ TBAAInfo, Ranges);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
@@ -4196,7 +4196,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
- MMO->isNonTemporal()));
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
@@ -4213,10 +4214,13 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment, const MDNode *TBAAInfo) {
+ bool isInvariant, unsigned Alignment,
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
- PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment,
+ TBAAInfo, Ranges);
}
SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
@@ -4226,7 +4230,7 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
unsigned Alignment, const MDNode *TBAAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
- PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment,
+ PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment,
TBAAInfo);
}
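
A hypothetical call site for the extended overload (not code from this patch; DAG, dl, Chain, Ptr and PtrInfo are assumed to exist in the surrounding lowering code):

// Hypothetical call site, not from this patch: mark a load as invariant so
// later passes may reorder or CSE it; no TBAA or range metadata attached.
SDValue Load = DAG.getLoad(MVT::i32, dl, Chain, Ptr, PtrInfo,
                           /*isVolatile=*/false, /*isNonTemporal=*/false,
                           /*isInvariant=*/true, /*Alignment=*/4,
                           /*TBAAInfo=*/0, /*Ranges=*/0);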
@@ -4239,8 +4243,8 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
"Load is already a indexed load!");
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
LD->getChain(), Base, Offset, LD->getPointerInfo(),
- LD->getMemoryVT(),
- LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
+ LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(),
+ false, LD->getAlignment());
}
SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
@@ -4282,7 +4286,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal()));
+ MMO->isNonTemporal(), MMO->isInvariant()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4349,7 +4353,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(SVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal()));
+ MMO->isNonTemporal(), MMO->isInvariant()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4903,6 +4907,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
return N;
}
+/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away
+/// the line number information on the merged node since it is not possible to
+/// preserve the information that the operation is associated with multiple
+/// lines. This will make the debugger work better at -O0, where there is a
+/// higher probability of having other instructions associated with that line.
+///
+SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) {
+ DebugLoc NLoc = N->getDebugLoc();
+ if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) {
+ N->setDebugLoc(DebugLoc());
+ }
+ return N;
+}
+
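A standalone sketch (plain C++, not LLVM code) of the policy described above: at -O0, merging two nodes that carry different source locations drops the location rather than keeping a misleading one.

#include <cstdio>

// Standalone illustration, not LLVM code: line 0 stands in for an unknown
// DebugLoc; merging at -O0 clears the location when the two locations differ.
struct Loc { int Line; bool known() const { return Line != 0; } };

static Loc mergeLoc(Loc Existing, Loc Incoming, bool OptNone) {
  if (OptNone && Existing.known() && Existing.Line != Incoming.Line)
    return Loc{0};   // drop the location on the merged node
  return Existing;   // otherwise keep what the existing node had
}

int main() {
  Loc A{12}, B{27};
  std::printf("merged line at -O0: %d\n", mergeLoc(A, B, true).Line);   // 0
  std::printf("merged line at -O2: %d\n", mergeLoc(A, B, false).Line);  // 12
  return 0;
}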
/// MorphNodeTo - This *mutates* the specified node to have the specified
/// return type, opcode, and operands.
///
@@ -4924,7 +4942,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
- return ON;
+ return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc());
}
if (!RemoveNodeFromCSEMaps(N))
@@ -5128,8 +5146,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
- return cast<MachineSDNode>(E);
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL));
+ }
}
// Allocate a new MachineSDNode.
@@ -5290,6 +5309,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (FromN == getRoot())
+ setRoot(To);
}
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
@@ -5335,6 +5358,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To, getRoot().getResNo()));
}
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
@@ -5373,6 +5400,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To[getRoot().getResNo()]));
}
/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
@@ -5431,6 +5462,10 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot())
+ setRoot(To);
}
namespace {
@@ -5657,7 +5692,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
MachineMemOperand *mmo)
: SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal());
+ MMO->isNonTemporal(), MMO->isInvariant());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
assert(isNonTemporal() == MMO->isNonTemporal() &&
"Non-temporal encoding error!");
@@ -5670,7 +5705,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
: SDNode(Opc, dl, VTs, Ops, NumOps),
MemoryVT(memvt), MMO(mmo) {
SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal());
+ MMO->isNonTemporal(), MMO->isInvariant());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
}
@@ -5846,565 +5881,6 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
}
-std::string SDNode::getOperationName(const SelectionDAG *G) const {
- switch (getOpcode()) {
- default:
- if (getOpcode() < ISD::BUILTIN_OP_END)
- return "<<Unknown DAG Node>>";
- if (isMachineOpcode()) {
- if (G)
- if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
- if (getMachineOpcode() < TII->getNumOpcodes())
- return TII->get(getMachineOpcode()).getName();
- return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
- }
- if (G) {
- const TargetLowering &TLI = G->getTargetLoweringInfo();
- const char *Name = TLI.getTargetNodeName(getOpcode());
- if (Name) return Name;
- return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
- }
- return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
-
-#ifndef NDEBUG
- case ISD::DELETED_NODE:
- return "<<Deleted Node!>>";
-#endif
- case ISD::PREFETCH: return "Prefetch";
- case ISD::MEMBARRIER: return "MemBarrier";
- case ISD::ATOMIC_FENCE: return "AtomicFence";
- case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
- case ISD::ATOMIC_SWAP: return "AtomicSwap";
- case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
- case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
- case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
- case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
- case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
- case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
- case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
- case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
- case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
- case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
- case ISD::ATOMIC_LOAD: return "AtomicLoad";
- case ISD::ATOMIC_STORE: return "AtomicStore";
- case ISD::PCMARKER: return "PCMarker";
- case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
- case ISD::SRCVALUE: return "SrcValue";
- case ISD::MDNODE_SDNODE: return "MDNode";
- case ISD::EntryToken: return "EntryToken";
- case ISD::TokenFactor: return "TokenFactor";
- case ISD::AssertSext: return "AssertSext";
- case ISD::AssertZext: return "AssertZext";
-
- case ISD::BasicBlock: return "BasicBlock";
- case ISD::VALUETYPE: return "ValueType";
- case ISD::Register: return "Register";
-
- case ISD::Constant: return "Constant";
- case ISD::ConstantFP: return "ConstantFP";
- case ISD::GlobalAddress: return "GlobalAddress";
- case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
- case ISD::FrameIndex: return "FrameIndex";
- case ISD::JumpTable: return "JumpTable";
- case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
- case ISD::RETURNADDR: return "RETURNADDR";
- case ISD::FRAMEADDR: return "FRAMEADDR";
- case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
- case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
- case ISD::LSDAADDR: return "LSDAADDR";
- case ISD::EHSELECTION: return "EHSELECTION";
- case ISD::EH_RETURN: return "EH_RETURN";
- case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
- case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
- case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";
- case ISD::ConstantPool: return "ConstantPool";
- case ISD::ExternalSymbol: return "ExternalSymbol";
- case ISD::BlockAddress: return "BlockAddress";
- case ISD::INTRINSIC_WO_CHAIN:
- case ISD::INTRINSIC_VOID:
- case ISD::INTRINSIC_W_CHAIN: {
- unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
- unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
- if (IID < Intrinsic::num_intrinsics)
- return Intrinsic::getName((Intrinsic::ID)IID);
- else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
- return TII->getName(IID);
- llvm_unreachable("Invalid intrinsic ID");
- }
-
- case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
- case ISD::TargetConstant: return "TargetConstant";
- case ISD::TargetConstantFP:return "TargetConstantFP";
- case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
- case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
- case ISD::TargetFrameIndex: return "TargetFrameIndex";
- case ISD::TargetJumpTable: return "TargetJumpTable";
- case ISD::TargetConstantPool: return "TargetConstantPool";
- case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
- case ISD::TargetBlockAddress: return "TargetBlockAddress";
-
- case ISD::CopyToReg: return "CopyToReg";
- case ISD::CopyFromReg: return "CopyFromReg";
- case ISD::UNDEF: return "undef";
- case ISD::MERGE_VALUES: return "merge_values";
- case ISD::INLINEASM: return "inlineasm";
- case ISD::EH_LABEL: return "eh_label";
- case ISD::HANDLENODE: return "handlenode";
-
- // Unary operators
- case ISD::FABS: return "fabs";
- case ISD::FNEG: return "fneg";
- case ISD::FSQRT: return "fsqrt";
- case ISD::FSIN: return "fsin";
- case ISD::FCOS: return "fcos";
- case ISD::FTRUNC: return "ftrunc";
- case ISD::FFLOOR: return "ffloor";
- case ISD::FCEIL: return "fceil";
- case ISD::FRINT: return "frint";
- case ISD::FNEARBYINT: return "fnearbyint";
- case ISD::FEXP: return "fexp";
- case ISD::FEXP2: return "fexp2";
- case ISD::FLOG: return "flog";
- case ISD::FLOG2: return "flog2";
- case ISD::FLOG10: return "flog10";
-
- // Binary operators
- case ISD::ADD: return "add";
- case ISD::SUB: return "sub";
- case ISD::MUL: return "mul";
- case ISD::MULHU: return "mulhu";
- case ISD::MULHS: return "mulhs";
- case ISD::SDIV: return "sdiv";
- case ISD::UDIV: return "udiv";
- case ISD::SREM: return "srem";
- case ISD::UREM: return "urem";
- case ISD::SMUL_LOHI: return "smul_lohi";
- case ISD::UMUL_LOHI: return "umul_lohi";
- case ISD::SDIVREM: return "sdivrem";
- case ISD::UDIVREM: return "udivrem";
- case ISD::AND: return "and";
- case ISD::OR: return "or";
- case ISD::XOR: return "xor";
- case ISD::SHL: return "shl";
- case ISD::SRA: return "sra";
- case ISD::SRL: return "srl";
- case ISD::ROTL: return "rotl";
- case ISD::ROTR: return "rotr";
- case ISD::FADD: return "fadd";
- case ISD::FSUB: return "fsub";
- case ISD::FMUL: return "fmul";
- case ISD::FDIV: return "fdiv";
- case ISD::FMA: return "fma";
- case ISD::FREM: return "frem";
- case ISD::FCOPYSIGN: return "fcopysign";
- case ISD::FGETSIGN: return "fgetsign";
- case ISD::FPOW: return "fpow";
-
- case ISD::FPOWI: return "fpowi";
- case ISD::SETCC: return "setcc";
- case ISD::SELECT: return "select";
- case ISD::VSELECT: return "vselect";
- case ISD::SELECT_CC: return "select_cc";
- case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
- case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
- case ISD::CONCAT_VECTORS: return "concat_vectors";
- case ISD::INSERT_SUBVECTOR: return "insert_subvector";
- case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
- case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
- case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
- case ISD::CARRY_FALSE: return "carry_false";
- case ISD::ADDC: return "addc";
- case ISD::ADDE: return "adde";
- case ISD::SADDO: return "saddo";
- case ISD::UADDO: return "uaddo";
- case ISD::SSUBO: return "ssubo";
- case ISD::USUBO: return "usubo";
- case ISD::SMULO: return "smulo";
- case ISD::UMULO: return "umulo";
- case ISD::SUBC: return "subc";
- case ISD::SUBE: return "sube";
- case ISD::SHL_PARTS: return "shl_parts";
- case ISD::SRA_PARTS: return "sra_parts";
- case ISD::SRL_PARTS: return "srl_parts";
-
- // Conversion operators.
- case ISD::SIGN_EXTEND: return "sign_extend";
- case ISD::ZERO_EXTEND: return "zero_extend";
- case ISD::ANY_EXTEND: return "any_extend";
- case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
- case ISD::TRUNCATE: return "truncate";
- case ISD::FP_ROUND: return "fp_round";
- case ISD::FLT_ROUNDS_: return "flt_rounds";
- case ISD::FP_ROUND_INREG: return "fp_round_inreg";
- case ISD::FP_EXTEND: return "fp_extend";
-
- case ISD::SINT_TO_FP: return "sint_to_fp";
- case ISD::UINT_TO_FP: return "uint_to_fp";
- case ISD::FP_TO_SINT: return "fp_to_sint";
- case ISD::FP_TO_UINT: return "fp_to_uint";
- case ISD::BITCAST: return "bitcast";
- case ISD::FP16_TO_FP32: return "fp16_to_fp32";
- case ISD::FP32_TO_FP16: return "fp32_to_fp16";
-
- case ISD::CONVERT_RNDSAT: {
- switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
- default: llvm_unreachable("Unknown cvt code!");
- case ISD::CVT_FF: return "cvt_ff";
- case ISD::CVT_FS: return "cvt_fs";
- case ISD::CVT_FU: return "cvt_fu";
- case ISD::CVT_SF: return "cvt_sf";
- case ISD::CVT_UF: return "cvt_uf";
- case ISD::CVT_SS: return "cvt_ss";
- case ISD::CVT_SU: return "cvt_su";
- case ISD::CVT_US: return "cvt_us";
- case ISD::CVT_UU: return "cvt_uu";
- }
- }
-
- // Control flow instructions
- case ISD::BR: return "br";
- case ISD::BRIND: return "brind";
- case ISD::BR_JT: return "br_jt";
- case ISD::BRCOND: return "brcond";
- case ISD::BR_CC: return "br_cc";
- case ISD::CALLSEQ_START: return "callseq_start";
- case ISD::CALLSEQ_END: return "callseq_end";
-
- // Other operators
- case ISD::LOAD: return "load";
- case ISD::STORE: return "store";
- case ISD::VAARG: return "vaarg";
- case ISD::VACOPY: return "vacopy";
- case ISD::VAEND: return "vaend";
- case ISD::VASTART: return "vastart";
- case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
- case ISD::EXTRACT_ELEMENT: return "extract_element";
- case ISD::BUILD_PAIR: return "build_pair";
- case ISD::STACKSAVE: return "stacksave";
- case ISD::STACKRESTORE: return "stackrestore";
- case ISD::TRAP: return "trap";
-
- // Bit manipulation
- case ISD::BSWAP: return "bswap";
- case ISD::CTPOP: return "ctpop";
- case ISD::CTTZ: return "cttz";
- case ISD::CTLZ: return "ctlz";
-
- // Trampolines
- case ISD::INIT_TRAMPOLINE: return "init_trampoline";
- case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
-
- case ISD::CONDCODE:
- switch (cast<CondCodeSDNode>(this)->get()) {
- default: llvm_unreachable("Unknown setcc condition!");
- case ISD::SETOEQ: return "setoeq";
- case ISD::SETOGT: return "setogt";
- case ISD::SETOGE: return "setoge";
- case ISD::SETOLT: return "setolt";
- case ISD::SETOLE: return "setole";
- case ISD::SETONE: return "setone";
-
- case ISD::SETO: return "seto";
- case ISD::SETUO: return "setuo";
- case ISD::SETUEQ: return "setue";
- case ISD::SETUGT: return "setugt";
- case ISD::SETUGE: return "setuge";
- case ISD::SETULT: return "setult";
- case ISD::SETULE: return "setule";
- case ISD::SETUNE: return "setune";
-
- case ISD::SETEQ: return "seteq";
- case ISD::SETGT: return "setgt";
- case ISD::SETGE: return "setge";
- case ISD::SETLT: return "setlt";
- case ISD::SETLE: return "setle";
- case ISD::SETNE: return "setne";
- }
- }
-}
-
-const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
- switch (AM) {
- default:
- return "";
- case ISD::PRE_INC:
- return "<pre-inc>";
- case ISD::PRE_DEC:
- return "<pre-dec>";
- case ISD::POST_INC:
- return "<post-inc>";
- case ISD::POST_DEC:
- return "<post-dec>";
- }
-}
-
-std::string ISD::ArgFlagsTy::getArgFlagsString() {
- std::string S = "< ";
-
- if (isZExt())
- S += "zext ";
- if (isSExt())
- S += "sext ";
- if (isInReg())
- S += "inreg ";
- if (isSRet())
- S += "sret ";
- if (isByVal())
- S += "byval ";
- if (isNest())
- S += "nest ";
- if (getByValAlign())
- S += "byval-align:" + utostr(getByValAlign()) + " ";
- if (getOrigAlign())
- S += "orig-align:" + utostr(getOrigAlign()) + " ";
- if (getByValSize())
- S += "byval-size:" + utostr(getByValSize()) + " ";
- return S + ">";
-}
-
-void SDNode::dump() const { dump(0); }
-void SDNode::dump(const SelectionDAG *G) const {
- print(dbgs(), G);
- dbgs() << '\n';
-}
-
-void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
- OS << (void*)this << ": ";
-
- for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
- if (i) OS << ",";
- if (getValueType(i) == MVT::Other)
- OS << "ch";
- else
- OS << getValueType(i).getEVTString();
- }
- OS << " = " << getOperationName(G);
-}
-
-void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
- if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
- if (!MN->memoperands_empty()) {
- OS << "<";
- OS << "Mem:";
- for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
- e = MN->memoperands_end(); i != e; ++i) {
- OS << **i;
- if (llvm::next(i) != e)
- OS << " ";
- }
- OS << ">";
- }
- } else if (const ShuffleVectorSDNode *SVN =
- dyn_cast<ShuffleVectorSDNode>(this)) {
- OS << "<";
- for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
- int Idx = SVN->getMaskElt(i);
- if (i) OS << ",";
- if (Idx < 0)
- OS << "u";
- else
- OS << Idx;
- }
- OS << ">";
- } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
- OS << '<' << CSDN->getAPIntValue() << '>';
- } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
- if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
- OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
- else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
- OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
- else {
- OS << "<APFloat(";
- CSDN->getValueAPF().bitcastToAPInt().dump();
- OS << ")>";
- }
- } else if (const GlobalAddressSDNode *GADN =
- dyn_cast<GlobalAddressSDNode>(this)) {
- int64_t offset = GADN->getOffset();
- OS << '<';
- WriteAsOperand(OS, GADN->getGlobal());
- OS << '>';
- if (offset > 0)
- OS << " + " << offset;
- else
- OS << " " << offset;
- if (unsigned int TF = GADN->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
- OS << "<" << FIDN->getIndex() << ">";
- } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
- OS << "<" << JTDN->getIndex() << ">";
- if (unsigned int TF = JTDN->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
- int offset = CP->getOffset();
- if (CP->isMachineConstantPoolEntry())
- OS << "<" << *CP->getMachineCPVal() << ">";
- else
- OS << "<" << *CP->getConstVal() << ">";
- if (offset > 0)
- OS << " + " << offset;
- else
- OS << " " << offset;
- if (unsigned int TF = CP->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
- OS << "<";
- const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
- if (LBB)
- OS << LBB->getName() << " ";
- OS << (const void*)BBDN->getBasicBlock() << ">";
- } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
- OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
- } else if (const ExternalSymbolSDNode *ES =
- dyn_cast<ExternalSymbolSDNode>(this)) {
- OS << "'" << ES->getSymbol() << "'";
- if (unsigned int TF = ES->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
- if (M->getValue())
- OS << "<" << M->getValue() << ">";
- else
- OS << "<null>";
- } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
- if (MD->getMD())
- OS << "<" << MD->getMD() << ">";
- else
- OS << "<null>";
- } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
- OS << ":" << N->getVT().getEVTString();
- }
- else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
- OS << "<" << *LD->getMemOperand();
-
- bool doExt = true;
- switch (LD->getExtensionType()) {
- default: doExt = false; break;
- case ISD::EXTLOAD: OS << ", anyext"; break;
- case ISD::SEXTLOAD: OS << ", sext"; break;
- case ISD::ZEXTLOAD: OS << ", zext"; break;
- }
- if (doExt)
- OS << " from " << LD->getMemoryVT().getEVTString();
-
- const char *AM = getIndexedModeName(LD->getAddressingMode());
- if (*AM)
- OS << ", " << AM;
-
- OS << ">";
- } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
- OS << "<" << *ST->getMemOperand();
-
- if (ST->isTruncatingStore())
- OS << ", trunc to " << ST->getMemoryVT().getEVTString();
-
- const char *AM = getIndexedModeName(ST->getAddressingMode());
- if (*AM)
- OS << ", " << AM;
-
- OS << ">";
- } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
- OS << "<" << *M->getMemOperand() << ">";
- } else if (const BlockAddressSDNode *BA =
- dyn_cast<BlockAddressSDNode>(this)) {
- OS << "<";
- WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
- OS << ", ";
- WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
- OS << ">";
- if (unsigned int TF = BA->getTargetFlags())
- OS << " [TF=" << TF << ']';
- }
-
- if (G)
- if (unsigned Order = G->GetOrdering(this))
- OS << " [ORD=" << Order << ']';
-
- if (getNodeId() != -1)
- OS << " [ID=" << getNodeId() << ']';
-
- DebugLoc dl = getDebugLoc();
- if (G && !dl.isUnknown()) {
- DIScope
- Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
- OS << " dbg:";
- // Omit the directory, since it's usually long and uninteresting.
- if (Scope.Verify())
- OS << Scope.getFilename();
- else
- OS << "<unknown>";
- OS << ':' << dl.getLine();
- if (dl.getCol() != 0)
- OS << ':' << dl.getCol();
- }
-}
-
-void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
- print_types(OS, G);
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- if (i) OS << ", "; else OS << " ";
- OS << (void*)getOperand(i).getNode();
- if (unsigned RN = getOperand(i).getResNo())
- OS << ":" << RN;
- }
- print_details(OS, G);
-}
-
-static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
- const SelectionDAG *G, unsigned depth,
- unsigned indent) {
- if (depth == 0)
- return;
-
- OS.indent(indent);
-
- N->print(OS, G);
-
- if (depth < 1)
- return;
-
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- // Don't follow chain operands.
- if (N->getOperand(i).getValueType() == MVT::Other)
- continue;
- OS << '\n';
- printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
- }
-}
-
-void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
- unsigned depth) const {
- printrWithDepthHelper(OS, this, G, depth, 0);
-}
-
-void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
- // Don't print impossibly deep things.
- printrWithDepth(OS, G, 10);
-}
-
-void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
- printrWithDepth(dbgs(), G, depth);
-}
-
-void SDNode::dumprFull(const SelectionDAG *G) const {
- // Don't print impossibly deep things.
- dumprWithDepth(G, 10);
-}
-
-static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- if (N->getOperand(i).getNode()->hasOneUse())
- DumpNodes(N->getOperand(i).getNode(), indent+2, G);
- else
- dbgs() << "\n" << std::string(indent+2, ' ')
- << (void*)N->getOperand(i).getNode() << ": <multiple use>";
-
-
- dbgs() << "\n";
- dbgs().indent(indent);
- N->dump(G);
-}
-
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
assert(N->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
@@ -6527,20 +6003,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
const GlobalValue *GV;
int64_t GVOffset = 0;
if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
- // If GV has specified alignment, then use it. Otherwise, use the preferred
- // alignment.
- unsigned Align = GV->getAlignment();
- if (!Align) {
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
- if (GVar->hasInitializer()) {
- const TargetData *TD = TLI.getTargetData();
- Align = TD->getPreferredAlignment(GVar);
- }
- }
- if (!Align)
- Align = TLI.getTargetData()->getABITypeAlignment(GV->getType());
- }
- return MinAlign(Align, GVOffset);
+ unsigned PtrWidth = TLI.getPointerTy().getSizeInBits();
+ APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
+ llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
+ TLI.getTargetData());
+ unsigned AlignBits = KnownZero.countTrailingOnes();
+ unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
+ if (Align)
+ return MinAlign(Align, GVOffset);
}
// If this is a direct reference to a stack slot, use information about the
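// Illustrative sketch, not part of this patch: deriving an alignment from
// the number of low pointer bits that are known to be zero, then folding in
// a constant offset the way MinAlign(Align, GVOffset) does above.  Helper
// names are hypothetical; only the arithmetic mirrors the new code path.
#include <cassert>
#include <cstdint>
static uint64_t minAlign(uint64_t A, uint64_t B) {
  // Lowest set bit of A|B: the largest power of two dividing both values.
  return (A | B) & (1 + ~(A | B));
}
static unsigned alignFromKnownZeroLowBits(unsigned KnownZeroLowBits) {
  if (KnownZeroLowBits == 0)
    return 0;                        // nothing known about the pointer
  if (KnownZeroLowBits > 31)
    KnownZeroLowBits = 31;           // cap the shift, as the patch does
  return 1u << KnownZeroLowBits;
}
int main() {
  // A global known to be 16-byte aligned has 4 trailing known-zero bits.
  unsigned Align = alignFromKnownZeroLowBits(4);
  assert(Align == 16);
  // Adding a constant offset of 8 drops the usable alignment to 8.
  assert(minAlign(Align, 8) == 8);
}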
@@ -6566,74 +6036,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
return 0;
}
-void SelectionDAG::dump() const {
- dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
-
- for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
- I != E; ++I) {
- const SDNode *N = I;
- if (!N->hasOneUse() && N != getRoot().getNode())
- DumpNodes(N, 2, this);
- }
-
- if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
-
- dbgs() << "\n\n";
-}
-
-void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
- print_types(OS, G);
- print_details(OS, G);
-}
-
-typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
-static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
- const SelectionDAG *G, VisitedSDNodeSet &once) {
- if (!once.insert(N)) // If we've been here before, return now.
- return;
-
- // Dump the current SDNode, but don't end the line yet.
- OS << std::string(indent, ' ');
- N->printr(OS, G);
-
- // Having printed this SDNode, walk the children:
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- const SDNode *child = N->getOperand(i).getNode();
-
- if (i) OS << ",";
- OS << " ";
-
- if (child->getNumOperands() == 0) {
- // This child has no grandchildren; print it inline right here.
- child->printr(OS, G);
- once.insert(child);
- } else { // Just the address. FIXME: also print the child's opcode.
- OS << (void*)child;
- if (unsigned RN = N->getOperand(i).getResNo())
- OS << ":" << RN;
- }
- }
-
- OS << "\n";
-
- // Dump children that have grandchildren on their own line(s).
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- const SDNode *child = N->getOperand(i).getNode();
- DumpNodesr(OS, child, indent+2, G, once);
- }
-}
-
-void SDNode::dumpr() const {
- VisitedSDNodeSet once;
- DumpNodesr(dbgs(), this, 0, 0, once);
-}
-
-void SDNode::dumpr(const SelectionDAG *G) const {
- VisitedSDNodeSet once;
- DumpNodesr(dbgs(), this, 0, G, once);
-}
-
-
// getAddressSpace - Return the address space this GlobalAddress belongs to.
unsigned GlobalAddressSDNode::getAddressSpace() const {
return getGlobal()->getType()->getAddressSpace();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 095b4001696f..94cb95804f69 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -41,13 +41,13 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -197,7 +197,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
- DAG.getIntPtrConstant(1));
+ DAG.getTargetConstant(1, TLI.getPointerTy()));
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
@@ -206,7 +206,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
llvm_unreachable("Unknown mismatch!");
- return SDValue();
}
/// getCopyFromParts - Create a value that contains the specified legal parts
@@ -353,10 +352,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
assert(NumParts == 1 && "Do not know what to promote to!");
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
- assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
@@ -364,10 +366,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover less bits than value has, truncate the value.
- assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
// The value may have changed - recompute ValueVT.
@@ -813,9 +818,11 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
}
}
-void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li) {
AA = &aa;
GFI = gfi;
+ LibInfo = li;
TD = DAG.getTarget().getTargetData();
LPadToCallSiteMap.clear();
}
@@ -964,7 +971,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
DanglingDebugInfoMap[V] = DanglingDebugInfo();
}
}
@@ -1054,6 +1061,23 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
return DAG.getMergeValues(&Constants[0], Constants.size(),
getCurDebugLoc());
}
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(C)) {
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Ops.push_back(SDValue(Val, i));
+ }
+
+ if (isa<ArrayType>(CDS->getType()))
+ return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc());
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
@@ -1088,9 +1112,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
// Now that we know the number and type of the elements, get that number of
// elements into the Ops array based on what kind of constant it is.
SmallVector<SDValue, 16> Ops;
- if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
for (unsigned i = 0; i != NumElements; ++i)
- Ops.push_back(getValue(CP->getOperand(i)));
+ Ops.push_back(getValue(CV->getOperand(i)));
} else {
assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
EVT EltVT = TLI.getValueType(VecTy->getElementType());
@@ -1126,7 +1150,6 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
}
llvm_unreachable("Can't get register for value!");
- return SDValue();
}
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
@@ -1285,8 +1308,8 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
}
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
-uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
- MachineBasicBlock *Dst) {
+uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
if (!BPI)
return 0;
@@ -1336,6 +1359,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
Condition = getICmpCondCode(IC->getPredicate());
} else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
Condition = getFCmpCondCode(FC->getPredicate());
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
} else {
Condition = ISD::SETEQ; // silence warning.
llvm_unreachable("Unknown compare instruction");
@@ -1811,8 +1836,8 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
CopyToExportRegsIfNeeded(&I);
// Update successor info
- InvokeMBB->addSuccessor(Return);
- InvokeMBB->addSuccessor(LandingPad);
+ addSuccessorWithWeight(InvokeMBB, Return);
+ addSuccessorWithWeight(InvokeMBB, LandingPad);
// Drop into normal successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
@@ -1820,9 +1845,6 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
DAG.getBasicBlock(Return)));
}
-void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
-}
-
void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
}
@@ -1835,6 +1857,12 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
AddLandingPadInfo(LP, MMI, MBB);
+ // If there aren't registers to copy the values into (e.g., during SjLj
+ // exceptions), then don't bother to create these DAG nodes.
+ if (TLI.getExceptionPointerRegister() == 0 &&
+ TLI.getExceptionSelectorRegister() == 0)
+ return;
+
SmallVector<EVT, 2> ValueVTs;
ComputeValueVTs(TLI, LP.getType(), ValueVTs);
@@ -2003,7 +2031,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
static inline bool areJTsAllowed(const TargetLowering &TLI) {
- return !DisableJumpTables &&
+ return !TLI.getTargetMachine().Options.DisableJumpTables &&
(TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}
@@ -2190,7 +2218,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
CaseRange LHSR(CR.Range.first, Pivot);
CaseRange RHSR(Pivot, CR.Range.second);
- Constant *C = Pivot->Low;
+ const Constant *C = Pivot->Low;
MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
// We know that we branch to the LHS if the Value being switched on is
@@ -2383,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
- for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
- BasicBlock *SuccBB = SI.getSuccessor(i);
+ for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ const BasicBlock *SuccBB = i.getCaseSuccessor();
MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
- Cases.push_back(Case(SI.getSuccessorValue(i),
- SI.getSuccessorValue(i),
+ Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
SMBB, ExtraWeight));
}
std::sort(Cases.begin(), Cases.end(), CaseCmp());
@@ -2457,7 +2485,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// If there is only the default destination, branch to it if it is not the
// next basic block. Otherwise, just fall through.
- if (SI.getNumCases() == 1) {
+ if (!SI.getNumCases()) {
// Update machine-CFG edges.
// If this is not a fall-through branch, emit the branch.
@@ -2626,6 +2654,8 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
}
@@ -2685,11 +2715,12 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
- DestVT, N, DAG.getIntPtrConstant(0)));
+ DestVT, N,
+ DAG.getTargetConstant(0, TLI.getPointerTy())));
}
void SelectionDAGBuilder::visitFPExt(const User &I){
- // FPTrunc is never a no-op cast, no need to check
+ // FPExt is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
@@ -2772,33 +2803,25 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
TLI.getValueType(I.getType()), InVec, InIdx));
}
-// Utility for visitShuffleVector - Returns true if the mask is mask starting
-// from SIndx and increasing to the element length (undefs are allowed).
-static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
- unsigned MaskNumElts = Mask.size();
- for (unsigned i = 0; i != MaskNumElts; ++i)
- if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+// Utility for visitShuffleVector - Return true if every element in Mask,
+// beginning from position Pos and ending in Pos+Size, falls within the
+// specified sequential range [Low, Low+Size), or is undef.
+static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
+ unsigned Pos, unsigned Size, int Low) {
+ for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+ if (Mask[i] >= 0 && Mask[i] != Low)
return false;
return true;
}
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
- SmallVector<int, 8> Mask;
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
- // Convert the ConstantVector mask operand into an array of ints, with -1
- // representing undef values.
- SmallVector<Constant*, 8> MaskElts;
- cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
- unsigned MaskNumElts = MaskElts.size();
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- if (isa<UndefValue>(MaskElts[i]))
- Mask.push_back(-1);
- else
- Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
- }
-
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
+ unsigned MaskNumElts = Mask.size();
+
EVT VT = TLI.getValueType(I.getType());
EVT SrcVT = Src1.getValueType();
unsigned SrcNumElts = SrcVT.getVectorNumElements();
@@ -2814,11 +2837,23 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Mask is longer than the source vectors and is a multiple of the source
// vectors. We can use concatenate vector to make the mask and vectors
// lengths match.
- if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
- // The shuffle is concatenating two vectors together.
- setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
- VT, Src1, Src2));
- return;
+ if (SrcNumElts*2 == MaskNumElts) {
+ // First check for Src1 in low and Src2 in high
+ if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
+ isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src1, Src2));
+ return;
+ }
+ // Then check for Src2 in low and Src1 in high
+ if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
+ isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src2, Src1));
+ return;
+ }
}
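// Illustrative sketch, not part of this patch: the two mask shapes the code
// above recognizes when the mask is exactly twice the source width.  For
// 4-element sources, <0,1,2,3,4,5,6,7> concatenates Src1,Src2 and
// <4,5,6,7,0,1,2,3> concatenates Src2,Src1; -1 (undef) entries are allowed
// anywhere.  The check is the same as isSequentialInRange above, rewritten
// over std::vector so it builds standalone.
#include <cassert>
#include <vector>
static bool isSequentialInRange(const std::vector<int> &Mask,
                                unsigned Pos, unsigned Size, int Low) {
  for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
    if (Mask[i] >= 0 && Mask[i] != Low)
      return false;
  return true;
}
int main() {
  const unsigned SrcNumElts = 4;
  std::vector<int> Fwd = {0, 1, 2, 3, 4, 5, 6, 7};
  std::vector<int> Swp = {4, 5, -1, 7, 0, 1, 2, -1};
  // Src1 in the low half, Src2 in the high half -> concat(Src1, Src2).
  assert(isSequentialInRange(Fwd, 0, SrcNumElts, 0) &&
         isSequentialInRange(Fwd, SrcNumElts, SrcNumElts, SrcNumElts));
  // Src2 in the low half, Src1 in the high half -> concat(Src2, Src1).
  assert(isSequentialInRange(Swp, 0, SrcNumElts, SrcNumElts) &&
         isSequentialInRange(Swp, SrcNumElts, SrcNumElts, 0));
}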
// Pad both vectors with undefs to make them the same length as the mask.
@@ -2843,10 +2878,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SmallVector<int, 8> MappedOps;
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
- if (Idx < (int)SrcNumElts)
- MappedOps.push_back(Idx);
- else
- MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts - MaskNumElts;
+ MappedOps.push_back(Idx);
}
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
@@ -2858,13 +2892,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Analyze the access pattern of the vector to see if we can extract
// two subvectors and do the shuffle. The analysis is done by calculating
// the range of elements the mask access on both vectors.
- int MinRange[2] = { static_cast<int>(SrcNumElts+1),
- static_cast<int>(SrcNumElts+1)};
+ int MinRange[2] = { static_cast<int>(SrcNumElts),
+ static_cast<int>(SrcNumElts)};
int MaxRange[2] = {-1, -1};
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
- int Input = 0;
+ unsigned Input = 0;
if (Idx < 0)
continue;
@@ -2880,35 +2914,31 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Check if the access is smaller than the vector size and can we find
// a reasonable extract index.
- int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not
- // Extract.
+ int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not
+ // Extract.
int StartIdx[2]; // StartIdx to extract from
- for (int Input=0; Input < 2; ++Input) {
- if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+ for (unsigned Input = 0; Input < 2; ++Input) {
+ if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
RangeUse[Input] = 0; // Unused
StartIdx[Input] = 0;
- } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
- // Fits within range but we should see if we can find a good
- // start index that is a multiple of the mask length.
- if (MaxRange[Input] < (int)MaskNumElts) {
- RangeUse[Input] = 1; // Extract from beginning of the vector
- StartIdx[Input] = 0;
- } else {
- StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
- if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
- StartIdx[Input] + MaskNumElts <= SrcNumElts)
- RangeUse[Input] = 1; // Extract from a multiple of the mask length.
- }
+ continue;
}
+
+ // Find a good start index that is a multiple of the mask length. Then
+ // see if the rest of the elements are in range.
+ StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+ if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+ StartIdx[Input] + MaskNumElts <= SrcNumElts)
+ RangeUse[Input] = 1; // Extract from a multiple of the mask length.
}
if (RangeUse[0] == 0 && RangeUse[1] == 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
- else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+ if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
// Extract appropriate subvector and generate a vector shuffle
- for (int Input=0; Input < 2; ++Input) {
+ for (unsigned Input = 0; Input < 2; ++Input) {
SDValue &Src = Input == 0 ? Src1 : Src2;
if (RangeUse[Input] == 0)
Src = DAG.getUNDEF(VT);
@@ -2921,12 +2951,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SmallVector<int, 8> MappedOps;
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
- if (Idx < 0)
- MappedOps.push_back(Idx);
- else if (Idx < (int)SrcNumElts)
- MappedOps.push_back(Idx - StartIdx[0]);
- else
- MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+ if (Idx >= 0) {
+ if (Idx < (int)SrcNumElts)
+ Idx -= StartIdx[0];
+ else
+ Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+ }
+ MappedOps.push_back(Idx);
}
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
@@ -2942,22 +2973,20 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
EVT PtrVT = TLI.getPointerTy();
SmallVector<SDValue,8> Ops;
for (unsigned i = 0; i != MaskNumElts; ++i) {
- if (Mask[i] < 0) {
- Ops.push_back(DAG.getUNDEF(EltVT));
- } else {
- int Idx = Mask[i];
- SDValue Res;
+ int Idx = Mask[i];
+ SDValue Res;
- if (Idx < (int)SrcNumElts)
- Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
- EltVT, Src1, DAG.getConstant(Idx, PtrVT));
- else
- Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
- EltVT, Src2,
- DAG.getConstant(Idx - SrcNumElts, PtrVT));
+ if (Idx < 0) {
+ Res = DAG.getUNDEF(EltVT);
+ } else {
+ SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
+ if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
- Ops.push_back(Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src, DAG.getConstant(Idx, PtrVT));
}
+
+ Ops.push_back(Res);
}
setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
@@ -3042,7 +3071,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue N = getValue(I.getOperand(0));
- Type *Ty = I.getOperand(0)->getType();
+ // Note that the pointer operand may be a vector of pointers. Take the scalar
+ // element which holds a pointer.
+ Type *Ty = I.getOperand(0)->getType()->getScalarType();
for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
OI != E; ++OI) {
@@ -3096,7 +3127,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
unsigned Amt = ElementSize.logBase2();
IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
N.getValueType(), IdxN,
- DAG.getConstant(Amt, TLI.getPointerTy()));
+ DAG.getConstant(Amt, IdxN.getValueType()));
} else {
SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
@@ -3175,8 +3206,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+ bool isInvariant = I.getMetadata("invariant.load") != 0;
unsigned Alignment = I.getAlignment();
const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
@@ -3224,7 +3257,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
- isNonTemporal, Alignment, TBAAInfo);
+ isNonTemporal, isInvariant, Alignment, TBAAInfo,
+ Ranges);
Values[i] = L;
Chains[ChainI] = L.getValue(1);
@@ -3358,7 +3392,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
DebugLoc dl = getCurDebugLoc();
ISD::NodeType NT;
switch (I.getOperation()) {
- default: llvm_unreachable("Unknown atomicrmw operation"); return;
+ default: llvm_unreachable("Unknown atomicrmw operation");
case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
@@ -3496,24 +3530,16 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
Info.opc == ISD::INTRINSIC_W_CHAIN)
- Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+ Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
SDValue Op = getValue(I.getArgOperand(i));
- assert(TLI.isTypeLegal(Op.getValueType()) &&
- "Intrinsic uses a non-legal type?");
Ops.push_back(Op);
}
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, I.getType(), ValueVTs);
-#ifndef NDEBUG
- for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
- assert(TLI.isTypeLegal(ValueVTs[Val]) &&
- "Intrinsic uses a non-legal type?");
- }
-#endif // NDEBUG
if (HasChain)
ValueVTs.push_back(MVT::Other);
@@ -3556,6 +3582,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
setValue(&I, Result);
+ } else {
+ // Assign order to result here. If the intrinsic does not produce a result,
+ // it won't be mapped to a SDNode and visit() will not assign it an order
+ // number.
+ ++SDNodeOrder;
+ AssignOrderingToNode(Result.getNode());
}
}
@@ -3597,17 +3629,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
}
-// implVisitAluOverflow - Lower arithmetic overflow instrinsics.
-const char *
-SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
- SDValue Op1 = getValue(I.getArgOperand(0));
- SDValue Op2 = getValue(I.getArgOperand(1));
-
- SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
- setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
- return 0;
-}
-
/// visitExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
void
@@ -4367,9 +4388,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) {
const SDValue &CFR = Ext.getOperand(0);
if (CFR.getOpcode() == ISD::CopyFromReg)
return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
- else
- if (CFR.getOpcode() == ISD::TRUNCATE)
- return getTruncatedArgReg(CFR);
+ if (CFR.getOpcode() == ISD::TRUNCATE)
+ return getTruncatedArgReg(CFR);
}
return 0;
}
@@ -4398,7 +4418,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
// Some arguments' frame index is recorded during argument lowering.
Offset = FuncInfo.getArgumentFrameIndex(Arg);
if (Offset)
- Reg = TRI->getFrameRegister(MF);
+ Reg = TRI->getFrameRegister(MF);
if (!Reg && N.getNode()) {
if (N.getOpcode() == ISD::CopyFromReg)
@@ -4473,9 +4493,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::setjmp:
- return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+ return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
case Intrinsic::longjmp:
- return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+ return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
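// Illustrative note, not part of this patch: &"_setjmp"[N] indexes into the
// string literal and takes that element's address, so N == 0 yields
// "_setjmp" and N == 1 yields "setjmp".  It computes the same pointer as
// the old "_setjmp" + N form while avoiding the confusing string-plus-int
// spelling.  The boolean below is a hypothetical stand-in for the TLI query.
#include <cstring>
int main() {
  bool UsesUnderscore = false;
  const char *Name = &"_setjmp"[!UsesUnderscore];
  return std::strcmp(Name, "setjmp") == 0 ? 0 : 1;
}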
case Intrinsic::memcpy: {
// Assert for address < 256 since we support only user defined address
// spaces.
@@ -4531,8 +4551,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
MDNode *Variable = DI.getVariable();
const Value *Address = DI.getAddress();
- if (!Address || !DIVariable(Variable).Verify())
+ if (!Address || !DIVariable(Variable).Verify()) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return 0;
+ }
// Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
// but do not always have a corresponding SDNode built. The SDNodeOrder
@@ -4543,7 +4565,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Check if address has undef value.
if (isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return 0;
}
@@ -4553,11 +4575,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
N = UnusedArgNodeMap[Address];
SDDbgValue *SDV;
if (N.getNode()) {
- // Parameters are handled specially.
- bool isParameter =
- DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
+ // Parameters are handled specially.
+ bool isParameter =
+ (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable ||
+ isa<Argument>(Address));
+
const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
if (isParameter && !AI) {
@@ -4577,7 +4601,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
0, dl, SDNodeOrder);
else {
// Can't do anything with other non-AI cases yet.
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
+ DEBUG(Address->dump());
return 0;
}
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
@@ -4599,7 +4625,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
}
}
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
return 0;
@@ -4645,7 +4671,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else {
// We may expand this to cover more cases. One case where we have no
// data available is an unreferenced parameter.
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
@@ -4654,8 +4680,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
V = BCI->getOperand(0);
const AllocaInst *AI = dyn_cast<AllocaInst>(V);
// Don't handle byval struct arguments or VLAs, for example.
- if (!AI)
+ if (!AI) {
+ DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
+ DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
return 0;
+ }
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI == FuncInfo.StaticAllocaMap.end())
@@ -4667,43 +4696,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
return 0;
}
- case Intrinsic::eh_exception: {
- // Insert the EXCEPTIONADDR instruction.
- assert(FuncInfo.MBB->isLandingPad() &&
- "Call to eh.exception not in landing pad!");
- SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
- SDValue Ops[1];
- Ops[0] = DAG.getRoot();
- SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
- setValue(&I, Op);
- DAG.setRoot(Op.getValue(1));
- return 0;
- }
-
- case Intrinsic::eh_selector: {
- MachineBasicBlock *CallMBB = FuncInfo.MBB;
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (CallMBB->isLandingPad())
- AddCatchInfo(I, &MMI, CallMBB);
- else {
-#ifndef NDEBUG
- FuncInfo.CatchInfoLost.insert(&I);
-#endif
- // FIXME: Mark exception selector register as live in. Hack for PR1508.
- unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo.MBB->addLiveIn(Reg);
- }
-
- // Insert the EHSELECTION instruction.
- SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
- SDValue Ops[2];
- Ops[0] = getValue(I.getArgOperand(0));
- Ops[1] = getRoot();
- SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
- DAG.setRoot(Op.getValue(1));
- setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
- return 0;
- }
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
@@ -4775,11 +4767,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getRoot(), getValue(I.getArgOperand(0))));
return 0;
}
- case Intrinsic::eh_sjlj_dispatch_setup: {
- DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
- getRoot(), getValue(I.getArgOperand(0))));
- return 0;
- }
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
@@ -4841,6 +4828,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return 0;
}
+ case Intrinsic::x86_avx_vinsertf128_pd_256:
+ case Intrinsic::x86_avx_vinsertf128_ps_256:
+ case Intrinsic::x86_avx_vinsertf128_si_256:
+ case Intrinsic::x86_avx2_vinserti128: {
+ DebugLoc dl = getCurDebugLoc();
+ EVT DestVT = TLI.getValueType(I.getType());
+ EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
+ uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
+ ElVT.getVectorNumElements();
+ Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getConstant(Idx, MVT::i32));
+ setValue(&I, Res);
+ return 0;
+ }
case Intrinsic::convertff:
case Intrinsic::convertfsi:
case Intrinsic::convertfui:
@@ -4852,6 +4855,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::convertuu: {
ISD::CvtCode Code = ISD::CVT_INVALID;
switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::convertff: Code = ISD::CVT_FF; break;
case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
@@ -4946,14 +4950,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
- setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
+ dl, Ty, Arg));
return 0;
}
case Intrinsic::ctlz: {
SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
- setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
+ dl, Ty, Arg));
return 0;
}
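// Illustrative sketch, not part of this patch: what the new i1 argument of
// llvm.cttz / llvm.ctlz buys.  When it is non-zero the caller promises the
// input is never 0, so the *_ZERO_UNDEF node may be used and the lowering
// is free to return anything for a zero input (matching instructions such
// as x86 BSF).  Plain C++ model with hypothetical names.
#include <cassert>
#include <cstdint>
static unsigned cttz32(uint32_t X, bool ZeroIsUndef) {
  if (X == 0)
    return ZeroIsUndef ? 17u   // any answer is acceptable under the contract
                       : 32u;  // the defined variant must report the width
  unsigned N = 0;
  while (!(X & 1u)) { X >>= 1; ++N; }
  return N;
}
int main() {
  assert(cttz32(8u, false) == 3);
  assert(cttz32(0u, false) == 32);
  (void)cttz32(0u, true);      // unspecified result, but must not trap
}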
case Intrinsic::ctpop: {
@@ -5052,7 +5060,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
- return 0;
case Intrinsic::flt_rounds:
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
return 0;
@@ -5064,7 +5071,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::trap: {
- StringRef TrapFuncName = getTrapFunctionName();
+ StringRef TrapFuncName = TM.Options.getTrapFunctionName();
if (TrapFuncName.empty()) {
DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
return 0;
@@ -5073,25 +5080,36 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
std::pair<SDValue, SDValue> Result =
TLI.LowerCallTo(getRoot(), I.getType(),
false, false, false, false, 0, CallingConv::C,
- /*isTailCall=*/false, /*isReturnValueUsed=*/true,
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
Args, DAG, getCurDebugLoc());
DAG.setRoot(Result.second);
return 0;
}
case Intrinsic::uadd_with_overflow:
- return implVisitAluOverflow(I, ISD::UADDO);
case Intrinsic::sadd_with_overflow:
- return implVisitAluOverflow(I, ISD::SADDO);
case Intrinsic::usub_with_overflow:
- return implVisitAluOverflow(I, ISD::USUBO);
case Intrinsic::ssub_with_overflow:
- return implVisitAluOverflow(I, ISD::SSUBO);
case Intrinsic::umul_with_overflow:
- return implVisitAluOverflow(I, ISD::UMULO);
- case Intrinsic::smul_with_overflow:
- return implVisitAluOverflow(I, ISD::SMULO);
+ case Intrinsic::smul_with_overflow: {
+ ISD::NodeType Op;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
+ case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
+ case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
+ case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
+ case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
+ case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
+ }
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+ setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
+ return 0;
+ }
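// Illustrative sketch, not part of this patch: the two-result shape the
// overflow nodes built above produce.  Each *_with_overflow intrinsic
// lowers to a single node returning the arithmetic result plus an i1
// overflow flag, modelled here as a pair for the unsigned-add case.
#include <cassert>
#include <cstdint>
#include <utility>
static std::pair<uint32_t, bool> uaddo(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;          // wraps modulo 2^32
  return {Sum, Sum < A};         // carry out == unsigned overflow
}
int main() {
  std::pair<uint32_t, bool> R = uaddo(0xFFFFFFFFu, 1u);
  assert(R.first == 0 && R.second);       // overflowed
  R = uaddo(2u, 3u);
  assert(R.first == 5 && !R.second);      // no overflow
}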
case Intrinsic::prefetch: {
SDValue Ops[5];
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
@@ -5226,7 +5244,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// If there's a possibility that fast-isel has already selected some amount
// of the current basic block, don't emit a tail call.
- if (isTailCall && EnableFastISel)
+ if (isTailCall && TM.Options.EnableFastISel)
isTailCall = false;
std::pair<SDValue,SDValue> Result =
@@ -5236,6 +5254,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
CS.getCallingConv(),
isTailCall,
+ CS.doesNotReturn(),
!CS.getInstruction()->use_empty(),
Callee, Args, DAG, getCurDebugLoc());
assert((isTailCall || Result.second.getNode()) &&
@@ -5264,7 +5283,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
Add,
MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
- false, false, 1);
+ false, false, false, 1);
Values[i] = L;
Chains[i] = L.getValue(1);
}
@@ -5375,7 +5394,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
Ptr, MachinePointerInfo(PtrVal),
false /*volatile*/,
- false /*nontemporal*/, 1 /* align=1 */);
+ false /*nontemporal*/,
+ false /*isinvariant*/, 1 /* align=1 */);
if (!ConstantMemory)
Builder.PendingLoads.push_back(LoadVal.getValue(1));
@@ -5470,23 +5490,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
- // See if any floating point values are being passed to this function. This is
- // used to emit an undefined reference to fltused on Windows.
- FunctionType *FT =
- cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (FT->isVarArg() &&
- !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- Type* T = I.getArgOperand(i)->getType();
- for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
- i != e; ++i) {
- if (!i->isFloatingPointTy()) continue;
- MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
- break;
- }
- }
- }
+ ComputeUsesVAFloatArgument(I, &MMI);
const char *RenameFn = 0;
if (Function *F = I.getCalledFunction()) {
@@ -5509,7 +5514,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// can't be a library call.
if (!F->hasLocalLinkage() && F->hasName()) {
StringRef Name = F->getName();
- if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
+ if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") ||
+ (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") ||
+ (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) {
if (I.getNumArgOperands() == 2 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5520,7 +5527,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
LHS.getValueType(), LHS, RHS));
return;
}
- } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
+ } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") ||
+ (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") ||
+ (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType()) {
@@ -5529,7 +5538,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
+ } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") ||
+ (LibInfo->has(LibFunc::sinf) && Name == "sinf") ||
+ (LibInfo->has(LibFunc::sinl) && Name == "sinl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5539,7 +5550,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
+ } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") ||
+ (LibInfo->has(LibFunc::cosf) && Name == "cosf") ||
+ (LibInfo->has(LibFunc::cosl) && Name == "cosl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5549,7 +5562,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+ } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") ||
+ (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") ||
+ (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5559,6 +5574,85 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
+ } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") ||
+ (LibInfo->has(LibFunc::floorf) && Name == "floorf") ||
+ (LibInfo->has(LibFunc::floorl) && Name == "floorl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") ||
+ (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") ||
+ (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") ||
+ (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") ||
+ (LibInfo->has(LibFunc::ceill) && Name == "ceill")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") ||
+ (LibInfo->has(LibFunc::rintf) && Name == "rintf") ||
+ (LibInfo->has(LibFunc::rintl) && Name == "rintl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") ||
+ (LibInfo->has(LibFunc::truncf) && Name == "truncf") ||
+ (LibInfo->has(LibFunc::truncl) && Name == "truncl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") ||
+ (LibInfo->has(LibFunc::log2f) && Name == "log2f") ||
+ (LibInfo->has(LibFunc::log2l) && Name == "log2l")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") ||
+ (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") ||
+ (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
} else if (Name == "memcmp") {
if (visitMemCmpCall(I))
return;
@@ -5596,22 +5690,6 @@ public:
: TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
}
- /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
- /// busy in OutputRegs/InputRegs.
- void MarkAllocatedRegs(bool isOutReg, bool isInReg,
- std::set<unsigned> &OutputRegs,
- std::set<unsigned> &InputRegs,
- const TargetRegisterInfo &TRI) const {
- if (isOutReg) {
- for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
- MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
- }
- if (isInReg) {
- for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
- MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
- }
- }
-
/// getCallOperandValEVT - Return the EVT of the Value* that this operand
/// corresponds to. If there is no Value* for this operand, it returns
/// MVT::Other.
@@ -5659,18 +5737,6 @@ public:
return TLI.getValueType(OpTy, true);
}
-
-private:
- /// MarkRegAndAliases - Mark the specified register and all aliases in the
- /// specified set.
- static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
- const TargetRegisterInfo &TRI) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
- Regs.insert(Reg);
- if (const unsigned *Aliases = TRI.getAliasSet(Reg))
- for (; *Aliases; ++Aliases)
- Regs.insert(*Aliases);
- }
};
typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
@@ -5684,39 +5750,13 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
/// allocation. This produces generally horrible, but correct, code.
///
/// OpInfo describes the operand.
-/// Input and OutputRegs are the set of already allocated physical registers.
///
static void GetRegistersForValue(SelectionDAG &DAG,
const TargetLowering &TLI,
DebugLoc DL,
- SDISelAsmOperandInfo &OpInfo,
- std::set<unsigned> &OutputRegs,
- std::set<unsigned> &InputRegs) {
+ SDISelAsmOperandInfo &OpInfo) {
LLVMContext &Context = *DAG.getContext();
- // Compute whether this value requires an input register, an output register,
- // or both.
- bool isOutReg = false;
- bool isInReg = false;
- switch (OpInfo.Type) {
- case InlineAsm::isOutput:
- isOutReg = true;
-
- // If there is an input constraint that matches this, we need to reserve
- // the input register so no other inputs allocate to it.
- isInReg = OpInfo.hasMatchingInput();
- break;
- case InlineAsm::isInput:
- isInReg = true;
- isOutReg = false;
- break;
- case InlineAsm::isClobber:
- isOutReg = true;
- isInReg = true;
- break;
- }
-
-
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<unsigned, 4> Regs;
@@ -5790,8 +5830,6 @@ static void GetRegistersForValue(SelectionDAG &DAG,
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
- const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
- OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
return;
}
@@ -5822,8 +5860,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
/// ConstraintOperands - Information about all of the constraints.
SDISelAsmOperandInfoVector ConstraintOperands;
- std::set<unsigned> OutputRegs, InputRegs;
-
TargetLowering::AsmOperandInfoVector
TargetConstraints = TLI.ParseConstraints(CS);
@@ -5956,7 +5992,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// constant pool entry to get its address.
const Value *OpVal = OpInfo.CallOperandVal;
if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
- isa<ConstantVector>(OpVal)) {
+ isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
TLI.getPointerTy());
} else {
@@ -5985,8 +6021,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this constraint is for a specific register, allocate it before
// anything else.
if (OpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
- InputRegs);
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
}
// Second pass - Loop over all of the operands, assigning virtual or physregs
@@ -5997,8 +6032,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// C_Register operands have already been allocated, Other/Memory don't need
// to be.
if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
- InputRegs);
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -6052,9 +6086,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Copy the output from the appropriate register. Find a register that
// we can use.
- if (OpInfo.AssignedRegs.Regs.empty())
- report_fatal_error("Couldn't allocate output reg for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
// If this is an indirect operand, store through the pointer after the
// asm.
@@ -6154,9 +6192,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
- if (Ops.empty())
- report_fatal_error("Invalid operand for inline asm constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (Ops.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType =
@@ -6187,9 +6229,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
"Don't know how to handle indirect register inputs yet!");
// Copy the input into the appropriate registers.
- if (OpInfo.AssignedRegs.Regs.empty())
- report_fatal_error("Couldn't allocate input reg for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
Chain, &Flag);
@@ -6327,7 +6373,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
bool RetSExt, bool RetZExt, bool isVarArg,
bool isInreg, unsigned NumFixedArgs,
CallingConv::ID CallConv, bool isTailCall,
- bool isReturnValueUsed,
+ bool doesNotRet, bool isReturnValueUsed,
SDValue Callee,
ArgListTy &Args, SelectionDAG &DAG,
DebugLoc dl) const {
@@ -6424,7 +6470,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
}
SmallVector<SDValue, 4> InVals;
- Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall,
Outs, OutVals, Ins, dl, DAG, InVals);
// Verify that the target's LowerCall behaved as expected.
@@ -6493,7 +6539,6 @@ void TargetLowering::LowerOperationWrapper(SDNode *N,
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("LowerOperation not implemented for this target!");
- return SDValue();
}
void
@@ -6515,10 +6560,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true. This includes arguments used by switches, since
/// the switch may expand into multiple basic blocks.
-static bool isOnlyUsedInEntryBlock(const Argument *A) {
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
// With FastISel active, we may be splitting blocks, so force creation
// of virtual registers for all non-dead arguments.
- if (EnableFastISel)
+ if (FastISel)
return A->use_empty();
const BasicBlock *Entry = A->getParent()->begin();
@@ -6708,7 +6753,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
SDB->getCurDebugLoc());
SDB->setValue(I, Res);
- if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
if (FrameIndexSDNode *FI =
@@ -6718,7 +6763,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BB's will reference it as.
- if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
// If we can, though, try to skip creating an unnecessary vreg.
// FIXME: This isn't very clean... it would be nice to make this more
// general. It's also subtly incompatible with the hacks FastISel
@@ -6729,7 +6774,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
continue;
}
}
- if (!isOnlyUsedInEntryBlock(I)) {
+ if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
FuncInfo->InitializeRegForValue(I);
SDB->CopyToExportRegsIfNeeded(I);
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 0a21ca3472ca..8393b414926a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -67,11 +67,11 @@ class SIToFPInst;
class StoreInst;
class SwitchInst;
class TargetData;
+class TargetLibraryInfo;
class TargetLowering;
class TruncInst;
class UIToFPInst;
class UnreachableInst;
-class UnwindInst;
class VAArgInst;
class ZExtInst;
@@ -129,13 +129,13 @@ private:
/// Case - A struct to record the Value for a switch case, and the
/// case's target basic block.
struct Case {
- Constant* Low;
- Constant* High;
+ const Constant *Low;
+ const Constant *High;
MachineBasicBlock* BB;
uint32_t ExtraWeight;
Case() : Low(0), High(0), BB(0), ExtraWeight(0) { }
- Case(Constant* low, Constant* high, MachineBasicBlock* bb,
+ Case(const Constant *low, const Constant *high, MachineBasicBlock *bb,
uint32_t extraweight) : Low(low), High(high), BB(bb),
ExtraWeight(extraweight) { }
@@ -294,6 +294,7 @@ public:
SelectionDAG &DAG;
const TargetData *TD;
AliasAnalysis *AA;
+ const TargetLibraryInfo *LibInfo;
/// SwitchCases - Vector of CaseBlock structures used to communicate
/// SwitchInst code generation information.
@@ -338,7 +339,8 @@ public:
HasTailCall(false), Context(dag.getContext()) {
}
- void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li);
/// clear - Clear out the current SelectionDAG and the associated
/// state and prepare this SelectionDAGBuilder object to be used
@@ -451,7 +453,8 @@ private:
MachineBasicBlock* Default,
MachineBasicBlock *SwitchBB);
- uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
+ uint32_t getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
uint32_t Weight = 0);
public:
@@ -471,7 +474,6 @@ private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
void visitResume(const ResumeInst &I);
- void visitUnwind(const UnwindInst &I);
void visitBinary(const User &I, unsigned OpCode);
void visitShift(const User &I, unsigned Opcode);
@@ -554,8 +556,6 @@ private:
void visitUserOp2(const Instruction &I) {
llvm_unreachable("UserOp2 should not exist at instruction selection time!");
}
-
- const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op);
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
new file mode 100644
index 000000000000..f981afb437b0
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -0,0 +1,631 @@
+//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::dump method and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return TII->getName(getMachineOpcode());
+ return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
+ }
+ return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE: return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_FENCE: return "AtomicFence";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_LOAD: return "AtomicLoad";
+ case ISD::ATOMIC_STORE: return "AtomicStore";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MDNODE_SDNODE: return "MDNode";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+ case ISD::RegisterMask: return "RegisterMask";
+ case ISD::Constant: return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+ case ISD::LSDAADDR: return "LSDAADDR";
+ case ISD::EHSELECTION: return "EHSELECTION";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::BlockAddress: return "BlockAddress";
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+ unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ return TII->getName(IID);
+ llvm_unreachable("Invalid intrinsic ID");
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant: return "TargetConstant";
+ case ISD::TargetConstantFP: return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+ case ISD::TargetBlockAddress: return "TargetBlockAddress";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::HANDLENODE: return "handlenode";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::FEXP: return "fexp";
+ case ISD::FEXP2: return "fexp2";
+ case ISD::FLOG: return "flog";
+ case ISD::FLOG2: return "flog2";
+ case ISD::FLOG10: return "flog10";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FADD: return "fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::FMA: return "fma";
+ case ISD::FREM: return "frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FPOW: return "fpow";
+
+ case ISD::FPOWI: return "fpowi";
+ case ISD::SETCC: return "setcc";
+ case ISD::SELECT: return "select";
+ case ISD::VSELECT: return "vselect";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+ case ISD::FP_EXTEND: return "fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::BITCAST: return "bitcast";
+ case ISD::FP16_TO_FP32: return "fp16_to_fp32";
+ case ISD::FP32_TO_FP16: return "fp32_to_fp16";
+
+ case ISD::CONVERT_RNDSAT: {
+ switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+ default: llvm_unreachable("Unknown cvt code!");
+ case ISD::CVT_FF: return "cvt_ff";
+ case ISD::CVT_FS: return "cvt_fs";
+ case ISD::CVT_FU: return "cvt_fu";
+ case ISD::CVT_SF: return "cvt_sf";
+ case ISD::CVT_UF: return "cvt_uf";
+ case ISD::CVT_SS: return "cvt_ss";
+ case ISD::CVT_SU: return "cvt_su";
+ case ISD::CVT_US: return "cvt_us";
+ case ISD::CVT_UU: return "cvt_uu";
+ }
+ }
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+
+ // Bit manipulation
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
+ case ISD::CTLZ: return "ctlz";
+ case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
+
+ // Trampolines
+ case ISD::INIT_TRAMPOLINE: return "init_trampoline";
+ case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: llvm_unreachable("Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setue";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+
+ case ISD::SETTRUE: return "settrue";
+ case ISD::SETTRUE2: return "settrue2";
+ case ISD::SETFALSE: return "setfalse";
+ case ISD::SETFALSE2: return "setfalse2";
+ }
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default: return "";
+ case ISD::PRE_INC: return "<pre-inc>";
+ case ISD::PRE_DEC: return "<pre-dec>";
+ case ISD::POST_INC: return "<post-inc>";
+ case ISD::POST_DEC: return "<post-dec>";
+ }
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+ print(dbgs(), G);
+ dbgs() << '\n';
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << (void*)this << ": ";
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getEVTString();
+ }
+ OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+ if (!MN->memoperands_empty()) {
+ OS << "<";
+ OS << "Mem:";
+ for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+ e = MN->memoperands_end(); i != e; ++i) {
+ OS << **i;
+ if (llvm::next(i) != e)
+ OS << " ";
+ }
+ OS << ">";
+ }
+ } else if (const ShuffleVectorSDNode *SVN =
+ dyn_cast<ShuffleVectorSDNode>(this)) {
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().dump();
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ WriteAsOperand(OS, GADN->getGlobal());
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = GADN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ if (unsigned int TF = JTDN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = CP->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ if (unsigned int TF = ES->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+ if (MD->getMD())
+ OS << "<" << MD->getMD() << ">";
+ else
+ OS << "<null>";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT().getEVTString();
+ }
+ else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ OS << "<" << *LD->getMemOperand();
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << LD->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ OS << "<" << *ST->getMemOperand();
+
+ if (ST->isTruncatingStore())
+ OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ OS << "<" << *M->getMemOperand() << ">";
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(this)) {
+ OS << "<";
+ WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+ OS << ", ";
+ WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+ OS << ">";
+ if (unsigned int TF = BA->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ }
+
+ if (G)
+ if (unsigned Order = G->GetOrdering(this))
+ OS << " [ORD=" << Order << ']';
+
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
+
+ DebugLoc dl = getDebugLoc();
+ if (G && !dl.isUnknown()) {
+ DIScope
+ Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
+ OS << " dbg:";
+ // Omit the directory, since it's usually long and uninteresting.
+ if (Scope.Verify())
+ OS << Scope.getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << dl.getLine();
+ if (dl.getCol() != 0)
+ OS << ':' << dl.getCol();
+ }
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getNode()->hasOneUse())
+ DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+ else
+ dbgs() << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+
+ dbgs() << '\n';
+ dbgs().indent(indent);
+ N->dump(G);
+}
+
+void SelectionDAG::dump() const {
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I) {
+ const SDNode *N = I;
+ if (!N->hasOneUse() && N != getRoot().getNode())
+ DumpNodes(N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+ dbgs() << "\n\n";
+}
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ print_details(OS, G);
+}
+
+typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N)) // If we've been here before, return now.
+ return;
+
+ // Dump the current SDNode, but don't end the line yet.
+ OS.indent(indent);
+ N->printr(OS, G);
+
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+
+ if (i) OS << ",";
+ OS << " ";
+
+ if (child->getNumOperands() == 0) {
+ // This child has no grandchildren; print it inline right here.
+ child->printr(OS, G);
+ once.insert(child);
+ } else { // Just the address. FIXME: also print the child's opcode.
+ OS << (void*)child;
+ if (unsigned RN = N->getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ }
+
+ OS << "\n";
+
+ // Dump children that have grandchildren on their own line(s).
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+ DumpNodesr(OS, child, indent+2, G, once);
+ }
+}
+
+void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, 0, once);
+}
+
+void SDNode::dumpr(const SelectionDAG *G) const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, G, once);
+}
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+ const SelectionDAG *G, unsigned depth,
+ unsigned indent) {
+ if (depth == 0)
+ return;
+
+ OS.indent(indent);
+
+ N->print(OS, G);
+
+ if (depth < 1)
+ return;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ // Don't follow chain operands.
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ continue;
+ OS << '\n';
+ printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
+ }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+ unsigned depth) const {
+ printrWithDepthHelper(OS, this, G, depth, 0);
+}
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ printrWithDepth(OS, G, 10);
+}
+
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+ printrWithDepth(dbgs(), G, depth);
+}
+
+void SDNode::dumprFull(const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ dumprWithDepth(G, 10);
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", "; else OS << " ";
+ OS << (void*)getOperand(i).getNode();
+ if (unsigned RN = getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ print_details(OS, G);
+}
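The new SelectionDAGDumper.cpp gathers SelectionDAG::dump() and the SDNode print/dump helpers into one translation unit. A small sketch of how these entry points are typically exercised from ad-hoc debugging code, using only methods defined above; the wrapper function itself is hypothetical:

// Illustrative only: calling the dump helpers added in this file.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static void dumpForDebugging(const SelectionDAG &DAG, const SDNode *N) {
  DAG.dump();          // whole DAG via DumpNodes, root printed last
  N->dump(&DAG);       // one node, with target opcode names resolved
  N->dumpr(&DAG);      // node plus its operand tree, each node printed once
  N->dumprFull(&DAG);  // operand tree, capped at depth 10
}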
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 68b9146adfe1..605509bd227a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -41,6 +41,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -61,6 +62,80 @@ STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+#ifndef NDEBUG
+static cl::opt<bool>
+EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
+ cl::desc("Enable extra verbose messages in the \"fast\" "
+ "instruction selector"));
+ // Terminators
+STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
+STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
+STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
+STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
+STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
+STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
+STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");
+
+ // Standard binary operators...
+STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add");
+STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd");
+STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub");
+STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub");
+STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul");
+STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul");
+STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv");
+STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv");
+STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv");
+STATISTIC(NumFastIselFailURem,"Fast isel fails on URem");
+STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem");
+STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem");
+
+ // Logical operators...
+STATISTIC(NumFastIselFailAnd,"Fast isel fails on And");
+STATISTIC(NumFastIselFailOr,"Fast isel fails on Or");
+STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor");
+
+ // Memory instructions...
+STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca");
+STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load");
+STATISTIC(NumFastIselFailStore,"Fast isel fails on Store");
+STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg");
+STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRMW");
+STATISTIC(NumFastIselFailFence,"Fast isel fails on Fence");
+STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr");
+
+ // Convert instructions...
+STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc");
+STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt");
+STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt");
+STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc");
+STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt");
+STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI");
+STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI");
+STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP");
+STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP");
+STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr");
+STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt");
+STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast");
+
+ // Other instructions...
+STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp");
+STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp");
+STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI");
+STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select");
+STATISTIC(NumFastIselFailCall,"Fast isel fails on Call");
+STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl");
+STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr");
+STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr");
+STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg");
+STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement");
+STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement");
+STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
+STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
+STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
+STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
+#endif
+
static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
@@ -142,14 +217,15 @@ namespace llvm {
CodeGenOpt::Level OptLevel) {
const TargetLowering &TLI = IS->getTargetLowering();
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOpt::None ||
+ TLI.getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
- if (TLI.getSchedulingPreference() == Sched::Latency)
- return createTDListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::RegPressure)
return createBURRListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::Hybrid)
return createHybridListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::VLIW)
+ return createVLIWDAGScheduler(IS, OptLevel);
assert(TLI.getSchedulingPreference() == Sched::ILP &&
"Unknown sched type!");
return createILPListDAGScheduler(IS, OptLevel);
@@ -174,12 +250,11 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
"TargetLowering::EmitInstrWithCustomInserter!";
#endif
llvm_unreachable(0);
- return 0;
}
void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- assert(!MI->getDesc().hasPostISelHook() &&
+ assert(!MI->hasPostISelHook() &&
"If a target marks an instruction with 'hasPostISelHook', "
"it must implement TargetLowering::AdjustInstrPostInstrSelection!");
}
@@ -188,11 +263,13 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
+void SelectionDAGISel::ISelUpdater::anchor() { }
+
SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
- CurDAG(new SelectionDAG(tm)),
+ CurDAG(new SelectionDAG(tm, OL)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
OptLevel(OL),
@@ -200,6 +277,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
}
SelectionDAGISel::~SelectionDAGISel() {
@@ -213,6 +291,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<GCModuleInfo>();
AU.addPreserved<GCModuleInfo>();
+ AU.addRequired<TargetLibraryInfo>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -258,9 +337,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Do some sanity-checking on the command-line options.
- assert((!EnableFastISelVerbose || EnableFastISel) &&
+ assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
"-fast-isel-verbose requires -fast-isel");
- assert((!EnableFastISelAbort || EnableFastISel) &&
+ assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
"-fast-isel-abort requires -fast-isel");
const Function &Fn = *mf.getFunction();
@@ -270,6 +349,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
+ LibInfo = &getAnalysis<TargetLibraryInfo>();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
@@ -284,7 +364,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
else
FuncInfo->BPI = 0;
- SDB->init(GFI, *AA);
+ SDB->init(GFI, *AA, LibInfo);
SelectAllBasicBlocks(Fn);
@@ -348,7 +428,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII.get(TargetOpcode::DBG_VALUE))
.addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
.addImm(Offset).addMetadata(Variable);
- EntryMBB->insertAfter(CopyUseMI, NewMI);
+ MachineBasicBlock::iterator Pos = CopyUseMI;
+ EntryMBB->insertAfter(Pos, NewMI);
}
}
}
@@ -374,7 +455,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
// Determine if there is a call to setjmp in the machine function.
- MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice());
+ MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
// Replace forward-declared registers with the registers containing
// the desired value.
@@ -427,7 +508,6 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
Worklist.push_back(CurDAG->getRoot().getNode());
- APInt Mask;
APInt KnownZero;
APInt KnownOne;
@@ -458,8 +538,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
continue;
unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
- Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
- CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
+ CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne);
FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
} while (!Worklist.empty());
}
@@ -478,8 +557,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#endif
{
BlockNumber = FuncInfo->MBB->getNumber();
- BlockName = MF->getFunction()->getNameStr() + ":" +
- FuncInfo->MBB->getBasicBlock()->getNameStr();
+ BlockName = MF->getFunction()->getName().str() + ":" +
+ FuncInfo->MBB->getBasicBlock()->getName().str();
}
DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
@@ -489,7 +568,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in pre-legalize mode.
{
NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled);
- CurDAG->Combine(Unrestricted, *AA, OptLevel);
+ CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
@@ -517,7 +596,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("DAG Combining after legalize types", GroupName,
TimePassesIsEnabled);
- CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
@@ -542,7 +621,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("DAG Combining after legalize vectors", GroupName,
TimePassesIsEnabled);
- CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
@@ -564,7 +643,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in post-legalize mode.
{
NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled);
- CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
@@ -592,7 +671,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("Instruction Scheduling", GroupName,
TimePassesIsEnabled);
- Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt);
+ Scheduler->Run(CurDAG, FuncInfo->MBB);
}
if (ViewSUnitDAGs) Scheduler->viewGraph();
@@ -603,8 +682,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
- LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();
- FuncInfo->InsertPt = Scheduler->InsertPos;
+ // FuncInfo->InsertPt is passed by reference and set to the end of the
+ // scheduled instructions.
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt);
}
// If the block was split, make sure we update any references that are used to
@@ -693,43 +773,18 @@ void SelectionDAGISel::PrepareEHLandingPad() {
// Assign the call site to the landing pad's begin label.
MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
-
+
const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
// Mark exception register as live in.
- unsigned Reg = TLI.getExceptionAddressRegister();
+ unsigned Reg = TLI.getExceptionPointerRegister();
if (Reg) MBB->addLiveIn(Reg);
// Mark exception selector register as live in.
Reg = TLI.getExceptionSelectorRegister();
if (Reg) MBB->addLiveIn(Reg);
-
- // FIXME: Hack around an exception handling flaw (PR1508): the personality
- // function and list of typeids logically belong to the invoke (or, if you
- // like, the basic block containing the invoke), and need to be associated
- // with it in the dwarf exception handling tables. Currently however the
- // information is provided by an intrinsic (eh.selector) that can be moved
- // to unexpected places by the optimizers: if the unwind edge is critical,
- // then breaking it can result in the intrinsics being in the successor of
- // the landing pad, not the landing pad itself. This results
- // in exceptions not being caught because no typeids are associated with
- // the invoke. This may not be the only way things can go wrong, but it
- // is the only way we try to work around for the moment.
- const BasicBlock *LLVMBB = MBB->getBasicBlock();
- const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
-
- if (Br && Br->isUnconditional()) { // Critical edge?
- BasicBlock::const_iterator I, E;
- for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
- if (isa<EHSelectorInst>(I))
- break;
-
- if (I == E)
- // No catch info found - try to extract some from the successor.
- CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo);
- }
}
/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
@@ -822,10 +877,90 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
+#ifndef NDEBUG
+// Collect per-instruction statistics for fast-isel misses. Only those
+// instructions that cause the bail are accounted for. It does not account for
+// instructions higher in the block. Thus, summing the per-instruction stats
+// will not add up to what is reported by NumFastIselFailures.
+static void collectFailStats(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default: assert (0 && "<Invalid operator> ");
+
+ // Terminators
+ case Instruction::Ret: NumFastIselFailRet++; return;
+ case Instruction::Br: NumFastIselFailBr++; return;
+ case Instruction::Switch: NumFastIselFailSwitch++; return;
+ case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return;
+ case Instruction::Invoke: NumFastIselFailInvoke++; return;
+ case Instruction::Resume: NumFastIselFailResume++; return;
+ case Instruction::Unreachable: NumFastIselFailUnreachable++; return;
+
+ // Standard binary operators...
+ case Instruction::Add: NumFastIselFailAdd++; return;
+ case Instruction::FAdd: NumFastIselFailFAdd++; return;
+ case Instruction::Sub: NumFastIselFailSub++; return;
+ case Instruction::FSub: NumFastIselFailFSub++; return;
+ case Instruction::Mul: NumFastIselFailMul++; return;
+ case Instruction::FMul: NumFastIselFailFMul++; return;
+ case Instruction::UDiv: NumFastIselFailUDiv++; return;
+ case Instruction::SDiv: NumFastIselFailSDiv++; return;
+ case Instruction::FDiv: NumFastIselFailFDiv++; return;
+ case Instruction::URem: NumFastIselFailURem++; return;
+ case Instruction::SRem: NumFastIselFailSRem++; return;
+ case Instruction::FRem: NumFastIselFailFRem++; return;
+
+ // Logical operators...
+ case Instruction::And: NumFastIselFailAnd++; return;
+ case Instruction::Or: NumFastIselFailOr++; return;
+ case Instruction::Xor: NumFastIselFailXor++; return;
+
+ // Memory instructions...
+ case Instruction::Alloca: NumFastIselFailAlloca++; return;
+ case Instruction::Load: NumFastIselFailLoad++; return;
+ case Instruction::Store: NumFastIselFailStore++; return;
+ case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return;
+ case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return;
+ case Instruction::Fence: NumFastIselFailFence++; return;
+ case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return;
+
+ // Convert instructions...
+ case Instruction::Trunc: NumFastIselFailTrunc++; return;
+ case Instruction::ZExt: NumFastIselFailZExt++; return;
+ case Instruction::SExt: NumFastIselFailSExt++; return;
+ case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return;
+ case Instruction::FPExt: NumFastIselFailFPExt++; return;
+ case Instruction::FPToUI: NumFastIselFailFPToUI++; return;
+ case Instruction::FPToSI: NumFastIselFailFPToSI++; return;
+ case Instruction::UIToFP: NumFastIselFailUIToFP++; return;
+ case Instruction::SIToFP: NumFastIselFailSIToFP++; return;
+ case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
+ case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return;
+ case Instruction::BitCast: NumFastIselFailBitCast++; return;
+
+ // Other instructions...
+ case Instruction::ICmp: NumFastIselFailICmp++; return;
+ case Instruction::FCmp: NumFastIselFailFCmp++; return;
+ case Instruction::PHI: NumFastIselFailPHI++; return;
+ case Instruction::Select: NumFastIselFailSelect++; return;
+ case Instruction::Call: NumFastIselFailCall++; return;
+ case Instruction::Shl: NumFastIselFailShl++; return;
+ case Instruction::LShr: NumFastIselFailLShr++; return;
+ case Instruction::AShr: NumFastIselFailAShr++; return;
+ case Instruction::VAArg: NumFastIselFailVAArg++; return;
+ case Instruction::ExtractElement: NumFastIselFailExtractElement++; return;
+ case Instruction::InsertElement: NumFastIselFailInsertElement++; return;
+ case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return;
+ case Instruction::ExtractValue: NumFastIselFailExtractValue++; return;
+ case Instruction::InsertValue: NumFastIselFailInsertValue++; return;
+ case Instruction::LandingPad: NumFastIselFailLandingPad++; return;
+ }
+}
+#endif
+
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
- if (EnableFastISel)
+ if (TM.Options.EnableFastISel)
FastIS = TLI.createFastISel(*FuncInfo);
// Iterate over all basic blocks in the function.
@@ -894,13 +1029,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->setLastLocalValue(0);
}
+ unsigned NumFastIselRemaining = std::distance(Begin, End);
// Do FastISel on as many instructions as possible.
for (; BI != Begin; --BI) {
const Instruction *Inst = llvm::prior(BI);
// If we no longer require this instruction, skip it.
- if (isFoldedOrDeadInstruction(Inst, FuncInfo))
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
+ --NumFastIselRemaining;
continue;
+ }
// Bottom-up: reset the insert pos at the top, after any local-value
// instructions.
@@ -908,6 +1046,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Try to select the instruction with FastISel.
if (FastIS->SelectInstruction(Inst)) {
+ --NumFastIselRemaining;
++NumFastIselSuccess;
// If fast isel succeeded, skip over all the folded instructions, and
// then see if there is a load right before the selected instructions.
@@ -920,15 +1059,23 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
BeforeInst->hasOneUse() &&
- TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS))
+ TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) {
// If we succeeded, don't re-select the load.
BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
+ --NumFastIselRemaining;
+ ++NumFastIselSuccess;
+ }
continue;
}
+#ifndef NDEBUG
+ if (EnableFastISelVerbose2)
+ collectFailStats(Inst);
+#endif
+
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(Inst)) {
- ++NumFastIselFailures;
+
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel missed call: ";
Inst->dump();
@@ -943,24 +1090,30 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
bool HadTailCall = false;
SelectBasicBlock(Inst, BI, HadTailCall);
+ // Recompute NumFastIselRemaining as Selection DAG instruction
+ // selection may have handled the call, input args, etc.
+ unsigned RemainingNow = std::distance(Begin, BI);
+ NumFastIselFailures += NumFastIselRemaining - RemainingNow;
+
// If the call was emitted as a tail call, we're done with the block.
if (HadTailCall) {
--BI;
break;
}
+ NumFastIselRemaining = RemainingNow;
continue;
}
if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
// Don't abort, and use a different message for terminator misses.
- ++NumFastIselFailures;
+ NumFastIselFailures += NumFastIselRemaining;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel missed terminator: ";
Inst->dump();
}
} else {
- ++NumFastIselFailures;
+ NumFastIselFailures += NumFastIselRemaining;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel miss: ";
Inst->dump();
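
Aside, illustrative only: the bookkeeping added in the hunks above charges everything the SelectionDAG selector had to finish to NumFastIselFailures instead of counting a single failure per bail-out. A self-contained sketch of that accounting, with a hypothetical helper name:

#include <iterator>

// RemainingNow is how many instructions are still unselected after the DAG
// selector ran up to BI; the drop from the previous Remaining count is the
// work fast isel failed to do, so it is charged to Failures.
template <class Iter>
unsigned chargeFastIselFailures(Iter Begin, Iter BI, unsigned Remaining,
                                unsigned &Failures) {
  unsigned RemainingNow = std::distance(Begin, BI);
  Failures += Remaining - RemainingNow;  // instructions handled by the DAG path
  return RemainingNow;                   // becomes the new NumFastIselRemaining
}
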
@@ -1289,7 +1442,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
APInt NeededMask = DesiredMask & ~ActualMask;
APInt KnownZero, KnownOne;
- CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+ CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne);
// If all the missing bits in the or are already known to be set, match!
if ((NeededMask & KnownOne) == NeededMask)
@@ -2025,6 +2178,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case ISD::EntryToken: // These nodes remain the same.
case ISD::BasicBlock:
case ISD::Register:
+ case ISD::RegisterMask:
//case ISD::VALUETYPE:
//case ISD::CONDCODE:
case ISD::HANDLENODE:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index cd1647b17b9b..6cde05aea82a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -28,7 +27,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Config/config.h"
using namespace llvm;
namespace llvm {
@@ -148,7 +146,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
void SelectionDAG::viewGraph(const std::string &Title) {
// This code is only for debugging!
#ifndef NDEBUG
- ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(),
+ ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(),
false, Title);
#else
errs() << "SelectionDAG::viewGraph is only available in debug builds on "
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 907d8d9da1af..09a2b1f3d7a5 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -36,31 +36,9 @@ using namespace llvm;
/// - the promotion of vector elements. This feature is disabled by default
/// and only enabled using this flag.
static cl::opt<bool>
-AllowPromoteIntElem("promote-elements", cl::Hidden,
+AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true),
cl::desc("Allow promotion of integer vector element types"));
-namespace llvm {
-TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
- bool isLocal = GV->hasLocalLinkage();
- bool isDeclaration = GV->isDeclaration();
- // FIXME: what should we do for protected and internal visibility?
- // For variables, is internal different from hidden?
- bool isHidden = GV->hasHiddenVisibility();
-
- if (reloc == Reloc::PIC_) {
- if (isLocal || isHidden)
- return TLSModel::LocalDynamic;
- else
- return TLSModel::GeneralDynamic;
- } else {
- if (!isDeclaration || isHidden)
- return TLSModel::LocalExec;
- else
- return TLSModel::InitialExec;
- }
-}
-}
-
/// InitLibcallNames - Set default libcall names.
///
static void InitLibcallNames(const char **Names) {
@@ -572,21 +550,42 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
// ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
// These library functions default to expand.
- setOperationAction(ISD::FLOG , MVT::f64, Expand);
- setOperationAction(ISD::FLOG2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG10,MVT::f64, Expand);
- setOperationAction(ISD::FEXP , MVT::f64, Expand);
- setOperationAction(ISD::FEXP2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG , MVT::f32, Expand);
- setOperationAction(ISD::FLOG2, MVT::f32, Expand);
- setOperationAction(ISD::FLOG10,MVT::f32, Expand);
- setOperationAction(ISD::FEXP , MVT::f32, Expand);
- setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f16, Expand);
+ setOperationAction(ISD::FEXP , MVT::f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
@@ -610,7 +609,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
ExceptionSelectorRegister = 0;
BooleanContents = UndefinedBooleanContent;
BooleanVectorContents = UndefinedBooleanContent;
- SchedPreferenceInfo = Sched::Latency;
+ SchedPreferenceInfo = Sched::ILP;
JumpBufSize = 0;
JumpBufAlignment = 0;
MinFunctionAlignment = 0;
@@ -1080,8 +1079,12 @@ unsigned TargetLowering::getJumpTableEncoding() const {
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
// If our PIC model is GP relative, use the global offset table as the base.
- if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress)
+ unsigned JTEncoding = getJumpTableEncoding();
+
+ if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
+ (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+
return Table;
}
@@ -1223,7 +1226,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Depth != 0) {
// If not at the root, just compute the KnownZero/KnownOne bits to
// simplify things downstream.
- TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+ TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
return false;
}
// If this is the root being simplified, allow it to have multiple uses,
@@ -1242,8 +1245,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
switch (Op.getOpcode()) {
case ISD::Constant:
// We know all of the bits for a constant!
- KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
- KnownZero = ~KnownOne & NewMask;
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+ KnownZero = ~KnownOne;
return false; // Don't fall through, will infinitely loop.
case ISD::AND:
// If the RHS is a constant, check to see if the LHS would be zero without
@@ -1253,8 +1256,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
APInt LHSZero, LHSOne;
// Do not increment Depth here; that can cause an infinite loop.
- TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
- LHSZero, LHSOne, Depth);
+ TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth);
// If the LHS already has zeros where RHSC does, this and is dead.
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
return TLO.CombineTo(Op, Op.getOperand(0));
@@ -1473,9 +1475,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = InOp.getNode()->getOperand(0);
EVT InnerVT = InnerOp.getValueType();
- if ((APInt::getHighBitsSet(BitWidth,
- BitWidth - InnerVT.getSizeInBits()) &
- DemandedMask) == 0 &&
+ unsigned InnerBits = InnerVT.getSizeInBits();
+ if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
@@ -1545,7 +1546,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (DemandedMask == 1)
+ if (NewMask == 1)
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1)));
@@ -1588,23 +1589,40 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
break;
case ISD::SIGN_EXTEND_INREG: {
- EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
+ // If we only care about the highest bit, don't bother shifting right.
+ if (MsbMask == DemandedMask) {
+ unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // Compute the correct shift amount type, which must be getShiftAmountTy
+ // for scalar types after legalization.
+ EVT ShiftAmtTy = Op.getValueType();
+ if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+ ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
+
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(), InOp, ShiftAmt));
+ }
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
APInt NewBits =
APInt::getHighBitsSet(BitWidth,
- BitWidth - EVT.getScalarType().getSizeInBits());
+ BitWidth - ExVT.getScalarType().getSizeInBits());
// If none of the extended bits are demanded, eliminate the sextinreg.
if ((NewBits & NewMask) == 0)
return TLO.CombineTo(Op, Op.getOperand(0));
APInt InSignBit =
- APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);
+ APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth);
APInt InputDemandedBits =
APInt::getLowBitsSet(BitWidth,
- EVT.getScalarType().getSizeInBits()) &
+ ExVT.getScalarType().getSizeInBits()) &
NewMask;
// Since the sign extended bits are demanded, we know that the sign
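
Aside on the MsbMask fast path added in the hunk above (illustrative only, not part of the patch): when only the sign bit of a sign_extend_inreg from i8 in i32 is demanded, bit 31 of the result is just bit 7 of the input, which a plain shl by 32 - 8 = 24 also places into bit 31. A quick sanity check of that identity:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x <= 0xFF; ++x) {
    uint32_t SignBitOfSext = (x >> 7) & 1;     // MSB of the sign_extend_inreg i8 result
    uint32_t SignBitOfShl  = (x << 24) >> 31;  // MSB after the replacement shl
    assert(SignBitOfSext == SignBitOfShl);
  }
  return 0;
}
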
@@ -1622,7 +1640,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known zero, convert this into a zero extension.
if (KnownZero.intersects(InSignBit))
return TLO.CombineTo(Op,
- TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT));
if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
KnownOne |= NewBits;
@@ -1688,11 +1706,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the sign bit is known one, the top bits match.
if (KnownOne.intersects(InSignBit)) {
- KnownOne |= NewBits;
- KnownZero &= ~NewBits;
+ KnownOne |= NewBits;
+ assert((KnownZero & NewBits) == 0);
} else { // Otherwise, top bits aren't known.
- KnownOne &= ~NewBits;
- KnownZero &= ~NewBits;
+ assert((KnownOne & NewBits) == 0);
+ assert((KnownZero & NewBits) == 0);
}
break;
}
@@ -1783,7 +1801,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::BITCAST:
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
- if (!Op.getOperand(0).getValueType().isVector() &&
+ if (!TLO.LegalOperations() &&
+ !Op.getValueType().isVector() &&
+ !Op.getOperand(0).getValueType().isVector() &&
NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
Op.getOperand(0).getValueType().isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
@@ -1824,7 +1844,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// FALL THROUGH
default:
// Just use ComputeMaskedBits to compute output bits.
- TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
+ TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
break;
}
@@ -1840,7 +1860,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -1851,7 +1870,7 @@ void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
}
/// ComputeNumSignBitsForTargetNode - This method can be implemented by
@@ -1895,9 +1914,8 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
// Fall back to ComputeMaskedBits to catch other known cases.
EVT OpVT = Val.getValueType();
unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(Val, KnownZero, KnownOne);
return (KnownZero.countPopulation() == BitWidth - 1) &&
(KnownOne.countPopulation() == 1);
}
@@ -2060,7 +2078,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
Lod->getPointerInfo().getWithOffset(bestOffset),
- false, false, NewAlign);
+ false, false, false, NewAlign);
return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
@@ -2393,8 +2411,15 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0 == N1) {
// We can always fold X == X for integer setcc's.
- if (N0.getValueType().isInteger())
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ if (N0.getValueType().isInteger()) {
+ switch (getBooleanContents(N0.getValueType().isVector())) {
+ case UndefinedBooleanContent:
+ case ZeroOrOneBooleanContent:
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ case ZeroOrNegativeOneBooleanContent:
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT);
+ }
+ }
unsigned UOF = ISD::getUnorderedFlavor(Cond);
if (UOF == 2) // FP operators that are undefined on NaNs.
return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
@@ -2428,6 +2453,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ // If RHS is a legal immediate value for a compare instruction, we need
+ // to be careful about increasing register pressure needlessly.
+ bool LegalRHSImm = false;
+
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
@@ -2462,25 +2491,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond);
}
}
+
+ // Could RHSC fold directly into a compare?
+ if (RHSC->getValueType(0).getSizeInBits() <= 64)
+ LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
}
// Simplify (X+Z) == X --> Z == 0
- if (N0.getOperand(0) == N1)
- return DAG.getSetCC(dl, VT, N0.getOperand(1),
- DAG.getConstant(0, N0.getValueType()), Cond);
- if (N0.getOperand(1) == N1) {
- if (DAG.isCommutativeBinOp(N0.getOpcode()))
- return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(0, N0.getValueType()), Cond);
- else if (N0.getNode()->hasOneUse()) {
- assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
- // (Z-X) == X --> Z == X<<1
- SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
- N1,
+ // Don't do this if X is an immediate that can fold into a cmp
+ // instruction and X+Z has other uses. It could be an induction variable
+ // chain, and the transform would increase register pressure.
+ if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
+ if (N0.getOperand(0) == N1)
+ return DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getOperand(1) == N1) {
+ if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ else if (N0.getNode()->hasOneUse()) {
+ assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // (Z-X) == X --> Z == X<<1
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
- if (!DCI.isCalledByLegalizer())
- DCI.AddToWorklist(SH.getNode());
- return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ }
}
}
}
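
Aside, illustrative only: the fold being guarded above relies on wrap-around integer arithmetic, where (X + Z) == X holds exactly when Z == 0, so the add can be dropped from the compare. The new LegalRHSImm guard skips the fold only when the add has other uses and the compared-against constant already fits the target's cmp immediate, since the rewrite would then keep both X + Z and Z live. A small check of the underlying identity on 8-bit wrapping adds:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Z = 0; Z < 256; ++Z) {
      bool AddEqual = (uint8_t)(X + Z) == (uint8_t)X;  // modulo-256 addition
      assert(AddEqual == (Z == 0));
    }
  return 0;
}
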
@@ -2984,7 +3021,6 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
/// is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
- default: llvm_unreachable("Unknown constraint type!");
case TargetLowering::C_Other:
case TargetLowering::C_Unknown:
return 0;
@@ -2995,6 +3031,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
case TargetLowering::C_Memory:
return 3;
}
+ llvm_unreachable("Invalid constraint type");
}
/// Examine constraint type and operand type and determine a weight value.
@@ -3242,8 +3279,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
- std::vector<SDNode*>* Created) const {
+SDValue TargetLowering::
+BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*>* Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl= N->getDebugLoc();
@@ -3258,10 +3296,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
SDValue Q;
- if (isOperationLegalOrCustom(ISD::MULHS, VT))
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) :
+ isOperationLegalOrCustom(ISD::MULHS, VT))
Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
DAG.getConstant(magics.m, VT));
- else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
N->getOperand(0),
DAG.getConstant(magics.m, VT)).getNode(), 1);
@@ -3299,8 +3339,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
- std::vector<SDNode*>* Created) const {
+SDValue TargetLowering::
+BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*>* Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
@@ -3332,9 +3373,11 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
- if (isOperationLegalOrCustom(ISD::MULHU, VT))
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
+ isOperationLegalOrCustom(ISD::MULHU, VT))
Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT));
- else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
DAG.getConstant(magics.m, VT)).getNode(), 1);
else
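
Aside, not part of the patch: the "multiply by a magic number" trick that BuildSDIV/BuildUDIV emit above replaces a divide by constant with a multiply-high plus shift. A standalone worked example for unsigned division by 3, where 0xAAAAAAAB is ceil(2^33 / 3), so the high part of the 64-bit product shifted by 33 reproduces n / 3 for every 32-bit n:

#include <cassert>
#include <cstdint>

static uint32_t udiv3(uint32_t n) {
  // multiply-high by the magic constant, then shift; no divide instruction
  return (uint32_t)(((uint64_t)n * 0xAAAAAAABULL) >> 33);
}

int main() {
  for (uint32_t n = 0; n < 1000000; ++n)
    assert(udiv3(n) == n / 3);
  assert(udiv3(0xFFFFFFFFu) == 0xFFFFFFFFu / 3);
  return 0;
}
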
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 2609256c8ffa..0016047a134e 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -116,8 +116,7 @@ namespace {
// Branches and invokes do not escape, only unwind, resume, and return
// do.
TerminatorInst *TI = CurBB->getTerminator();
- if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI) &&
- !isa<ResumeInst>(TI))
+ if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
continue;
Builder.SetInsertPoint(TI->getParent(), TI);
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index 160f38f69236..21ae2f5e56eb 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -93,6 +93,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
}
AU.addPreserved<MachineLoopInfo>();
AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -124,7 +125,7 @@ MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
}
bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
- return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
+ return (MBB && !MBB->empty() && MBB->back().isReturn());
}
// Initialize shrink wrapping DFA sets, called before iterations.
@@ -158,7 +159,7 @@ void PEI::initShrinkWrappingInfo() {
// via --shrink-wrap-func=<funcname>.
#ifndef NDEBUG
if (ShrinkWrapFunc != "") {
- std::string MFName = MF->getFunction()->getNameStr();
+ std::string MFName = MF->getFunction()->getName().str();
ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
}
#endif
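
Aside, illustrative only: the getNameStr() to getName().str() changes here and in getBasicBlockName below reflect the name accessors now returning a StringRef; callers that need an owning std::string, as the ShrinkWrapFunc comparison above does, convert explicitly:

#include "llvm/ADT/StringRef.h"
#include <string>

// getName() hands back a non-owning StringRef; .str() copies it into an
// owning std::string when one is actually required.
std::string toOwnedName(llvm::StringRef Name) {
  return Name.str();
}
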
@@ -1045,7 +1046,7 @@ std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
return "";
if (MBB->getBasicBlock())
- return MBB->getBasicBlock()->getNameStr();
+ return MBB->getBasicBlock()->getName().str();
std::ostringstream name;
name << "_MBB_" << MBB->getNumber();
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index ded2459d4278..9a86f32d8f96 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -1,4 +1,4 @@
-//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -29,21 +29,20 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include <set>
using namespace llvm;
-static cl::opt<bool> DisableOldSjLjEH("disable-old-sjlj-eh", cl::Hidden,
- cl::desc("Disable the old SjLj EH preparation pass"));
-
STATISTIC(NumInvokes, "Number of invokes replaced");
-STATISTIC(NumUnwinds, "Number of unwinds replaced");
STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
- class SjLjEHPass : public FunctionPass {
+ class SjLjEHPrepare : public FunctionPass {
const TargetLowering *TLI;
Type *FunctionContextTy;
Constant *RegisterFn;
@@ -54,16 +53,12 @@ namespace {
Constant *StackRestoreFn;
Constant *LSDAAddrFn;
Value *PersonalityFn;
- Constant *SelectorFn;
- Constant *ExceptionFn;
Constant *CallSiteFn;
- Constant *DispatchSetupFn;
Constant *FuncCtxFn;
- Value *CallSite;
- DenseMap<InvokeInst*, BasicBlock*> LPadSuccMap;
+ AllocaInst *FuncCtx;
public:
static char ID; // Pass identification, replacement for typeid
- explicit SjLjEHPass(const TargetLowering *tli = NULL)
+ explicit SjLjEHPrepare(const TargetLowering *tli = NULL)
: FunctionPass(ID), TLI(tli) { }
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
@@ -75,28 +70,24 @@ namespace {
private:
bool setupEntryBlockAndCallSites(Function &F);
+ void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal);
Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads);
void lowerIncomingArguments(Function &F);
void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes);
-
- void insertCallSiteStore(Instruction *I, int Number, Value *CallSite);
- void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite,
- SwitchInst *CatchSwitch);
- void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
- void splitLandingPad(InvokeInst *II);
- bool insertSjLjEHSupport(Function &F);
+ void insertCallSiteStore(Instruction *I, int Number);
};
} // end anonymous namespace
-char SjLjEHPass::ID = 0;
+char SjLjEHPrepare::ID = 0;
-// Public Interface To the SjLjEHPass pass.
-FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) {
- return new SjLjEHPass(TLI);
+// Public Interface To the SjLjEHPrepare pass.
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) {
+ return new SjLjEHPrepare(TLI);
}
// doInitialization - Set up declarations and types needed to process
// exceptions.
-bool SjLjEHPass::doInitialization(Module &M) {
+bool SjLjEHPrepare::doInitialization(Module &M) {
// Build the function context structure.
// builtin_setjmp uses a five word jbuf
Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
@@ -123,11 +114,7 @@ bool SjLjEHPass::doInitialization(Module &M) {
StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
- SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
- ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception);
CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
- DispatchSetupFn
- = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup);
FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
PersonalityFn = 0;
@@ -136,583 +123,67 @@ bool SjLjEHPass::doInitialization(Module &M) {
/// insertCallSiteStore - Insert a store of the call-site value to the
/// function context
-void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number,
- Value *CallSite) {
- ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
- Number);
- // Insert a store of the call-site number
- new StoreInst(CallSiteNoC, CallSite, true, I); // volatile
-}
-
-/// splitLandingPad - Split a landing pad. This takes considerable care because
-/// of PHIs and other nasties. The problem is that the jump table needs to jump
-/// to the landing pad block. However, the landing pad block can be jumped to
-/// only by an invoke instruction. So we clone the landingpad instruction into
-/// its own basic block, have the invoke jump to there. The landingpad
-/// instruction's basic block's successor is now the target for the jump table.
-///
-/// But because of PHI nodes, we need to create another basic block for the jump
-/// table to jump to. This is definitely a hack, because the values for the PHI
-/// nodes may not be defined on the edge from the jump table. But that's okay,
-/// because the jump table is simply a construct to mimic what is happening in
-/// the CFG. So the values are mysteriously there, even though there is no value
-/// for the PHI from the jump table's edge (hence calling this a hack).
-void SjLjEHPass::splitLandingPad(InvokeInst *II) {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(II->getUnwindDest(), II->getParent(),
- ".1", ".2", this, NewBBs);
-
- // Create an empty block so that the jump table has something to jump to
- // which doesn't have any PHI nodes.
- BasicBlock *LPad = NewBBs[0];
- BasicBlock *Succ = *succ_begin(LPad);
- BasicBlock *JumpTo = BasicBlock::Create(II->getContext(), "jt.land",
- LPad->getParent(), Succ);
- LPad->getTerminator()->eraseFromParent();
- BranchInst::Create(JumpTo, LPad);
- BranchInst::Create(Succ, JumpTo);
- LPadSuccMap[II] = JumpTo;
-
- for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- Value *Val = PN->removeIncomingValue(LPad, false);
- PN->addIncoming(Val, JumpTo);
- }
-}
-
-/// markInvokeCallSite - Insert code to mark the call_site for this invoke
-void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo,
- Value *CallSite,
- SwitchInst *CatchSwitch) {
- ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()),
- InvokeNo);
- // The runtime comes back to the dispatcher with the call_site - 1 in
- // the context. Odd, but there it is.
- ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
- InvokeNo - 1);
-
- // If the unwind edge has phi nodes, split the edge.
- if (isa<PHINode>(II->getUnwindDest()->begin())) {
- // FIXME: New EH - This if-condition will be always true in the new scheme.
- if (II->getUnwindDest()->isLandingPad())
- splitLandingPad(II);
- else
- SplitCriticalEdge(II, 1, this);
-
- // If there are any phi nodes left, they must have a single predecessor.
- while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
- PN->replaceAllUsesWith(PN->getIncomingValue(0));
- PN->eraseFromParent();
- }
- }
+void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
+ IRBuilder<> Builder(I);
- // Insert the store of the call site value
- insertCallSiteStore(II, InvokeNo, CallSite);
-
- // Record the call site value for the back end so it stays associated with
- // the invoke.
- CallInst::Create(CallSiteFn, CallSiteNoC, "", II);
-
- // Add a switch case to our unwind block.
- if (BasicBlock *SuccBB = LPadSuccMap[II]) {
- CatchSwitch->addCase(SwitchValC, SuccBB);
- } else {
- CatchSwitch->addCase(SwitchValC, II->getUnwindDest());
- }
+ // Get a reference to the call_site field.
+ Type *Int32Ty = Type::getInt32Ty(I->getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ Value *One = ConstantInt::get(Int32Ty, 1);
+ Value *Idxs[2] = { Zero, One };
+ Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site");
- // We still want this to look like an invoke so we emit the LSDA properly,
- // so we don't transform the invoke into a call here.
+ // Insert a store of the call-site number
+ ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Number);
+ Builder.CreateStore(CallSiteNoC, CallSite, true/*volatile*/);
}
/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
/// we reach blocks we've already seen.
-static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
- if (!LiveBBs.insert(BB).second) return; // already been here.
+static void MarkBlocksLiveIn(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 64> &LiveBBs) {
+ if (!LiveBBs.insert(BB)) return; // already been here.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
MarkBlocksLiveIn(*PI, LiveBBs);
}
-/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind edge
-/// we spill into a stack location, guaranteeing that there is nothing live
-/// across the unwind edge. This process also splits all critical edges
-/// coming out of invoke's.
-/// FIXME: Move this function to a common utility file (Local.cpp?) so
-/// both SjLj and LowerInvoke can use it.
-void SjLjEHPass::
-splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
- // First step, split all critical edges from invoke instructions.
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
- InvokeInst *II = Invokes[i];
- SplitCriticalEdge(II, 0, this);
-
- // FIXME: New EH - This if-condition will be always true in the new scheme.
- if (II->getUnwindDest()->isLandingPad())
- splitLandingPad(II);
- else
- SplitCriticalEdge(II, 1, this);
-
- assert(!isa<PHINode>(II->getNormalDest()) &&
- !isa<PHINode>(II->getUnwindDest()) &&
- "Critical edge splitting left single entry phi nodes?");
- }
-
- Function *F = Invokes.back()->getParent()->getParent();
-
- // To avoid having to handle incoming arguments specially, we lower each arg
- // to a copy instruction in the entry block. This ensures that the argument
- // value itself cannot be live across the entry block.
- BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
- while (isa<AllocaInst>(AfterAllocaInsertPt) &&
- isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
- ++AfterAllocaInsertPt;
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
- AI != E; ++AI) {
- Type *Ty = AI->getType();
- // Aggregate types can't be cast, but are legal argument types, so we have
- // to handle them differently. We use an extract/insert pair as a
- // lightweight method to achieve the same goal.
- if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
- Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
- Instruction *NI = InsertValueInst::Create(AI, EI, 0);
- NI->insertAfter(EI);
- AI->replaceAllUsesWith(NI);
- // Set the operand of the instructions back to the AllocaInst.
- EI->setOperand(0, AI);
- NI->setOperand(0, AI);
- } else {
- // This is always a no-op cast because we're casting AI to AI->getType()
- // so src and destination types are identical. BitCast is the only
- // possibility.
- CastInst *NC = new BitCastInst(
- AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
- AI->replaceAllUsesWith(NC);
- // Set the operand of the cast instruction back to the AllocaInst.
- // Normally it's forbidden to replace a CastInst's operand because it
- // could cause the opcode to reflect an illegal conversion. However,
- // we're replacing it here with the same value it was constructed with.
- // We do this because the above replaceAllUsesWith() clobbered the
- // operand, but we want this one to remain.
- NC->setOperand(0, AI);
- }
- }
-
- // Finally, scan the code looking for instructions with bad live ranges.
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- // Ignore obvious cases we don't have to handle. In particular, most
- // instructions either have no uses or only have a single use inside the
- // current block. Ignore them quickly.
- Instruction *Inst = II;
- if (Inst->use_empty()) continue;
- if (Inst->hasOneUse() &&
- cast<Instruction>(Inst->use_back())->getParent() == BB &&
- !isa<PHINode>(Inst->use_back())) continue;
-
- // If this is an alloca in the entry block, it's not a real register
- // value.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
- if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
- continue;
-
- // Avoid iterator invalidation by copying users to a temporary vector.
- SmallVector<Instruction*,16> Users;
- for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- if (User->getParent() != BB || isa<PHINode>(User))
- Users.push_back(User);
- }
-
- // Find all of the blocks that this value is live in.
- std::set<BasicBlock*> LiveBBs;
- LiveBBs.insert(Inst->getParent());
- while (!Users.empty()) {
- Instruction *U = Users.back();
- Users.pop_back();
-
- if (!isa<PHINode>(U)) {
- MarkBlocksLiveIn(U->getParent(), LiveBBs);
- } else {
- // Uses for a PHI node occur in their predecessor block.
- PHINode *PN = cast<PHINode>(U);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == Inst)
- MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
- }
- }
-
- // Now that we know all of the blocks that this thing is live in, see if
- // it includes any of the unwind locations.
- bool NeedsSpill = false;
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
- BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
- if (UnwindBlock != BB && LiveBBs.count(UnwindBlock))
- NeedsSpill = true;
- }
-
- // If we decided we need a spill, do it.
- // FIXME: Spilling this way is overkill, as it forces all uses of
- // the value to be reloaded from the stack slot, even those that aren't
- // in the unwind blocks. We should be more selective.
- if (NeedsSpill) {
- ++NumSpilled;
- DemoteRegToStack(*Inst, true);
- }
- }
-}
-
-/// CreateLandingPadLoad - Load the exception handling values and insert them
-/// into a structure.
-static Instruction *CreateLandingPadLoad(Function &F, Value *ExnAddr,
- Value *SelAddr,
- BasicBlock::iterator InsertPt) {
- Value *Exn = new LoadInst(ExnAddr, "exn", false,
- InsertPt);
- Type *Ty = Type::getInt8PtrTy(F.getContext());
- Exn = CastInst::Create(Instruction::IntToPtr, Exn, Ty, "", InsertPt);
- Value *Sel = new LoadInst(SelAddr, "sel", false, InsertPt);
-
- Ty = StructType::get(Exn->getType(), Sel->getType(), NULL);
- InsertValueInst *LPadVal = InsertValueInst::Create(llvm::UndefValue::get(Ty),
- Exn, 0,
- "lpad.val", InsertPt);
- return InsertValueInst::Create(LPadVal, Sel, 1, "lpad.val", InsertPt);
-}
-
-/// ReplaceLandingPadVal - Replace the landingpad instruction's value with a
-/// load from the stored values (via CreateLandingPadLoad). This looks through
-/// PHI nodes, and removes them if they are dead.
-static void ReplaceLandingPadVal(Function &F, Instruction *Inst, Value *ExnAddr,
- Value *SelAddr) {
- if (Inst->use_empty()) return;
-
- while (!Inst->use_empty()) {
- Instruction *I = cast<Instruction>(Inst->use_back());
-
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
- ReplaceLandingPadVal(F, PN, ExnAddr, SelAddr);
- if (PN->use_empty()) PN->eraseFromParent();
- continue;
- }
-
- I->replaceUsesOfWith(Inst, CreateLandingPadLoad(F, ExnAddr, SelAddr, I));
- }
-}
-
-bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
- SmallVector<ReturnInst*,16> Returns;
- SmallVector<UnwindInst*,16> Unwinds;
- SmallVector<InvokeInst*,16> Invokes;
-
- // Look through the terminators of the basic blocks to find invokes, returns
- // and unwinds.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
- // Remember all return instructions in case we insert an invoke into this
- // function.
- Returns.push_back(RI);
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- Invokes.push_back(II);
- } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- Unwinds.push_back(UI);
- }
- }
-
- NumInvokes += Invokes.size();
- NumUnwinds += Unwinds.size();
-
- // If we don't have any invokes, there's nothing to do.
- if (Invokes.empty()) return false;
-
- // Find the eh.selector.*, eh.exception and alloca calls.
- //
- // Remember any allocas() that aren't in the entry block, as the
- // jmpbuf saved SP will need to be updated for them.
- //
- // We'll use the first eh.selector to determine the right personality
- // function to use. For SJLJ, we always use the same personality for the
- // whole function, not on a per-selector basis.
- // FIXME: That's a bit ugly. Better way?
- SmallVector<CallInst*,16> EH_Selectors;
- SmallVector<CallInst*,16> EH_Exceptions;
- SmallVector<Instruction*,16> JmpbufUpdatePoints;
-
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- // Note: Skip the entry block since there's nothing there that interests
- // us. eh.selector and eh.exception shouldn't ever be there, and we
- // want to disregard any allocas that are there.
- //
- // FIXME: This is awkward. The new EH scheme won't need to skip the entry
- // block.
- if (BB == F.begin()) {
- if (InvokeInst *II = dyn_cast<InvokeInst>(F.begin()->getTerminator())) {
- // FIXME: This will be always non-NULL in the new EH.
- if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
- if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn();
- }
-
- continue;
- }
-
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (CI->getCalledFunction() == SelectorFn) {
- if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1);
- EH_Selectors.push_back(CI);
- } else if (CI->getCalledFunction() == ExceptionFn) {
- EH_Exceptions.push_back(CI);
- } else if (CI->getCalledFunction() == StackRestoreFn) {
- JmpbufUpdatePoints.push_back(CI);
- }
- } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- JmpbufUpdatePoints.push_back(AI);
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
- // FIXME: This will be always non-NULL in the new EH.
- if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
- if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn();
- }
- }
- }
-
- // If we don't have any eh.selector calls, we can't determine the personality
- // function. Without a personality function, we can't process exceptions.
- if (!PersonalityFn) return false;
-
- // We have invokes, so we need to add register/unregister calls to get this
- // function onto the global unwind stack.
- //
- // First thing we need to do is scan the whole function for values that are
- // live across unwind edges. Each value that is live across an unwind edge we
- // spill into a stack location, guaranteeing that there is nothing live across
- // the unwind edge. This process also splits all critical edges coming out of
- // invoke's.
- splitLiveRangesAcrossInvokes(Invokes);
-
-
- SmallVector<LandingPadInst*, 16> LandingPads;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
- // FIXME: This will be always non-NULL in the new EH.
- if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
- LandingPads.push_back(LPI);
+/// substituteLPadValues - Substitute the values returned by the landingpad
+/// instruction with those returned by the personality function.
+void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal) {
+ SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end());
+ while (!UseWorkList.empty()) {
+ Value *Val = UseWorkList.pop_back_val();
+ ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val);
+ if (!EVI) continue;
+ if (EVI->getNumIndices() != 1) continue;
+ if (*EVI->idx_begin() == 0)
+ EVI->replaceAllUsesWith(ExnVal);
+ else if (*EVI->idx_begin() == 1)
+ EVI->replaceAllUsesWith(SelVal);
+ if (EVI->getNumUses() == 0)
+ EVI->eraseFromParent();
}
+ if (LPI->getNumUses() == 0) return;
- BasicBlock *EntryBB = F.begin();
- // Create an alloca for the incoming jump buffer ptr and the new jump buffer
- // that needs to be restored on all exits from the function. This is an
- // alloca because the value needs to be added to the global context list.
- unsigned Align = 4; // FIXME: Should be a TLI check?
- AllocaInst *FunctionContext =
- new AllocaInst(FunctionContextTy, 0, Align,
- "fcn_context", F.begin()->begin());
-
- Value *Idxs[2];
- Type *Int32Ty = Type::getInt32Ty(F.getContext());
- Value *Zero = ConstantInt::get(Int32Ty, 0);
- // We need to also keep around a reference to the call_site field
- Idxs[0] = Zero;
- Idxs[1] = ConstantInt::get(Int32Ty, 1);
- CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, "call_site",
- EntryBB->getTerminator());
-
- // The exception selector comes back in context->data[1]
- Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, "fc_data",
- EntryBB->getTerminator());
- Idxs[1] = ConstantInt::get(Int32Ty, 1);
- Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs,
- "exc_selector_gep",
- EntryBB->getTerminator());
- // The exception value comes back in context->data[0]
- Idxs[1] = Zero;
- Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs,
- "exception_gep",
- EntryBB->getTerminator());
-
-  // The result of the eh.selector call will be replaced with a reference to
- // the selector value returned in the function context. We leave the selector
- // itself so the EH analysis later can use it.
- for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
- CallInst *I = EH_Selectors[i];
- Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
- I->replaceAllUsesWith(SelectorVal);
- }
-
- // eh.exception calls are replaced with references to the proper location in
- // the context. Unlike eh.selector, the eh.exception calls are removed
- // entirely.
- for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
- CallInst *I = EH_Exceptions[i];
- // Possible for there to be duplicates, so check to make sure the
- // instruction hasn't already been removed.
- if (!I->getParent()) continue;
- Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
- Type *Ty = Type::getInt8PtrTy(F.getContext());
- Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
-
- I->replaceAllUsesWith(Val);
- I->eraseFromParent();
- }
-
- for (unsigned i = 0, e = LandingPads.size(); i != e; ++i)
- ReplaceLandingPadVal(F, LandingPads[i], ExceptionAddr, SelectorAddr);
-
- // The entry block changes to have the eh.sjlj.setjmp, with a conditional
- // branch to a dispatch block for non-zero returns. If we return normally,
- // we're not handling an exception and just register the function context and
- // continue.
-
- // Create the dispatch block. The dispatch block is basically a big switch
- // statement that goes to all of the invoke landing pads.
- BasicBlock *DispatchBlock =
- BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
-
- // Insert a load of the callsite in the dispatch block, and a switch on its
- // value. By default, we issue a trap statement.
- BasicBlock *TrapBlock =
- BasicBlock::Create(F.getContext(), "trapbb", &F);
- CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap),
- "", TrapBlock);
- new UnreachableInst(F.getContext(), TrapBlock);
-
- Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
- DispatchBlock);
- SwitchInst *DispatchSwitch =
- SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(),
- DispatchBlock);
- // Split the entry block to insert the conditional branch for the setjmp.
- BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
- "eh.sjlj.setjmp.cont");
-
- // Populate the Function Context
- // 1. LSDA address
- // 2. Personality function address
- // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
-
- // LSDA address
- Idxs[0] = Zero;
- Idxs[1] = ConstantInt::get(Int32Ty, 4);
- Value *LSDAFieldPtr =
- GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep",
- EntryBB->getTerminator());
- Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
- EntryBB->getTerminator());
- new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
-
- Idxs[1] = ConstantInt::get(Int32Ty, 3);
- Value *PersonalityFieldPtr =
- GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep",
- EntryBB->getTerminator());
- new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
- EntryBB->getTerminator());
-
- // Save the frame pointer.
- Idxs[1] = ConstantInt::get(Int32Ty, 5);
- Value *JBufPtr
- = GetElementPtrInst::Create(FunctionContext, Idxs, "jbuf_gep",
- EntryBB->getTerminator());
- Idxs[1] = ConstantInt::get(Int32Ty, 0);
- Value *FramePtr =
- GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep",
- EntryBB->getTerminator());
-
- Value *Val = CallInst::Create(FrameAddrFn,
- ConstantInt::get(Int32Ty, 0),
- "fp",
- EntryBB->getTerminator());
- new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
-
- // Save the stack pointer.
- Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *StackPtr =
- GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep",
- EntryBB->getTerminator());
-
- Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
- new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
-
-  // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
- Value *SetjmpArg =
- CastInst::Create(Instruction::BitCast, JBufPtr,
- Type::getInt8PtrTy(F.getContext()), "",
- EntryBB->getTerminator());
- Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
- "",
- EntryBB->getTerminator());
-
- // Add a call to dispatch_setup after the setjmp call. This is expanded to any
- // target-specific setup that needs to be done.
- CallInst::Create(DispatchSetupFn, DispatchVal, "", EntryBB->getTerminator());
+ // There are still some uses of LPI. Construct an aggregate with the exception
+ // values and replace the LPI with that aggregate.
+ Type *LPadType = LPI->getType();
+ Value *LPadVal = UndefValue::get(LPadType);
+ IRBuilder<>
+ Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal))));
+ LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
+ LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
- // check the return value of the setjmp. non-zero goes to dispatcher.
- Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
- ICmpInst::ICMP_EQ, DispatchVal, Zero,
- "notunwind");
- // Nuke the uncond branch.
- EntryBB->getTerminator()->eraseFromParent();
-
- // Put in a new condbranch in its place.
- BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
-
- // Register the function context and make sure it's known to not throw
- CallInst *Register =
- CallInst::Create(RegisterFn, FunctionContext, "",
- ContBlock->getTerminator());
- Register->setDoesNotThrow();
-
- // At this point, we are all set up, update the invoke instructions to mark
- // their call_site values, and fill in the dispatch switch accordingly.
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
- markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
-
- // Mark call instructions that aren't nounwind as no-action (call_site ==
- // -1). Skip the entry block, as prior to then, no function context has been
- // created for this function and any unexpected exceptions thrown will go
- // directly to the caller's context, which is what we want anyway, so no need
- // to do anything here.
- for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
- for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- // Ignore calls to the EH builtins (eh.selector, eh.exception)
- Constant *Callee = CI->getCalledFunction();
- if (Callee != SelectorFn && Callee != ExceptionFn
- && !CI->doesNotThrow())
- insertCallSiteStore(CI, -1, CallSite);
- } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
- insertCallSiteStore(RI, -1, CallSite);
- }
- }
-
- // Replace all unwinds with a branch to the unwind handler.
- // ??? Should this ever happen with sjlj exceptions?
- for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
- BranchInst::Create(TrapBlock, Unwinds[i]);
- Unwinds[i]->eraseFromParent();
- }
-
- // Following any allocas not in the entry block, update the saved SP in the
- // jmpbuf to the new value.
- for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
- Instruction *AI = JmpbufUpdatePoints[i];
- Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
- StackAddr->insertAfter(AI);
- Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
- StoreStackAddr->insertAfter(StackAddr);
- }
-
- // Finally, for any returns from this function, if this function contains an
- // invoke, add a call to unregister the function context.
- for (unsigned i = 0, e = Returns.size(); i != e; ++i)
- CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
-
- return true;
+ LPI->replaceAllUsesWith(LPadVal);
}
/// setupFunctionContext - Allocate the function context on the stack and fill
/// it with all of the data that we know at this point.
-Value *SjLjEHPass::
+Value *SjLjEHPrepare::
setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
BasicBlock *EntryBB = F.begin();
@@ -721,56 +192,42 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
// because the value needs to be added to the global context list.
unsigned Align =
TLI->getTargetData()->getPrefTypeAlignment(FunctionContextTy);
- AllocaInst *FuncCtx =
+ FuncCtx =
new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin());
// Fill in the function context structure.
- Value *Idxs[2];
Type *Int32Ty = Type::getInt32Ty(F.getContext());
Value *Zero = ConstantInt::get(Int32Ty, 0);
Value *One = ConstantInt::get(Int32Ty, 1);
+ Value *Two = ConstantInt::get(Int32Ty, 2);
+ Value *Three = ConstantInt::get(Int32Ty, 3);
+ Value *Four = ConstantInt::get(Int32Ty, 4);
- // Keep around a reference to the call_site field.
- Idxs[0] = Zero;
- Idxs[1] = One;
- CallSite = GetElementPtrInst::Create(FuncCtx, Idxs, "call_site",
- EntryBB->getTerminator());
-
- // Reference the __data field.
- Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *FCData = GetElementPtrInst::Create(FuncCtx, Idxs, "__data",
- EntryBB->getTerminator());
-
- // The exception value comes back in context->__data[0].
- Idxs[1] = Zero;
- Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs,
- "exception_gep",
- EntryBB->getTerminator());
-
- // The exception selector comes back in context->__data[1].
- Idxs[1] = One;
- Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs,
- "exn_selector_gep",
- EntryBB->getTerminator());
+ Value *Idxs[2] = { Zero, 0 };
for (unsigned I = 0, E = LPads.size(); I != E; ++I) {
LandingPadInst *LPI = LPads[I];
IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt());
+ // Reference the __data field.
+ Idxs[1] = Two;
+ Value *FCData = Builder.CreateGEP(FuncCtx, Idxs, "__data");
+
+ // The exception value comes back in context->__data[0].
+ Idxs[1] = Zero;
+ Value *ExceptionAddr = Builder.CreateGEP(FCData, Idxs, "exception_gep");
Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext()));
- Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
- Type *LPadType = LPI->getType();
- Value *LPadVal = UndefValue::get(LPadType);
- LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
- LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
+ Idxs[1] = One;
+ Value *SelectorAddr = Builder.CreateGEP(FCData, Idxs, "exn_selector_gep");
+ Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
- LPI->replaceAllUsesWith(LPadVal);
+ substituteLPadValues(LPI, ExnVal, SelVal);
}
// Personality function
- Idxs[1] = ConstantInt::get(Int32Ty, 3);
+ Idxs[1] = Three;
if (!PersonalityFn)
PersonalityFn = LPads[0]->getPersonalityFn();
Value *PersonalityFieldPtr =
@@ -780,11 +237,11 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
EntryBB->getTerminator());
// LSDA address
- Idxs[1] = ConstantInt::get(Int32Ty, 4);
- Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep",
- EntryBB->getTerminator());
Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
EntryBB->getTerminator());
+ Idxs[1] = Four;
+ Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep",
+ EntryBB->getTerminator());
new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
return FuncCtx;
@@ -794,7 +251,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
/// specially, we lower each arg to a copy instruction in the entry block. This
/// ensures that the argument value itself cannot be live out of the entry
/// block.
-void SjLjEHPass::lowerIncomingArguments(Function &F) {
+void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin();
while (isa<AllocaInst>(AfterAllocaInsPt) &&
isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize()))
@@ -838,8 +295,8 @@ void SjLjEHPass::lowerIncomingArguments(Function &F) {
/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind
/// edge and spill them.
-void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
- ArrayRef<InvokeInst*> Invokes) {
+void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
+ ArrayRef<InvokeInst*> Invokes) {
// Finally, scan the code looking for instructions with bad live ranges.
for (Function::iterator
BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
@@ -870,7 +327,7 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
}
// Find all of the blocks that this value is live in.
- std::set<BasicBlock*> LiveBBs;
+ SmallPtrSet<BasicBlock*, 64> LiveBBs;
LiveBBs.insert(Inst->getParent());
while (!Users.empty()) {
Instruction *U = Users.back();
@@ -893,7 +350,10 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around "
+ << UnwindBlock->getName() << "\n");
NeedsSpill = true;
+ break;
}
}
@@ -902,36 +362,60 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
// the value to be reloaded from the stack slot, even those that aren't
// in the unwind blocks. We should be more selective.
if (NeedsSpill) {
- ++NumSpilled;
DemoteRegToStack(*Inst, true);
+ ++NumSpilled;
}
}
}
+
+ // Go through the landing pads and remove any PHIs there.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ LandingPadInst *LPI = UnwindBlock->getLandingPadInst();
+
+ // Place PHIs into a set to avoid invalidating the iterator.
+ SmallPtrSet<PHINode*, 8> PHIsToDemote;
+ for (BasicBlock::iterator
+ PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN)
+ PHIsToDemote.insert(cast<PHINode>(PN));
+ if (PHIsToDemote.empty()) continue;
+
+ // Demote the PHIs to the stack.
+ for (SmallPtrSet<PHINode*, 8>::iterator
+ I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I)
+ DemotePHIToStack(*I);
+
+ // Move the landingpad instruction back to the top of the landing pad block.
+ LPI->moveBefore(UnwindBlock->begin());
+ }
}
/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling
/// the function context and marking the call sites with the appropriate
/// values. These values are used by the DWARF EH emitter.
-bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
+bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
SmallVector<ReturnInst*, 16> Returns;
SmallVector<InvokeInst*, 16> Invokes;
- SmallVector<LandingPadInst*, 16> LPads;
+ SmallSetVector<LandingPadInst*, 16> LPads;
// Look through the terminators of the basic blocks to find invokes.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
Invokes.push_back(II);
- LPads.push_back(II->getUnwindDest()->getLandingPadInst());
+ LPads.insert(II->getUnwindDest()->getLandingPadInst());
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
Returns.push_back(RI);
}
if (Invokes.empty()) return false;
+ NumInvokes += Invokes.size();
+
lowerIncomingArguments(F);
lowerAcrossUnwindEdges(F, Invokes);
- Value *FuncCtx = setupFunctionContext(F, LPads);
+ Value *FuncCtx =
+ setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
BasicBlock *EntryBB = F.begin();
Type *Int32Ty = Type::getInt32Ty(F.getContext());
@@ -979,7 +463,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
// At this point, we are all set up, update the invoke instructions to mark
// their call_site values.
for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
- insertCallSiteStore(Invokes[I], I + 1, CallSite);
+ insertCallSiteStore(Invokes[I], I + 1);
ConstantInt *CallSiteNum =
ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);
@@ -998,9 +482,9 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
if (CallInst *CI = dyn_cast<CallInst>(I)) {
if (!CI->doesNotThrow())
- insertCallSiteStore(CI, -1, CallSite);
+ insertCallSiteStore(CI, -1);
} else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
- insertCallSiteStore(RI, -1, CallSite);
+ insertCallSiteStore(RI, -1);
}
// Register the function context and make sure it's known to not throw
@@ -1008,6 +492,25 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
EntryBB->getTerminator());
Register->setDoesNotThrow();
+ // Following any allocas not in the entry block, update the saved SP in the
+ // jmpbuf to the new value.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (BB == F.begin())
+ continue;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->getCalledFunction() != StackRestoreFn)
+ continue;
+ } else if (!isa<AllocaInst>(I)) {
+ continue;
+ }
+ Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+ StackAddr->insertAfter(I);
+ Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+ StoreStackAddr->insertAfter(StackAddr);
+ }
+ }
+
// Finally, for any returns from this function, if this function contains an
// invoke, add a call to unregister the function context.
for (unsigned I = 0, E = Returns.size(); I != E; ++I)
@@ -1016,11 +519,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
return true;
}
-bool SjLjEHPass::runOnFunction(Function &F) {
- bool Res = false;
- if (!DisableOldSjLjEH)
- Res = insertSjLjEHSupport(F);
- else
- Res = setupEntryBlockAndCallSites(F);
+bool SjLjEHPrepare::runOnFunction(Function &F) {
+ bool Res = setupEntryBlockAndCallSites(F);
return Res;
}
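For readers following the SjLj rewrite above: when a landingpad still has users, the pass rebuilds its { exception pointer, selector } aggregate with insertvalue and replaces the landingpad with it (the + lines at the top of this file's hunks). Below is a minimal standalone sketch of that idea; the helper name is made up for illustration, and the include paths are those of the tree this patch targets (newer trees moved them under llvm/IR/).

#include "llvm/ADT/STLExtras.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Support/IRBuilder.h"

using namespace llvm;

// Rebuild the { exn, sel } aggregate just after the selector load and redirect
// any remaining users of the landingpad to it.
static void rebuildLandingPadValue(LandingPadInst *LPI, Value *ExnVal,
                                   Value *SelVal) {
  if (LPI->use_empty())
    return;                                     // Nothing left to patch up.

  // Insert after the selector load so both operands dominate the new uses.
  IRBuilder<> Builder(
      llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal))));

  Value *Agg = UndefValue::get(LPI->getType()); // Typically { i8*, i32 }.
  Agg = Builder.CreateInsertValue(Agg, ExnVal, 0, "lpad.val");
  Agg = Builder.CreateInsertValue(Agg, SelVal, 1, "lpad.val");
  LPI->replaceAllUsesWith(Agg);
}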
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index ca79cafcf4be..c5bd3a3cae63 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -76,7 +76,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
MachineBasicBlock *mbb = &*mbbItr;
// Insert an index for the MBB start.
- SlotIndex blockStartIndex(back(), SlotIndex::LOAD);
+ SlotIndex blockStartIndex(back(), SlotIndex::Slot_Block);
for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
miItr != miEnd; ++miItr) {
@@ -88,7 +88,8 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
push_back(createEntry(mi, index += SlotIndex::InstrDist));
// Save this base index in the maps.
- mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD)));
+ mi2iMap.insert(std::make_pair(mi, SlotIndex(back(),
+ SlotIndex::Slot_Block)));
++functionSize;
}
@@ -97,14 +98,15 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
push_back(createEntry(0, index += SlotIndex::InstrDist));
MBBRanges[mbb->getNumber()].first = blockStartIndex;
- MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD);
+ MBBRanges[mbb->getNumber()].second = SlotIndex(back(),
+ SlotIndex::Slot_Block);
idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
}
// Sort the Idx2MBBMap
std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
- DEBUG(dump());
+ DEBUG(mf->print(dbgs(), this));
// And we're done!
return false;
@@ -166,7 +168,7 @@ void SlotIndexes::dump() const {
// Print a SlotIndex to a raw_ostream.
void SlotIndex::print(raw_ostream &os) const {
if (isValid())
- os << entry().getIndex() << "LudS"[getSlot()];
+ os << entry().getIndex() << "Berd"[getSlot()];
else
os << "invalid";
}
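The SlotIndexes hunk above renames the per-instruction slots; the new print string "Berd" implies the four slots are Block, EarlyClobber, Register and Dead (only Slot_Block is visible in the hunk, so take the other enumerator names as an assumption). A small sketch of what the encoding means:

// Each instruction index carries one of four sub-slots; printing appends the
// matching letter from "Berd".
enum Slot {
  Slot_Block,        // 'B': live-in/live-out values at the block boundary.
  Slot_EarlyClobber, // 'e': early-clobber defs, before normal register slots.
  Slot_Register,     // 'r': ordinary register defs and uses.
  Slot_Dead          // 'd': where a dead def's trivial live range ends.
};

inline char slotLetter(Slot S) { return "Berd"[S]; }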
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index b6bbcd7176dd..4cd22eb60f55 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -11,8 +11,8 @@
#include "Spiller.h"
#include "VirtRegMap.h"
-#include "LiveRangeEdit.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -29,7 +29,7 @@
using namespace llvm;
namespace {
- enum SpillerName { trivial, standard, inline_ };
+ enum SpillerName { trivial, inline_ };
}
static cl::opt<SpillerName>
@@ -37,10 +37,9 @@ spillerOpt("spiller",
cl::desc("Spiller to use: (default: standard)"),
cl::Prefix,
cl::values(clEnumVal(trivial, "trivial spiller"),
- clEnumVal(standard, "default spiller"),
clEnumValN(inline_, "inline", "inline spiller"),
clEnumValEnd),
- cl::init(standard));
+ cl::init(trivial));
// Spiller virtual destructor implementation.
Spiller::~Spiller() {}
@@ -73,8 +72,9 @@ protected:
/// Add spill ranges for every use/def of the live interval, inserting loads
/// immediately before each use, and stores after each def. No folding or
/// remat is attempted.
- void trivialSpillEverywhere(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &newIntervals) {
+ void trivialSpillEverywhere(LiveRangeEdit& LRE) {
+ LiveInterval* li = &LRE.getParent();
+
DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
assert(li->weight != HUGE_VALF &&
@@ -116,17 +116,14 @@ protected:
}
// Create a new vreg & interval for this instr.
- unsigned newVReg = mri->createVirtualRegister(trc);
- vrm->grow();
- vrm->assignVirt2StackSlot(newVReg, ss);
- LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
+ LiveInterval *newLI = &LRE.create();
newLI->weight = HUGE_VALF;
// Update the reg operands & kill flags.
for (unsigned i = 0; i < indices.size(); ++i) {
unsigned mopIdx = indices[i];
MachineOperand &mop = mi->getOperand(mopIdx);
- mop.setReg(newVReg);
+ mop.setReg(newLI->reg);
if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
mop.setIsKill(true);
}
@@ -136,33 +133,29 @@ protected:
// Insert reload if necessary.
MachineBasicBlock::iterator miItr(mi);
if (hasUse) {
- tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc,
+ tii->loadRegFromStackSlot(*mi->getParent(), miItr, newLI->reg, ss, trc,
tri);
MachineInstr *loadInstr(prior(miItr));
SlotIndex loadIndex =
- lis->InsertMachineInstrInMaps(loadInstr).getDefIndex();
- vrm->addSpillSlotUse(ss, loadInstr);
+ lis->InsertMachineInstrInMaps(loadInstr).getRegSlot();
SlotIndex endIndex = loadIndex.getNextIndex();
VNInfo *loadVNI =
- newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator());
+ newLI->getNextValue(loadIndex, lis->getVNInfoAllocator());
newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
}
// Insert store if necessary.
if (hasDef) {
- tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg,
+ tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newLI->reg,
true, ss, trc, tri);
MachineInstr *storeInstr(llvm::next(miItr));
SlotIndex storeIndex =
- lis->InsertMachineInstrInMaps(storeInstr).getDefIndex();
- vrm->addSpillSlotUse(ss, storeInstr);
+ lis->InsertMachineInstrInMaps(storeInstr).getRegSlot();
SlotIndex beginIndex = storeIndex.getPrevIndex();
VNInfo *storeVNI =
- newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator());
+ newLI->getNextValue(beginIndex, lis->getVNInfoAllocator());
newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
}
-
- newIntervals.push_back(newLI);
}
}
};
@@ -182,60 +175,20 @@ public:
void spill(LiveRangeEdit &LRE) {
// Ignore spillIs - we don't use it.
- trivialSpillEverywhere(&LRE.getParent(), *LRE.getNewVRegs());
+ trivialSpillEverywhere(LRE);
}
};
} // end anonymous namespace
-namespace {
-
-/// Falls back on LiveIntervals::addIntervalsForSpills.
-class StandardSpiller : public Spiller {
-protected:
- MachineFunction *mf;
- LiveIntervals *lis;
- LiveStacks *lss;
- MachineLoopInfo *loopInfo;
- VirtRegMap *vrm;
-public:
- StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf,
- VirtRegMap &vrm)
- : mf(&mf),
- lis(&pass.getAnalysis<LiveIntervals>()),
- lss(&pass.getAnalysis<LiveStacks>()),
- loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()),
- vrm(&vrm) {}
-
- /// Falls back on LiveIntervals::addIntervalsForSpills.
- void spill(LiveRangeEdit &LRE) {
- std::vector<LiveInterval*> added =
- lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(),
- loopInfo, *vrm);
- LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(),
- added.begin(), added.end());
-
- // Update LiveStacks.
- int SS = vrm->getStackSlot(LRE.getReg());
- if (SS == VirtRegMap::NO_STACK_SLOT)
- return;
- const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg());
- LiveInterval &SI = lss->getOrCreateInterval(SS, RC);
- if (!SI.hasAtLeastOneValue())
- SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator());
- SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0));
- }
-};
-
-} // end anonymous namespace
+void Spiller::anchor() { }
llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm) {
switch (spillerOpt) {
- default: assert(0 && "unknown spiller");
case trivial: return new TrivialSpiller(pass, mf, vrm);
- case standard: return new StandardSpiller(pass, mf, vrm);
case inline_: return createInlineSpiller(pass, mf, vrm);
}
+ llvm_unreachable("Invalid spiller optimization");
}
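With the standard spiller deleted, createSpiller's switch now covers every remaining SpillerName value, so the unreachable moves after the switch rather than living in a default: case; the new Spiller::anchor() in the same hunk is the usual out-of-line virtual method that pins the vtable to Spiller.cpp. A reduced sketch of the switch pattern (hypothetical names, not the factory itself):

#include "llvm/Support/ErrorHandling.h"

enum SpillerKind { TrivialKind, InlineKind }; // illustrative only

static const char *spillerName(SpillerKind K) {
  switch (K) {
  case TrivialKind: return "trivial";
  case InlineKind:  return "inline";
  }
  // No default: case above, so -Wswitch flags any enumerator added later that
  // the switch forgets to handle; falling out of the switch means the value
  // was out of range.
  llvm_unreachable("Invalid spiller kind");
}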
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 41f1727da439..b7d5beaab1b2 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -22,6 +22,7 @@ namespace llvm {
/// Implementations are utility classes which insert spill or remat code on
/// demand.
class Spiller {
+ virtual void anchor();
public:
virtual ~Spiller() = 0;
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 63627800af69..9959f74d5f27 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -14,10 +14,10 @@
#define DEBUG_TYPE "regalloc"
#include "SplitKit.h"
-#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -62,13 +62,14 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
const MachineBasicBlock *MBB = MF.getBlockNumbered(Num);
const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor();
std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num];
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
// Compute split points on the first call. The pair is independent of the
// current live interval.
if (!LSP.first.isValid()) {
MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator();
if (FirstTerm == MBB->end())
- LSP.first = LIS.getMBBEndIdx(MBB);
+ LSP.first = MBBEnd;
else
LSP.first = LIS.getInstructionIndex(FirstTerm);
@@ -80,7 +81,7 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin();
I != E;) {
--I;
- if (I->getDesc().isCall()) {
+ if (I->isCall()) {
LSP.second = LIS.getInstructionIndex(I);
break;
}
@@ -89,10 +90,32 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
// If CurLI is live into a landing pad successor, move the last split point
// back to the call that may throw.
- if (LPad && LSP.second.isValid() && LIS.isLiveInToMBB(*CurLI, LPad))
- return LSP.second;
- else
+ if (!LPad || !LSP.second || !LIS.isLiveInToMBB(*CurLI, LPad))
+ return LSP.first;
+
+ // Find the value leaving MBB.
+ const VNInfo *VNI = CurLI->getVNInfoBefore(MBBEnd);
+ if (!VNI)
+ return LSP.first;
+
+ // If the value leaving MBB was defined after the call in MBB, it can't
+ // really be live-in to the landing pad. This can happen if the landing pad
+ // has a PHI, and this register is undef on the exceptional edge.
+ // <rdar://problem/10664933>
+ if (!SlotIndex::isEarlierInstr(VNI->def, LSP.second) && VNI->def < MBBEnd)
return LSP.first;
+
+ // Value is properly live-in to the landing pad.
+ // Only allow splits before the call.
+ return LSP.second;
+}
+
+MachineBasicBlock::iterator
+SplitAnalysis::getLastSplitPointIter(MachineBasicBlock *MBB) {
+ SlotIndex LSP = getLastSplitPoint(MBB->getNumber());
+ if (LSP == LIS.getMBBEndIdx(MBB))
+ return MBB->end();
+ return LIS.getInstructionFromIndex(LSP);
}
/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
@@ -112,7 +135,7 @@ void SplitAnalysis::analyzeUses() {
I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E;
++I)
if (!I.getOperand().isUndef())
- UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex());
+ UseSlots.push_back(LIS.getInstructionIndex(&*I).getRegSlot());
array_pod_sort(UseSlots.begin(), UseSlots.end());
@@ -328,7 +351,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
// We don't need an AliasAnalysis since we will only be performing
// cheap-as-a-copy remats anyway.
- Edit->anyRematerializable(LIS, TII, 0);
+ Edit->anyRematerializable(0);
}
void SplitEditor::dump() const {
@@ -351,7 +374,7 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
LiveInterval *LI = Edit->get(RegIdx);
// Create a new value.
- VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+ VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator());
// Use insert for lookup, so we can add missing values with a second lookup.
std::pair<ValueMap::iterator, bool> InsP =
@@ -366,14 +389,14 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
// If the previous value was a simple mapping, add liveness for it now.
if (VNInfo *OldVNI = InsP.first->second.getPointer()) {
SlotIndex Def = OldVNI->def;
- LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI));
+ LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI));
// No longer a simple mapping. Switch to a complex, non-forced mapping.
InsP.first->second = ValueForcePair();
}
// This is a complex mapping, add liveness for VNI
SlotIndex Def = VNI->def;
- LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+ LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
return VNI;
}
@@ -393,7 +416,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
// This was previously a single mapping. Make sure the old def is represented
// by a trivial live range.
SlotIndex Def = VNI->def;
- Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+ Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
// Mark as complex mapped, forced.
VFP = ValueForcePair(0, true);
}
@@ -413,33 +436,31 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
// Attempt cheap-as-a-copy rematerialization.
LiveRangeEdit::Remat RM(ParentVNI);
- if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) {
- Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late);
+ if (Edit->canRematerializeAt(RM, UseIdx, true)) {
+ Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
++NumRemats;
} else {
// Can't remat, just insert a copy from parent.
CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
.addReg(Edit->getReg());
Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late)
- .getDefIndex();
+ .getRegSlot();
++NumCopies;
}
// Define the value in Reg.
- VNInfo *VNI = defValue(RegIdx, ParentVNI, Def);
- VNI->setCopy(CopyMI);
- return VNI;
+ return defValue(RegIdx, ParentVNI, Def);
}
/// Create a new virtual register and live interval.
unsigned SplitEditor::openIntv() {
// Create the complement as index 0.
if (Edit->empty())
- Edit->create(LIS, VRM);
+ Edit->create();
// Create the open interval.
OpenIdx = Edit->size();
- Edit->create(LIS, VRM);
+ Edit->create();
return OpenIdx;
}
@@ -497,7 +518,7 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
}
DEBUG(dbgs() << ": valno " << ParentVNI->id);
VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
- LIS.getLastSplitPoint(Edit->getParent(), &MBB));
+ SA.getLastSplitPointIter(&MBB));
RegAssign.insert(VNI->def, End, OpenIdx);
DEBUG(dump());
return VNI->def;
@@ -586,7 +607,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
assert(OpenIdx && "openIntv not called before overlapIntv");
const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
- assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) &&
+ assert(ParentVNI == Edit->getParent().getVNInfoBefore(End) &&
"Parent changes value in extended range");
assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
"Range cannot span basic blocks");
@@ -640,7 +661,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n');
forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def));
} else {
- SlotIndex Kill = LIS.getInstructionIndex(MBBI).getDefIndex();
+ SlotIndex Kill = LIS.getInstructionIndex(MBBI).getRegSlot();
DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
AssignI.setStop(Kill);
}
@@ -780,7 +801,7 @@ void SplitEditor::hoistCopiesForSize() {
SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
Dom.second =
defFromParent(0, ParentVNI, Last, *Dom.first,
- LIS.getLastSplitPoint(Edit->getParent(), Dom.first))->def;
+ SA.getLastSplitPointIter(Dom.first))->def;
}
// Remove redundant back-copies that are now known to be dominated by another
@@ -958,7 +979,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
// use the same register as the def, so just do that always.
SlotIndex Idx = LIS.getInstructionIndex(MI);
if (MO.isDef() || MO.isUndef())
- Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex();
+ Idx = Idx.getRegSlot(MO.isEarlyClobber());
// Rewrite to the mapped register at Idx.
unsigned RegIdx = RegAssign.lookup(Idx);
@@ -981,7 +1002,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
if (!Edit->getParent().liveAt(Idx))
continue;
} else
- Idx = Idx.getUseIndex();
+ Idx = Idx.getRegSlot(true);
getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(),
&MDT, &LIS.getVNInfoAllocator());
@@ -994,8 +1015,8 @@ void SplitEditor::deleteRematVictims() {
LiveInterval *LI = *I;
for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end();
LII != LIE; ++LII) {
- // Dead defs end at the store slot.
- if (LII->end != LII->valno->def.getNextSlot())
+ // Dead defs end at the dead slot.
+ if (LII->end != LII->valno->def.getDeadSlot())
continue;
MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def);
assert(MI && "Missing instruction for dead def");
@@ -1012,7 +1033,7 @@ void SplitEditor::deleteRematVictims() {
if (Dead.empty())
return;
- Edit->eliminateDeadDefs(Dead, LIS, VRM, TII);
+ Edit->eliminateDeadDefs(Dead);
}
void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
@@ -1030,7 +1051,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def);
VNI->setIsPHIDef(ParentVNI->isPHIDef());
- VNI->setCopy(ParentVNI->getCopy());
// Force rematted values to be recomputed everywhere.
// The new live ranges may be truncated.
@@ -1049,7 +1069,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
break;
case SM_Speed:
llvm_unreachable("Spill mode 'speed' not implemented yet");
- break;
}
// Transfer the simply mapped values, check if any are skipped.
@@ -1089,7 +1108,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
SmallVector<LiveInterval*, 8> dups;
dups.push_back(li);
for (unsigned j = 1; j != NumComp; ++j)
- dups.push_back(&Edit->create(LIS, VRM));
+ dups.push_back(&Edit->create());
ConEQ.Distribute(&dups[0], MRI);
// The new intervals all map back to i.
if (LRMap)
@@ -1097,7 +1116,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
}
// Calculate spill weight and allocation hints for new intervals.
- Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops);
+ Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops);
assert(!LRMap || LRMap->size() == Edit->size());
}
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index d8fc2122a3c7..4005a3d5cbbf 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -46,9 +46,6 @@ public:
const MachineLoopInfo &Loops;
const TargetInstrInfo &TII;
- // Sorted slot indexes of using instructions.
- SmallVector<SlotIndex, 8> UseSlots;
-
/// Additional information about basic blocks where the current variable is
/// live. Such a block will look like one of these templates:
///
@@ -85,6 +82,9 @@ private:
// Current live interval.
const LiveInterval *CurLI;
+ // Sorted slot indexes of using instructions.
+ SmallVector<SlotIndex, 8> UseSlots;
+
/// LastSplitPoint - Last legal split point in each basic block in the current
/// function. The first entry is the first terminator, the second entry is the
/// last valid split point for a variable that is live in to a landing pad
@@ -135,7 +135,7 @@ public:
/// getParent - Return the last analyzed interval.
const LiveInterval &getParent() const { return *CurLI; }
- /// getLastSplitPoint - Return that base index of the last valid split point
+ /// getLastSplitPoint - Return the base index of the last valid split point
/// in the basic block numbered Num.
SlotIndex getLastSplitPoint(unsigned Num) {
// Inline the common simple case.
@@ -145,6 +145,9 @@ public:
return computeLastSplitPoint(Num);
}
+ /// getLastSplitPointIter - Returns the last split point as an iterator.
+ MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock*);
+
/// isOriginalEndpoint - Return true if the original live range was killed or
/// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def,
/// and 'use' for an early-clobber def.
@@ -152,6 +155,10 @@ public:
/// splitting.
bool isOriginalEndpoint(SlotIndex Idx) const;
+ /// getUseSlots - Return an array of SlotIndexes of instructions using CurLI.
+ /// This includes both use and def operands, at most one entry per instruction.
+ ArrayRef<SlotIndex> getUseSlots() const { return UseSlots; }
+
/// getUseBlocks - Return an array of BlockInfo objects for the basic blocks
/// where CurLI has uses.
ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; }
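SplitKit.h now keeps UseSlots private and hands it out read-only through getUseSlots(). A hedged usage sketch (countUsesBefore is a hypothetical client, not part of the patch) showing how the sorted slots can be scanned through the ArrayRef without copying:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include <algorithm>

using namespace llvm;

// UseSlots is kept sorted, so a binary search counts the uses strictly
// before Idx.
static unsigned countUsesBefore(ArrayRef<SlotIndex> UseSlots, SlotIndex Idx) {
  return std::lower_bound(UseSlots.begin(), UseSlots.end(), Idx) -
         UseSlots.begin();
}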
diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp
deleted file mode 100644
index 77973b72bbc8..000000000000
--- a/lib/CodeGen/Splitter.cpp
+++ /dev/null
@@ -1,827 +0,0 @@
-//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "loopsplitter"
-
-#include "Splitter.h"
-
-#include "llvm/Module.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-using namespace llvm;
-
-char LoopSplitter::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting",
- "Split virtual regists across loop boundaries.", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(LoopSplitter, "loop-splitting",
- "Split virtual regists across loop boundaries.", false, false)
-
-namespace llvm {
-
- class StartSlotComparator {
- public:
- StartSlotComparator(LiveIntervals &lis) : lis(lis) {}
- bool operator()(const MachineBasicBlock *mbb1,
- const MachineBasicBlock *mbb2) const {
- return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2);
- }
- private:
- LiveIntervals &lis;
- };
-
- class LoopSplit {
- public:
- LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop)
- : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) {
- assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
- "Cannot split physical registers.");
- }
-
- LiveInterval& getLI() const { return li; }
-
- MachineLoop& getLoop() const { return loop; }
-
- bool isValid() const { return valid; }
-
- bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); }
-
- void invalidate() { valid = false; }
-
- void splitIncoming() { inSplit = true; }
-
- void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); }
-
- void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); }
-
- void apply() {
- assert(valid && "Attempt to apply invalid split.");
- applyIncoming();
- applyOutgoing();
- copyRanges();
- renameInside();
- }
-
- private:
- LoopSplitter &ls;
- LiveInterval &li;
- MachineLoop &loop;
- bool valid, inSplit;
- std::set<MachineLoop::Edge> outSplits;
- std::vector<MachineInstr*> loopInstrs;
-
- LiveInterval *newLI;
- std::map<VNInfo*, VNInfo*> vniMap;
-
- LiveInterval* getNewLI() {
- if (newLI == 0) {
- const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg);
- unsigned vreg = ls.mri->createVirtualRegister(trc);
- newLI = &ls.lis->getOrCreateInterval(vreg);
- }
- return newLI;
- }
-
- VNInfo* getNewVNI(VNInfo *oldVNI) {
- VNInfo *newVNI = vniMap[oldVNI];
-
- if (newVNI == 0) {
- newVNI = getNewLI()->createValueCopy(oldVNI,
- ls.lis->getVNInfoAllocator());
- vniMap[oldVNI] = newVNI;
- }
-
- return newVNI;
- }
-
- void applyIncoming() {
- if (!inSplit) {
- return;
- }
-
- MachineBasicBlock *preHeader = loop.getLoopPreheader();
- if (preHeader == 0) {
- assert(ls.canInsertPreHeader(loop) &&
- "Can't insert required preheader.");
- preHeader = &ls.insertPreHeader(loop);
- }
-
- LiveRange *preHeaderRange =
- ls.lis->findExitingRange(li, preHeader);
- assert(preHeaderRange != 0 && "Range not live into preheader.");
-
- // Insert the new copy.
- MachineInstr *copy = BuildMI(*preHeader,
- preHeader->getFirstTerminator(),
- DebugLoc(),
- ls.tii->get(TargetOpcode::COPY))
- .addReg(getNewLI()->reg, RegState::Define)
- .addReg(li.reg, RegState::Kill);
-
- ls.lis->InsertMachineInstrInMaps(copy);
-
- SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
-
- VNInfo *newVal = getNewVNI(preHeaderRange->valno);
- newVal->def = copyDefIdx;
- newVal->setCopy(copy);
- li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true);
-
- getNewLI()->addRange(LiveRange(copyDefIdx,
- ls.lis->getMBBEndIdx(preHeader),
- newVal));
- }
-
- void applyOutgoing() {
-
- for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(),
- osEnd = outSplits.end();
- osItr != osEnd; ++osItr) {
- MachineLoop::Edge edge = *osItr;
- MachineBasicBlock *outBlock = edge.second;
- if (ls.isCriticalEdge(edge)) {
- assert(ls.canSplitEdge(edge) && "Unsplitable critical edge.");
- outBlock = &ls.splitEdge(edge, loop);
- }
- LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock);
- assert(outRange != 0 && "No exiting range?");
-
- MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(),
- DebugLoc(),
- ls.tii->get(TargetOpcode::COPY))
- .addReg(li.reg, RegState::Define)
- .addReg(getNewLI()->reg, RegState::Kill);
-
- ls.lis->InsertMachineInstrInMaps(copy);
-
- SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
-
- // Blow away output range definition.
- outRange->valno->def = ls.lis->getInvalidIndex();
- li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx);
-
- SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock);
- assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal =
- getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator());
-
- getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock),
- copyDefIdx, newVal));
-
- }
- }
-
- void copyRange(LiveRange &lr) {
- std::pair<bool, LoopSplitter::SlotPair> lsr =
- ls.getLoopSubRange(lr, loop);
-
- if (!lsr.first)
- return;
-
- LiveRange loopRange(lsr.second.first, lsr.second.second,
- getNewVNI(lr.valno));
-
- li.removeRange(loopRange.start, loopRange.end, true);
-
- getNewLI()->addRange(loopRange);
- }
-
- void copyRanges() {
- for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
- iEnd = loopInstrs.end();
- iItr != iEnd; ++iItr) {
- MachineInstr &instr = **iItr;
- SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr);
- if (instr.modifiesRegister(li.reg, 0)) {
- LiveRange *defRange =
- li.getLiveRangeContaining(instrIdx.getDefIndex());
- if (defRange != 0) // May have caught this already.
- copyRange(*defRange);
- }
- if (instr.readsRegister(li.reg, 0)) {
- LiveRange *useRange =
- li.getLiveRangeContaining(instrIdx.getUseIndex());
- if (useRange != 0) { // May have caught this already.
- copyRange(*useRange);
- }
- }
- }
-
- for (MachineLoop::block_iterator bbItr = loop.block_begin(),
- bbEnd = loop.block_end();
- bbItr != bbEnd; ++bbItr) {
- MachineBasicBlock &loopBlock = **bbItr;
- LiveRange *enteringRange =
- ls.lis->findEnteringRange(li, &loopBlock);
- if (enteringRange != 0) {
- copyRange(*enteringRange);
- }
- }
- }
-
- void renameInside() {
- for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
- iEnd = loopInstrs.end();
- iItr != iEnd; ++iItr) {
- MachineInstr &instr = **iItr;
- for (unsigned i = 0; i < instr.getNumOperands(); ++i) {
- MachineOperand &mop = instr.getOperand(i);
- if (mop.isReg() && mop.getReg() == li.reg) {
- mop.setReg(getNewLI()->reg);
- }
- }
- }
- }
-
- };
-
- void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const {
- au.addRequired<MachineDominatorTree>();
- au.addPreserved<MachineDominatorTree>();
- au.addRequired<MachineLoopInfo>();
- au.addPreserved<MachineLoopInfo>();
- au.addPreservedID(RegisterCoalescerPassID);
- au.addPreserved<CalculateSpillWeights>();
- au.addPreserved<LiveStacks>();
- au.addRequired<SlotIndexes>();
- au.addPreserved<SlotIndexes>();
- au.addRequired<LiveIntervals>();
- au.addPreserved<LiveIntervals>();
- MachineFunctionPass::getAnalysisUsage(au);
- }
-
- bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) {
-
- mf = &fn;
- mri = &mf->getRegInfo();
- tii = mf->getTarget().getInstrInfo();
- tri = mf->getTarget().getRegisterInfo();
- sis = &getAnalysis<SlotIndexes>();
- lis = &getAnalysis<LiveIntervals>();
- mli = &getAnalysis<MachineLoopInfo>();
- mdt = &getAnalysis<MachineDominatorTree>();
-
- fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
- mf->getFunction()->getName().str();
-
- dbgs() << "Splitting " << mf->getFunction()->getName() << ".";
-
- dumpOddTerminators();
-
-// dbgs() << "----------------------------------------\n";
-// lis->dump();
-// dbgs() << "----------------------------------------\n";
-
-// std::deque<MachineLoop*> loops;
-// std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
-// dbgs() << "Loops:\n";
-// while (!loops.empty()) {
-// MachineLoop &loop = *loops.front();
-// loops.pop_front();
-// std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
-
-// dumpLoopInfo(loop);
-// }
-
- //lis->dump();
- //exit(0);
-
- // Setup initial intervals.
- for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval *li = liItr->second;
-
- if (TargetRegisterInfo::isVirtualRegister(li->reg) &&
- !lis->intervalIsInOneMBB(*li)) {
- intervals.push_back(li);
- }
- }
-
- processIntervals();
-
- intervals.clear();
-
-// dbgs() << "----------------------------------------\n";
-// lis->dump();
-// dbgs() << "----------------------------------------\n";
-
- dumpOddTerminators();
-
- //exit(1);
-
- return false;
- }
-
- void LoopSplitter::releaseMemory() {
- fqn.clear();
- intervals.clear();
- loopRangeMap.clear();
- }
-
- void LoopSplitter::dumpOddTerminators() {
- for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end();
- bbItr != bbEnd; ++bbItr) {
- MachineBasicBlock *mbb = &*bbItr;
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
- if (tii->AnalyzeBranch(*mbb, a, b, c)) {
- dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n";
- dbgs() << " Terminators:\n";
- for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end();
- iItr != iEnd; ++iItr) {
- MachineInstr *instr= &*iItr;
- dbgs() << " " << *instr << "";
- }
- dbgs() << "\n Listed successors: [ ";
- for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end();
- sItr != sEnd; ++sItr) {
- MachineBasicBlock *succMBB = *sItr;
- dbgs() << succMBB->getNumber() << " ";
- }
- dbgs() << "]\n\n";
- }
- }
- }
-
- void LoopSplitter::dumpLoopInfo(MachineLoop &loop) {
- MachineBasicBlock &headerBlock = *loop.getHeader();
- typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
- ExitEdgesList exitEdges;
- loop.getExitEdges(exitEdges);
-
- dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ ";
- for (std::vector<MachineBasicBlock*>::const_iterator
- subBlockItr = loop.getBlocks().begin(),
- subBlockEnd = loop.getBlocks().end();
- subBlockItr != subBlockEnd; ++subBlockItr) {
- MachineBasicBlock &subBlock = **subBlockItr;
- dbgs() << "BB#" << subBlock.getNumber() << " ";
- }
- dbgs() << "], Exit edges: [ ";
- for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
- exitEdgeEnd = exitEdges.end();
- exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
- MachineLoop::Edge &exitEdge = *exitEdgeItr;
- dbgs() << "(MBB#" << exitEdge.first->getNumber()
- << ", MBB#" << exitEdge.second->getNumber() << ") ";
- }
- dbgs() << "], Sub-Loop Headers: [ ";
- for (MachineLoop::iterator subLoopItr = loop.begin(),
- subLoopEnd = loop.end();
- subLoopItr != subLoopEnd; ++subLoopItr) {
- MachineLoop &subLoop = **subLoopItr;
- MachineBasicBlock &subLoopBlock = *subLoop.getHeader();
- dbgs() << "BB#" << subLoopBlock.getNumber() << " ";
- }
- dbgs() << "]\n";
- }
-
- void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) {
- mbb.updateTerminator();
-
- for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end();
- miItr != miEnd; ++miItr) {
- if (lis->isNotInMIMap(miItr)) {
- lis->InsertMachineInstrInMaps(miItr);
- }
- }
- }
-
- bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) {
- MachineBasicBlock *header = loop.getHeader();
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
-
- for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(),
- pbEnd = header->pred_end();
- pbItr != pbEnd; ++pbItr) {
- MachineBasicBlock *predBlock = *pbItr;
- if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) {
- return false;
- }
- }
-
- MachineFunction::iterator headerItr(header);
- if (headerItr == mf->begin())
- return true;
- MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr);
- assert(headerLayoutPred != 0 && "Header should have layout pred.");
-
- return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c));
- }
-
- MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) {
- assert(loop.getLoopPreheader() == 0 && "Loop already has preheader.");
-
- MachineBasicBlock &header = *loop.getHeader();
-
- // Save the preds - we'll need to update them once we insert the preheader.
- typedef std::set<MachineBasicBlock*> HeaderPreds;
- HeaderPreds headerPreds;
-
- for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
- predEnd = header.pred_end();
- predItr != predEnd; ++predItr) {
- if (!loop.contains(*predItr))
- headerPreds.insert(*predItr);
- }
-
- assert(!headerPreds.empty() && "No predecessors for header?");
-
- //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader...";
-
- MachineBasicBlock *preHeader =
- mf->CreateMachineBasicBlock(header.getBasicBlock());
-
- assert(preHeader != 0 && "Failed to create pre-header.");
-
- mf->insert(header, preHeader);
-
- for (HeaderPreds::iterator hpItr = headerPreds.begin(),
- hpEnd = headerPreds.end();
- hpItr != hpEnd; ++hpItr) {
- assert(*hpItr != 0 && "How'd a null predecessor get into this set?");
- MachineBasicBlock &hp = **hpItr;
- hp.ReplaceUsesOfBlockWith(&header, preHeader);
- }
- preHeader->addSuccessor(&header);
-
- MachineBasicBlock *oldLayoutPred =
- llvm::prior(MachineFunction::iterator(preHeader));
- if (oldLayoutPred != 0) {
- updateTerminators(*oldLayoutPred);
- }
-
- lis->InsertMBBInMaps(preHeader);
-
- if (MachineLoop *parentLoop = loop.getParentLoop()) {
- assert(parentLoop->getHeader() != loop.getHeader() &&
- "Parent loop has same header?");
- parentLoop->addBasicBlockToLoop(preHeader, mli->getBase());
-
- // Invalidate all parent loop ranges.
- while (parentLoop != 0) {
- loopRangeMap.erase(parentLoop);
- parentLoop = parentLoop->getParentLoop();
- }
- }
-
- for (LiveIntervals::iterator liItr = lis->begin(),
- liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval &li = *liItr->second;
-
- // Is this safe for physregs?
- // TargetRegisterInfo::isPhysicalRegister(li.reg) ||
- if (!lis->isLiveInToMBB(li, &header))
- continue;
-
- if (lis->isLiveInToMBB(li, preHeader)) {
- assert(lis->isLiveOutOfMBB(li, preHeader) &&
- "Range terminates in newly added preheader?");
- continue;
- }
-
- bool insertRange = false;
-
- for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(),
- predEnd = preHeader->pred_end();
- predItr != predEnd; ++predItr) {
- MachineBasicBlock *predMBB = *predItr;
- if (lis->isLiveOutOfMBB(li, predMBB)) {
- insertRange = true;
- break;
- }
- }
-
- if (!insertRange)
- continue;
-
- SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader);
- assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator());
- li.addRange(LiveRange(lis->getMBBStartIdx(preHeader),
- lis->getMBBEndIdx(preHeader),
- newVal));
- }
-
-
- //dbgs() << "Dumping SlotIndexes:\n";
- //sis->dump();
-
- //dbgs() << "done. (Added MBB#" << preHeader->getNumber() << ")\n";
-
- return *preHeader;
- }
-
- bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) {
- assert(edge.first->succ_size() > 1 && "Non-sensical edge.");
- if (edge.second->pred_size() > 1)
- return true;
- return false;
- }
-
- bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) {
- MachineFunction::iterator outBlockItr(edge.second);
- if (outBlockItr == mf->begin())
- return true;
- MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr);
- assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin.");
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
- return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) &&
- !tii->AnalyzeBranch(*edge.first, a, b, c));
- }
-
- MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge,
- MachineLoop &loop) {
-
- MachineBasicBlock &inBlock = *edge.first;
- MachineBasicBlock &outBlock = *edge.second;
-
- assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) &&
- "Splitting non-critical edge?");
-
- //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber()
- // << " -> MBB#" << outBlock.getNumber() << ")...";
-
- MachineBasicBlock *splitBlock =
- mf->CreateMachineBasicBlock();
-
- assert(splitBlock != 0 && "Failed to create split block.");
-
- mf->insert(&outBlock, splitBlock);
-
- inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock);
- splitBlock->addSuccessor(&outBlock);
-
- MachineBasicBlock *oldLayoutPred =
- llvm::prior(MachineFunction::iterator(splitBlock));
- if (oldLayoutPred != 0) {
- updateTerminators(*oldLayoutPred);
- }
-
- lis->InsertMBBInMaps(splitBlock);
-
- loopRangeMap.erase(&loop);
-
- MachineLoop *splitParentLoop = loop.getParentLoop();
- while (splitParentLoop != 0 &&
- !splitParentLoop->contains(&outBlock)) {
- splitParentLoop = splitParentLoop->getParentLoop();
- }
-
- if (splitParentLoop != 0) {
- assert(splitParentLoop->contains(&loop) &&
- "Split-block parent doesn't contain original loop?");
- splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase());
-
- // Invalidate all parent loop ranges.
- while (splitParentLoop != 0) {
- loopRangeMap.erase(splitParentLoop);
- splitParentLoop = splitParentLoop->getParentLoop();
- }
- }
-
-
- for (LiveIntervals::iterator liItr = lis->begin(),
- liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval &li = *liItr->second;
- bool intersects = lis->isLiveOutOfMBB(li, &inBlock) &&
- lis->isLiveInToMBB(li, &outBlock);
- if (lis->isLiveInToMBB(li, splitBlock)) {
- if (!intersects) {
- li.removeRange(lis->getMBBStartIdx(splitBlock),
- lis->getMBBEndIdx(splitBlock), true);
- }
- } else if (intersects) {
- SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock);
- assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal = li.getNextValue(newDefIdx, 0,
- lis->getVNInfoAllocator());
- li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock),
- lis->getMBBEndIdx(splitBlock),
- newVal));
- }
- }
-
- //dbgs() << "done. (Added MBB#" << splitBlock->getNumber() << ")\n";
-
- return *splitBlock;
- }
-
- LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) {
- typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet;
- LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop);
- if (lrItr == loopRangeMap.end()) {
- LoopMBBSet loopMBBs((StartSlotComparator(*lis)));
- std::copy(loop.block_begin(), loop.block_end(),
- std::inserter(loopMBBs, loopMBBs.begin()));
-
- assert(!loopMBBs.empty() && "No blocks in loop?");
-
- LoopRanges &loopRanges = loopRangeMap[&loop];
- assert(loopRanges.empty() && "Loop encountered but not processed?");
- SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin());
- loopRanges.push_back(
- std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()),
- lis->getInvalidIndex()));
- for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()),
- curBlockEnd = loopMBBs.end();
- curBlockItr != curBlockEnd; ++curBlockItr) {
- SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr);
- if (newStart != oldEnd) {
- loopRanges.back().second = oldEnd;
- loopRanges.push_back(std::make_pair(newStart,
- lis->getInvalidIndex()));
- }
- oldEnd = lis->getMBBEndIdx(*curBlockItr);
- }
-
- loopRanges.back().second =
- lis->getMBBEndIdx(*llvm::prior(loopMBBs.end()));
-
- return loopRanges;
- }
- return lrItr->second;
- }
-
- std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange(
- const LiveRange &lr,
- MachineLoop &loop) {
- LoopRanges &loopRanges = getLoopRanges(loop);
- LoopRanges::iterator lrItr = loopRanges.begin(),
- lrEnd = loopRanges.end();
- while (lrItr != lrEnd && lr.start >= lrItr->second) {
- ++lrItr;
- }
-
- if (lrItr == lrEnd) {
- SlotIndex invalid = lis->getInvalidIndex();
- return std::make_pair(false, SlotPair(invalid, invalid));
- }
-
- SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start);
- SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end);
-
- return std::make_pair(true, SlotPair(srStart, srEnd));
- }
-
- void LoopSplitter::dumpLoopRanges(MachineLoop &loop) {
- LoopRanges &loopRanges = getLoopRanges(loop);
- dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ ";
- for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end();
- lrItr != lrEnd; ++lrItr) {
- dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") ";
- }
- dbgs() << "]\n";
- }
-
- void LoopSplitter::processHeader(LoopSplit &split) {
- MachineBasicBlock &header = *split.getLoop().getHeader();
- //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n";
-
- if (!lis->isLiveInToMBB(split.getLI(), &header))
- return; // Not live in, but nothing wrong so far.
-
- MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader();
- if (!preHeader) {
-
- if (!canInsertPreHeader(split.getLoop())) {
- split.invalidate();
- return; // Couldn't insert a pre-header. Bail on this interval.
- }
-
- for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
- predEnd = header.pred_end();
- predItr != predEnd; ++predItr) {
- if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) {
- split.splitIncoming();
- break;
- }
- }
- } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) {
- split.splitIncoming();
- }
- }
-
- void LoopSplitter::processLoopExits(LoopSplit &split) {
- typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
- ExitEdgesList exitEdges;
- split.getLoop().getExitEdges(exitEdges);
-
- //dbgs() << " Processing loop exits:\n";
-
- for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
- exitEdgeEnd = exitEdges.end();
- exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
- MachineLoop::Edge exitEdge = *exitEdgeItr;
-
- LiveRange *outRange =
- split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second));
-
- if (outRange != 0) {
- if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) {
- split.invalidate();
- return;
- }
-
- split.splitOutgoing(exitEdge);
- }
- }
- }
-
- void LoopSplitter::processLoopUses(LoopSplit &split) {
- std::set<MachineInstr*> processed;
-
- for (MachineRegisterInfo::reg_iterator
- rItr = mri->reg_begin(split.getLI().reg),
- rEnd = mri->reg_end();
- rItr != rEnd; ++rItr) {
- MachineInstr &instr = *rItr;
- if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) {
- split.addLoopInstr(&instr);
- processed.insert(&instr);
- }
- }
-
- //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg
- // << " in blocks [ ";
- //dbgs() << "]\n";
- }
-
- bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) {
- assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
- "Attempt to split physical register.");
-
- LoopSplit split(*this, li, loop);
- processHeader(split);
- if (split.isValid())
- processLoopExits(split);
- if (split.isValid())
- processLoopUses(split);
- if (split.isValid() /* && split.isWorthwhile() */) {
- split.apply();
- DEBUG(dbgs() << "Success.\n");
- return true;
- }
- DEBUG(dbgs() << "Failed.\n");
- return false;
- }
-
- void LoopSplitter::processInterval(LiveInterval &li) {
- std::deque<MachineLoop*> loops;
- std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
-
- while (!loops.empty()) {
- MachineLoop &loop = *loops.front();
- loops.pop_front();
- DEBUG(
- dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#"
- << loop.getHeader()->getNumber() << " ";
- );
- if (!splitOverLoop(li, loop)) {
- // Couldn't split over outer loop, schedule sub-loops to be checked.
- std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
- }
- }
- }
-
- void LoopSplitter::processIntervals() {
- while (!intervals.empty()) {
- LiveInterval &li = *intervals.front();
- intervals.pop_front();
-
- assert(!lis->intervalIsInOneMBB(li) &&
- "Single interval in process worklist.");
-
- processInterval(li);
- }
- }
-
-}
diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h
deleted file mode 100644
index 9fb1b8b30139..000000000000
--- a/lib/CodeGen/Splitter.h
+++ /dev/null
@@ -1,101 +0,0 @@
-//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_SPLITTER_H
-#define LLVM_CODEGEN_SPLITTER_H
-
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-
-#include <deque>
-#include <map>
-#include <string>
-#include <vector>
-
-namespace llvm {
-
- class LiveInterval;
- class LiveIntervals;
- struct LiveRange;
- class LoopSplit;
- class MachineDominatorTree;
- class MachineRegisterInfo;
- class SlotIndexes;
- class TargetInstrInfo;
- class VNInfo;
-
- class LoopSplitter : public MachineFunctionPass {
- friend class LoopSplit;
- public:
- static char ID;
-
- LoopSplitter() : MachineFunctionPass(ID) {
- initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &au) const;
-
- virtual bool runOnMachineFunction(MachineFunction &fn);
-
- virtual void releaseMemory();
-
-
- private:
-
- MachineFunction *mf;
- LiveIntervals *lis;
- MachineLoopInfo *mli;
- MachineRegisterInfo *mri;
- MachineDominatorTree *mdt;
- SlotIndexes *sis;
- const TargetInstrInfo *tii;
- const TargetRegisterInfo *tri;
-
- std::string fqn;
- std::deque<LiveInterval*> intervals;
-
- typedef std::pair<SlotIndex, SlotIndex> SlotPair;
- typedef std::vector<SlotPair> LoopRanges;
- typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap;
- LoopRangeMap loopRangeMap;
-
- void dumpLoopInfo(MachineLoop &loop);
-
- void dumpOddTerminators();
-
- void updateTerminators(MachineBasicBlock &mbb);
-
- bool canInsertPreHeader(MachineLoop &loop);
- MachineBasicBlock& insertPreHeader(MachineLoop &loop);
-
- bool isCriticalEdge(MachineLoop::Edge &edge);
- bool canSplitEdge(MachineLoop::Edge &edge);
- MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop);
-
- LoopRanges& getLoopRanges(MachineLoop &loop);
- std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr,
- MachineLoop &loop);
-
- void dumpLoopRanges(MachineLoop &loop);
-
- void processHeader(LoopSplit &split);
- void processLoopExits(LoopSplit &split);
- void processLoopUses(LoopSplit &split);
-
- bool splitOverLoop(LiveInterval &li, MachineLoop &loop);
-
- void processInterval(LiveInterval &li);
-
- void processIntervals();
- };
-
-}
-
-#endif
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 1f0e5a2711ae..43a6ad8c97a4 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -123,16 +123,11 @@ bool StackProtector::RequiresStackProtector() const {
// protectors.
return true;
- if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
- // We apparently only care about character arrays.
- if (!AT->getElementType()->isIntegerTy(8))
- continue;
-
+ if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType()))
// If an array has more than SSPBufferSize bytes of allocated space,
// then we emit stack protectors.
if (SSPBufferSize <= TD->getTypeAllocSize(AT))
return true;
- }
}
}
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 57cbe1ba5960..1e940b1d0711 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "stackcoloring"
-#include "VirtRegMap.h"
#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/Passes.h"
@@ -40,29 +39,17 @@ DisableSharing("no-stack-slot-sharing",
cl::init(false), cl::Hidden,
cl::desc("Suppress slot sharing during stack coloring"));
-static cl::opt<bool>
-ColorWithRegsOpt("color-ss-with-regs",
- cl::init(false), cl::Hidden,
- cl::desc("Color stack slots with free registers"));
-
-
static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
-STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs");
-STATISTIC(NumLoadElim, "Number of loads eliminated");
-STATISTIC(NumStoreElim, "Number of stores eliminated");
STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
namespace {
class StackSlotColoring : public MachineFunctionPass {
bool ColorWithRegs;
LiveStacks* LS;
- VirtRegMap* VRM;
MachineFrameInfo *MFI;
- MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
const MachineLoopInfo *loopInfo;
// SSIntervals - Spill slot intervals.
@@ -98,18 +85,12 @@ namespace {
MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {
initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
}
- StackSlotColoring(bool RegColor) :
- MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {
- initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
- }
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveStacks>();
- AU.addRequired<VirtRegMap>();
- AU.addPreserved<VirtRegMap>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addPreservedID(MachineDominatorsID);
@@ -117,9 +98,6 @@ namespace {
}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char* getPassName() const {
- return "Stack Slot Coloring";
- }
private:
void InitializeSlots();
@@ -127,41 +105,23 @@ namespace {
bool OverlapWithAssignments(LiveInterval *li, int Color) const;
int ColorSlot(LiveInterval *li);
bool ColorSlots(MachineFunction &MF);
- bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
- SmallVector<SmallVector<int, 4>, 16> &RevMap,
- BitVector &SlotIsReg);
void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
MachineFunction &MF);
- bool PropagateBackward(MachineBasicBlock::iterator MII,
- MachineBasicBlock *MBB,
- unsigned OldReg, unsigned NewReg);
- bool PropagateForward(MachineBasicBlock::iterator MII,
- MachineBasicBlock *MBB,
- unsigned OldReg, unsigned NewReg);
- void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
- unsigned Reg, const TargetRegisterClass *RC,
- SmallSet<unsigned, 4> &Defs,
- MachineFunction &MF);
- bool AllMemRefsCanBeUnfolded(int SS);
bool RemoveDeadStores(MachineBasicBlock* MBB);
};
} // end anonymous namespace
char StackSlotColoring::ID = 0;
+char &llvm::StackSlotColoringID = StackSlotColoring::ID;
INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
"Stack Slot Coloring", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
-INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
"Stack Slot Coloring", false, false)
-FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
- return new StackSlotColoring(RegColor);
-}
-
namespace {
// IntervalSorter - Comparison predicate that sort live intervals by
// their weight.
@@ -248,79 +208,6 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
return false;
}
-/// ColorSlotsWithFreeRegs - If there are any free registers available, try
-/// replacing spill slots references with registers instead.
-bool
-StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
- SmallVector<SmallVector<int, 4>, 16> &RevMap,
- BitVector &SlotIsReg) {
- if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters())
- return false;
-
- bool Changed = false;
- DEBUG(dbgs() << "Assigning unused registers to spill slots:\n");
- for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
- LiveInterval *li = SSIntervals[i];
- int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
- if (!UsedColors[SS] || li->weight < 20)
- // If the weight is < 20, i.e. two references in a loop with depth 1,
- // don't bother with it.
- continue;
-
- // These slots allow to share the same registers.
- bool AllColored = true;
- SmallVector<unsigned, 4> ColoredRegs;
- for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) {
- int RSS = RevMap[SS][j];
- const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS);
- // If it's not colored to another stack slot, try coloring it
- // to a "free" register.
- if (!RC) {
- AllColored = false;
- continue;
- }
- unsigned Reg = VRM->getFirstUnusedRegister(RC);
- if (!Reg) {
- AllColored = false;
- continue;
- }
- if (!AllMemRefsCanBeUnfolded(RSS)) {
- AllColored = false;
- continue;
- } else {
- DEBUG(dbgs() << "Assigning fi#" << RSS << " to "
- << TRI->getName(Reg) << '\n');
- ColoredRegs.push_back(Reg);
- SlotMapping[RSS] = Reg;
- SlotIsReg.set(RSS);
- Changed = true;
- }
- }
-
- // Register and its sub-registers are no longer free.
- while (!ColoredRegs.empty()) {
- unsigned Reg = ColoredRegs.back();
- ColoredRegs.pop_back();
- VRM->setRegisterUsed(Reg);
- // If reg is a callee-saved register, it will have to be spilled in
- // the prologue.
- MRI->setPhysRegUsed(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- VRM->setRegisterUsed(*AS);
- MRI->setPhysRegUsed(*AS);
- }
- }
- // This spill slot is dead after the rewrites
- if (AllColored) {
- MFI->RemoveStackObject(SS);
- ++NumEliminated;
- }
- }
- DEBUG(dbgs() << '\n');
-
- return Changed;
-}
-
/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
///
int StackSlotColoring::ColorSlot(LiveInterval *li) {
@@ -372,7 +259,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
SmallVector<int, 16> SlotMapping(NumObjs, -1);
SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
- BitVector SlotIsReg(NumObjs);
BitVector UsedColors(NumObjs);
DEBUG(dbgs() << "Color spill slot intervals:\n");
@@ -404,31 +290,19 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
DEBUG(dbgs() << '\n');
#endif
- // Can we "color" a stack slot with a unused register?
- Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg);
-
if (!Changed)
return false;
// Rewrite all MO_FrameIndex operands.
SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
- bool isReg = SlotIsReg[SS];
int NewFI = SlotMapping[SS];
- if (NewFI == -1 || (NewFI == (int)SS && !isReg))
+ if (NewFI == -1 || (NewFI == (int)SS))
continue;
- const TargetRegisterClass *RC = LS->getIntervalRegClass(SS);
SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
- if (!isReg)
- RewriteInstruction(RefMIs[i], SS, NewFI, MF);
- else {
- // Rewrite to use a register instead.
- unsigned MBBId = RefMIs[i]->getParent()->getNumber();
- SmallSet<unsigned, 4> &Defs = NewDefs[MBBId];
- UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF);
- }
+ RewriteInstruction(RefMIs[i], SS, NewFI, MF);
}
// Delete unused stack slots.
@@ -441,28 +315,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
return true;
}
-/// AllMemRefsCanBeUnfolded - Return true if all references of the specified
-/// spill slot index can be unfolded.
-bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) {
- SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
- for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) {
- MachineInstr *MI = RefMIs[i];
- if (TII->isLoadFromStackSlot(MI, SS) ||
- TII->isStoreToStackSlot(MI, SS))
- // Restore and spill will become copies.
- return true;
- if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false))
- return false;
- for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
- MachineOperand &MO = MI->getOperand(j);
- if (MO.isFI() && MO.getIndex() != SS)
- // If it uses another frameindex, we can, currently* unfold it.
- return false;
- }
- }
- return true;
-}
-
/// RewriteInstruction - Rewrite specified instruction by replacing references
/// to old frame index with new one.
void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
@@ -489,179 +341,6 @@ void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
(*I)->setValue(NewSV);
}
-/// PropagateBackward - Traverse backward and look for the definition of
-/// OldReg. If it can successfully update all of the references with NewReg,
-/// do so and return true.
-bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
- MachineBasicBlock *MBB,
- unsigned OldReg, unsigned NewReg) {
- if (MII == MBB->begin())
- return false;
-
- SmallVector<MachineOperand*, 4> Uses;
- SmallVector<MachineOperand*, 4> Refs;
- while (--MII != MBB->begin()) {
- bool FoundDef = false; // Not counting 2address def.
-
- Uses.clear();
- const MCInstrDesc &MCID = MII->getDesc();
- for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MII->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
- if (Reg == OldReg) {
- if (MO.isImplicit())
- return false;
-
- // Abort the use is actually a sub-register def. We don't have enough
- // information to figure out if it is really legal.
- if (MO.getSubReg() || MII->isSubregToReg())
- return false;
-
- const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
- if (RC && !RC->contains(NewReg))
- return false;
-
- if (MO.isUse()) {
- Uses.push_back(&MO);
- } else {
- Refs.push_back(&MO);
- if (!MII->isRegTiedToUseOperand(i))
- FoundDef = true;
- }
- } else if (TRI->regsOverlap(Reg, NewReg)) {
- return false;
- } else if (TRI->regsOverlap(Reg, OldReg)) {
- if (!MO.isUse() || !MO.isKill())
- return false;
- }
- }
-
- if (FoundDef) {
- // Found non-two-address def. Stop here.
- for (unsigned i = 0, e = Refs.size(); i != e; ++i)
- Refs[i]->setReg(NewReg);
- return true;
- }
-
- // Two-address uses must be updated as well.
- for (unsigned i = 0, e = Uses.size(); i != e; ++i)
- Refs.push_back(Uses[i]);
- }
- return false;
-}
-
-/// PropagateForward - Traverse forward and look for the kill of OldReg. If
-/// it can successfully update all of the uses with NewReg, do so and
-/// return true.
-bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
- MachineBasicBlock *MBB,
- unsigned OldReg, unsigned NewReg) {
- if (MII == MBB->end())
- return false;
-
- SmallVector<MachineOperand*, 4> Uses;
- while (++MII != MBB->end()) {
- bool FoundKill = false;
- const MCInstrDesc &MCID = MII->getDesc();
- for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MII->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
- if (Reg == OldReg) {
- if (MO.isDef() || MO.isImplicit())
- return false;
-
- // Abort the use is actually a sub-register use. We don't have enough
- // information to figure out if it is really legal.
- if (MO.getSubReg())
- return false;
-
- const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
- if (RC && !RC->contains(NewReg))
- return false;
- if (MO.isKill())
- FoundKill = true;
-
- Uses.push_back(&MO);
- } else if (TRI->regsOverlap(Reg, NewReg) ||
- TRI->regsOverlap(Reg, OldReg))
- return false;
- }
- if (FoundKill) {
- for (unsigned i = 0, e = Uses.size(); i != e; ++i)
- Uses[i]->setReg(NewReg);
- return true;
- }
- }
- return false;
-}
-
-/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding
-/// folded memory references and replacing those references with register
-/// references instead.
-void
-StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
- unsigned Reg,
- const TargetRegisterClass *RC,
- SmallSet<unsigned, 4> &Defs,
- MachineFunction &MF) {
- MachineBasicBlock *MBB = MI->getParent();
- if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) {
- if (PropagateForward(MI, MBB, DstReg, Reg)) {
- DEBUG(dbgs() << "Eliminated load: ");
- DEBUG(MI->dump());
- ++NumLoadElim;
- } else {
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
- DstReg).addReg(Reg);
- ++NumRegRepl;
- }
-
- if (!Defs.count(Reg)) {
- // If this is the first use of Reg in this MBB and it wasn't previously
- // defined in MBB, add it to livein.
- MBB->addLiveIn(Reg);
- Defs.insert(Reg);
- }
- } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) {
- if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) {
- DEBUG(dbgs() << "Eliminated store: ");
- DEBUG(MI->dump());
- ++NumStoreElim;
- } else {
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg)
- .addReg(SrcReg);
- ++NumRegRepl;
- }
-
- // Remember reg has been defined in MBB.
- Defs.insert(Reg);
- } else {
- SmallVector<MachineInstr*, 4> NewMIs;
- bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs);
- (void)Success; // Silence compiler warning.
- assert(Success && "Failed to unfold!");
- MachineInstr *NewMI = NewMIs[0];
- MBB->insert(MI, NewMI);
- ++NumRegRepl;
-
- if (NewMI->readsRegister(Reg)) {
- if (!Defs.count(Reg))
- // If this is the first use of Reg in this MBB and it wasn't previously
- // defined in MBB, add it to livein.
- MBB->addLiveIn(Reg);
- Defs.insert(Reg);
- }
- }
- MBB->erase(MI);
-}
/// RemoveDeadStores - Scan through a basic block and look for loads followed
/// by stores. If they're both using the same stack slot, then the store is
@@ -679,33 +358,33 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
I != E; ++I) {
if (DCELimit != -1 && (int)NumDead >= DCELimit)
break;
-
+
MachineBasicBlock::iterator NextMI = llvm::next(I);
if (NextMI == MBB->end()) continue;
-
+
int FirstSS, SecondSS;
unsigned LoadReg = 0;
unsigned StoreReg = 0;
if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue;
if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
-
+
++NumDead;
changed = true;
-
+
if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) {
++NumDead;
toErase.push_back(I);
}
-
+
toErase.push_back(NextMI);
++I;
}
-
+
for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(),
E = toErase.end(); I != E; ++I)
(*I)->eraseFromParent();
-
+
return changed;
}
@@ -713,32 +392,27 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
DEBUG({
dbgs() << "********** Stack Slot Coloring **********\n"
- << "********** Function: "
+ << "********** Function: "
<< MF.getFunction()->getName() << '\n';
});
MFI = MF.getFrameInfo();
- MRI = &MF.getRegInfo();
TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
LS = &getAnalysis<LiveStacks>();
- VRM = &getAnalysis<VirtRegMap>();
loopInfo = &getAnalysis<MachineLoopInfo>();
bool Changed = false;
unsigned NumSlots = LS->getNumIntervals();
- if (NumSlots < 2) {
- if (NumSlots == 0 || !VRM->HasUnusedRegisters())
- // Nothing to do!
- return false;
- }
+ if (NumSlots == 0)
+ // Nothing to do!
+ return false;
// If there are calls to setjmp or sigsetjmp, don't perform stack slot
// coloring. The stack could be modified before the longjmp is executed,
// resulting in the wrong value being used afterwards. (See
// <rdar://problem/8007500>.)
- if (MF.callsSetJmp())
+ if (MF.exposesReturnsTwice())
return false;
// Gather spill slot references
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 260cc0ee50a5..c6fdc7382435 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -228,7 +228,6 @@ static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
return &MO;
}
}
- return NULL;
}
bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
@@ -390,12 +389,10 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
MachineOperand *LastUse = findLastUse(MBB, SrcReg);
assert(LastUse);
SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
- SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
+ SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB));
LastUse->setIsKill(true);
}
- LI->renumber();
-
Allocator.Reset();
RegNodeMap.clear();
PHISrcDefs.clear();
@@ -745,7 +742,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
// Set the phi-def flag for the VN at this PHI.
SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
- VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex());
+ VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot());
assert(DestVNI);
DestVNI->setIsPHIDef(true);
@@ -756,7 +753,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
DestVNI->def = MBBStartIndex;
DestLI.addRange(LiveRange(MBBStartIndex,
- PHIIndex.getDefIndex(),
+ PHIIndex.getRegSlot(),
DestVNI));
return;
}
@@ -779,22 +776,21 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
- CopyInstr,
LI->getVNInfoAllocator());
CopyVNI->setIsPHIDef(true);
CopyLI.addRange(LiveRange(MBBStartIndex,
- DestCopyIndex.getDefIndex(),
+ DestCopyIndex.getRegSlot(),
CopyVNI));
// Adjust DestReg's live interval to adjust for its new definition at
// CopyInstr.
LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
- DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex());
+ DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot());
- VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex());
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
assert(DestVNI);
- DestVNI->def = DestCopyIndex.getDefIndex();
+ DestVNI->def = DestCopyIndex.getRegSlot();
InsertedDestCopies[CopyReg] = CopyInstr;
}
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 3a6211a0f3e6..8ebfbcae785b 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -56,10 +56,10 @@ typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
namespace {
/// TailDuplicatePass - Perform tail duplication.
class TailDuplicatePass : public MachineFunctionPass {
- bool PreRegAlloc;
const TargetInstrInfo *TII;
MachineModuleInfo *MMI;
MachineRegisterInfo *MRI;
+ bool PreRegAlloc;
// SSAUpdateVRs - A list of virtual registers for which to update SSA form.
SmallVector<unsigned, 16> SSAUpdateVRs;
@@ -70,11 +70,10 @@ namespace {
public:
static char ID;
- explicit TailDuplicatePass(bool PreRA) :
- MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
+ explicit TailDuplicatePass() :
+ MachineFunctionPass(ID), PreRegAlloc(false) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "Tail Duplication"; }
private:
void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
@@ -118,14 +117,16 @@ namespace {
char TailDuplicatePass::ID = 0;
}
-FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) {
- return new TailDuplicatePass(PreRegAlloc);
-}
+char &llvm::TailDuplicateID = TailDuplicatePass::ID;
+
+INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication",
+ false, false)
bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
MRI = &MF.getRegInfo();
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ PreRegAlloc = MRI->isSSA();
bool MadeChange = false;
while (TailDuplicateBlocks(MF))
@@ -432,7 +433,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
MO.setReg(VI->second);
}
}
- PredBB->insert(PredBB->end(), NewMI);
+ PredBB->insert(PredBB->instr_end(), NewMI);
}
/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
@@ -553,7 +554,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
bool HasIndirectbr = false;
if (!TailBB.empty())
- HasIndirectbr = TailBB.back().getDesc().isIndirectBranch();
+ HasIndirectbr = TailBB.back().isIndirectBranch();
if (HasIndirectbr && PreRegAlloc)
MaxDuplicateCount = 20;
@@ -561,22 +562,21 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
- for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end();
- ++I) {
+ for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) {
// Non-duplicable things shouldn't be tail-duplicated.
- if (I->getDesc().isNotDuplicable())
+ if (I->isNotDuplicable())
return false;
// Do not duplicate 'return' instructions if this is a pre-regalloc run.
// A return may expand into a lot more instructions (e.g. reload of callee
// saved registers) after PEI.
- if (PreRegAlloc && I->getDesc().isReturn())
+ if (PreRegAlloc && I->isReturn())
return false;
// Avoid duplicating calls before register allocation. Calls presents a
// barrier to register allocation so duplicating them may end up increasing
// spills.
- if (PreRegAlloc && I->getDesc().isCall())
+ if (PreRegAlloc && I->isCall())
return false;
if (!I->isPHI() && !I->isDebugValue())
@@ -611,7 +611,7 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
++I;
if (I == E)
return true;
- return I->getDesc().isUnconditionalBranch();
+ return I->isUnconditionalBranch();
}
static bool
@@ -778,8 +778,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
// Clone the contents of TailBB into PredBB.
DenseMap<unsigned, unsigned> LocalVRMap;
SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
- MachineBasicBlock::iterator I = TailBB->begin();
- while (I != TailBB->end()) {
+ // Use instr_iterator here to properly handle bundles, e.g.
+ // ARM Thumb2 IT block.
+ MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
+ while (I != TailBB->instr_end()) {
MachineInstr *MI = &*I;
++I;
if (MI->isPHI()) {
@@ -824,7 +826,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
SmallVector<MachineOperand, 4> PriorCond;
// This has to check PrevBB->succ_size() because EH edges are ignored by
// AnalyzeBranch.
- if (PrevBB->succ_size() == 1 &&
+ if (PrevBB->succ_size() == 1 &&
!TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
!TailBB->hasAddressTaken()) {
@@ -849,6 +851,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
// Replace def of virtual registers with new registers, and update
// uses with PHI source register or the new registers.
MachineInstr *MI = &*I++;
+ assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
MI->eraseFromParent();
}
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
new file mode 100644
index 000000000000..cadb87815dbe
--- /dev/null
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -0,0 +1,45 @@
+//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <cstdlib>
+using namespace llvm;
+
+TargetFrameLowering::~TargetFrameLowering() {
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index. This is the default implementation
+/// which is overridden for some targets.
+int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+}
+
+int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg) const {
+ const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+ // By default, assume all frame indices are referenced via whatever
+ // getFrameRegister() says. The target can override this if it's doing
+ // something different.
+ FrameReg = RI->getFrameRegister(MF);
+ return getFrameIndexOffset(MF, FI);
+}
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index f32678f12b0a..2beb9281e35b 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -77,6 +78,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
unsigned Reg1 = MI->getOperand(Idx1).getReg();
unsigned Reg2 = MI->getOperand(Idx2).getReg();
+ unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0;
+ unsigned SubReg1 = MI->getOperand(Idx1).getSubReg();
+ unsigned SubReg2 = MI->getOperand(Idx2).getSubReg();
bool Reg1IsKill = MI->getOperand(Idx1).isKill();
bool Reg2IsKill = MI->getOperand(Idx2).isKill();
// If destination is tied to either of the commuted source register, then
@@ -85,10 +89,12 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
Reg2IsKill = false;
Reg0 = Reg2;
+ SubReg0 = SubReg2;
} else if (HasDef && Reg0 == Reg2 &&
MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
Reg1IsKill = false;
Reg0 = Reg1;
+ SubReg0 = SubReg1;
}
if (NewMI) {
@@ -97,19 +103,23 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
MachineFunction &MF = *MI->getParent()->getParent();
if (HasDef)
return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
- .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
- .addReg(Reg2, getKillRegState(Reg2IsKill))
- .addReg(Reg1, getKillRegState(Reg2IsKill));
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead), SubReg0)
+ .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2)
+ .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1);
else
return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
- .addReg(Reg2, getKillRegState(Reg2IsKill))
- .addReg(Reg1, getKillRegState(Reg2IsKill));
+ .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2)
+ .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1);
}
- if (HasDef)
+ if (HasDef) {
MI->getOperand(0).setReg(Reg0);
+ MI->getOperand(0).setSubReg(SubReg0);
+ }
MI->getOperand(Idx2).setReg(Reg1);
MI->getOperand(Idx1).setReg(Reg2);
+ MI->getOperand(Idx2).setSubReg(SubReg1);
+ MI->getOperand(Idx1).setSubReg(SubReg2);
MI->getOperand(Idx2).setIsKill(Reg1IsKill);
MI->getOperand(Idx1).setIsKill(Reg2IsKill);
return MI;
@@ -121,6 +131,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
+ assert(!MI->isBundle() &&
+ "TargetInstrInfoImpl::findCommutedOpIndices() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isCommutable())
return false;
@@ -136,11 +149,28 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
}
+bool
+TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ if (!MI->isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MI->isBranch() && !MI->isBarrier())
+ return true;
+ if (!MI->isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+
bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
bool MadeChange = false;
+
+ assert(!MI->isBundle() &&
+ "TargetInstrInfoImpl::PredicateInstruction() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return false;
for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -218,7 +248,7 @@ TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
MachineFunction &MF) const {
- assert(!Orig->getDesc().isNotDuplicable() &&
+ assert(!Orig->isNotDuplicable() &&
"Instruction cannot be duplicated");
return MF.CloneMachineInstr(Orig);
}
@@ -288,16 +318,15 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
// Add a memory operand, foldMemoryOperandImpl doesn't do that.
assert((!(Flags & MachineMemOperand::MOStore) ||
- NewMI->getDesc().mayStore()) &&
+ NewMI->mayStore()) &&
"Folded a def to a non-store!");
assert((!(Flags & MachineMemOperand::MOLoad) ||
- NewMI->getDesc().mayLoad()) &&
+ NewMI->mayLoad()) &&
"Folded a use to a non-load!");
const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
Flags, MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
NewMI->addMemOperand(MF, MMO);
@@ -332,7 +361,7 @@ MachineInstr*
TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr* LoadMI) const {
- assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!");
+ assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!");
#ifndef NDEBUG
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
@@ -360,7 +389,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
const MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetMachine &TM = MF.getTarget();
const TargetInstrInfo &TII = *TM.getInstrInfo();
- const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
// Remat clients assume operand 0 is the defined register.
if (!MI->getNumOperands() || !MI->getOperand(0).isReg())
@@ -383,10 +411,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
return true;
- const MCInstrDesc &MCID = MI->getDesc();
-
// Avoid instructions obviously unsafe for remat.
- if (MCID.isNotDuplicable() || MCID.mayStore() ||
+ if (MI->isNotDuplicable() || MI->mayStore() ||
MI->hasUnmodeledSideEffects())
return false;
@@ -396,7 +422,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
return false;
// Avoid instructions which load from potentially varying memory.
- if (MCID.mayLoad() && !MI->isInvariantLoad(AA))
+ if (MI->mayLoad() && !MI->isInvariantLoad(AA))
return false;
// If any of the registers accessed are non-constant, conservatively assume
@@ -414,19 +440,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI.def_empty(Reg))
- return false;
- BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0);
- if (AllocatableRegs.test(Reg))
+ if (!MRI.isConstantPhysReg(Reg, MF))
return false;
- // Check for a def among the register's aliases too.
- for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- if (!MRI.def_empty(AliasReg))
- return false;
- if (AllocatableRegs.test(AliasReg))
- return false;
- }
} else {
// A physreg def. We can't remat it.
return false;
@@ -457,7 +472,7 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const{
// Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
+ if (MI->isTerminator() || MI->isLabel())
return true;
// Don't attempt to schedule around any instruction that defines
@@ -493,3 +508,32 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
return (ScheduleHazardRecognizer *)
new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}
+
+int
+TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 3848f4d4d4c4..9925185be120 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -17,6 +17,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -53,11 +54,9 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
report_fatal_error("We do not support this DWARF encoding yet!");
case dwarf::DW_EH_PE_absptr:
return Mang->getSymbol(GV);
- break;
case dwarf::DW_EH_PE_pcrel: {
return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
Mang->getSymbol(GV)->getName());
- break;
}
}
}
@@ -78,14 +77,14 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
Flags,
SectionKind::getDataRel(),
0, Label->getName());
+ unsigned Size = TM.getTargetData()->getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(8);
+ Streamer.EmitValueToAlignment(TM.getTargetData()->getPointerABIAlignment());
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
- const MCExpr *E = MCConstantExpr::Create(8, getContext());
+ const MCExpr *E = MCConstantExpr::Create(Size, getContext());
Streamer.EmitELFSize(Label, E);
Streamer.EmitLabel(Label);
- unsigned Size = TM.getTargetData()->getPointerSize();
Streamer.EmitSymbolValue(Sym, Size);
}
@@ -189,6 +188,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
static const char *getSectionPrefixForGlobal(SectionKind Kind) {
if (Kind.isText()) return ".text.";
if (Kind.isReadOnly()) return ".rodata.";
+ if (Kind.isBSS()) return ".bss.";
if (Kind.isThreadData()) return ".tdata.";
if (Kind.isThreadBSS()) return ".tbss.";
@@ -217,7 +217,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// If this global is linkonce/weak and the target handles this by emitting it
// into a 'uniqued' section name, create and return the section now.
if ((GV->isWeakForLinker() || EmitUniquedSection) &&
- !Kind.isCommon() && !Kind.isBSS()) {
+ !Kind.isCommon()) {
const char *Prefix;
Prefix = getSectionPrefixForGlobal(Kind);
@@ -342,10 +342,92 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
+const MCSection *
+TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const {
+ // The default scheme is .ctor / .dtor, so we have to invert the priority
+ // numbering.
+ if (Priority == 65535)
+ return StaticCtorSection;
+
+ std::string Name = std::string(".ctors.") + utostr(65535 - Priority);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+}
+
+const MCSection *
+TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const {
+ // The default scheme is .ctor / .dtor, so we have to invert the priority
+ // numbering.
+ if (Priority == 65535)
+ return StaticDtorSection;
+
+ std::string Name = std::string(".dtors.") + utostr(65535 - Priority);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+}
+
//===----------------------------------------------------------------------===//
// MachO
//===----------------------------------------------------------------------===//
+/// emitModuleFlags - Emit the module flags that specify the garbage collection
+/// information.
+void TargetLoweringObjectFileMachO::
+emitModuleFlags(MCStreamer &Streamer,
+ ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+ Mangler *Mang, const TargetMachine &TM) const {
+ unsigned VersionVal = 0;
+ unsigned GCFlags = 0;
+ StringRef SectionVal;
+
+ for (ArrayRef<Module::ModuleFlagEntry>::iterator
+ i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) {
+ const Module::ModuleFlagEntry &MFE = *i;
+
+ // Ignore flags with 'Require' behavior.
+ if (MFE.Behavior == Module::Require)
+ continue;
+
+ StringRef Key = MFE.Key->getString();
+ Value *Val = MFE.Val;
+
+ if (Key == "Objective-C Image Info Version")
+ VersionVal = cast<ConstantInt>(Val)->getZExtValue();
+ else if (Key == "Objective-C Garbage Collection" ||
+ Key == "Objective-C GC Only")
+ GCFlags |= cast<ConstantInt>(Val)->getZExtValue();
+ else if (Key == "Objective-C Image Info Section")
+ SectionVal = cast<MDString>(Val)->getString();
+ }
+
+ // The section is mandatory. If we don't have it, then we don't have GC info.
+ if (SectionVal.empty()) return;
+
+ StringRef Segment, Section;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+ std::string ErrorCode =
+ MCSectionMachO::ParseSectionSpecifier(SectionVal, Segment, Section,
+ TAA, TAAParsed, StubSize);
+ if (!ErrorCode.empty())
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Invalid section specifier '" + Section + "': " +
+ ErrorCode + ".");
+
+ // Get the section.
+ const MCSectionMachO *S =
+ getContext().getMachOSection(Segment, Section, TAA, StubSize,
+ SectionKind::getDataNoRel());
+ Streamer.SwitchSection(S);
+ Streamer.EmitLabel(getContext().
+ GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
+ Streamer.EmitIntValue(VersionVal, 4);
+ Streamer.EmitIntValue(GCFlags, 4);
+ Streamer.AddBlankLine();
+}
+
const MCSection *TargetLoweringObjectFileMachO::
getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
@@ -358,11 +440,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
TAA, TAAParsed, StubSize);
if (!ErrorCode.empty()) {
// If invalid, report the error with report_fatal_error.
- report_fatal_error("Global variable '" + GV->getNameStr() +
- "' has an invalid section specifier '" + GV->getSection()+
- "': " + ErrorCode + ".");
- // Fall back to dropping it into the data section.
- return DataSection;
+ report_fatal_error("Global variable '" + GV->getName() +
+ "' has an invalid section specifier '" +
+ GV->getSection() + "': " + ErrorCode + ".");
}
// Get the section.
@@ -379,9 +459,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
// to reject it here.
if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
// If invalid, report the error with report_fatal_error.
- report_fatal_error("Global variable '" + GV->getNameStr() +
- "' section type or attributes does not match previous"
- " section specifier");
+ report_fatal_error("Global variable '" + GV->getName() +
+ "' section type or attributes does not match previous"
+ " section specifier");
}
return S;
@@ -536,9 +616,7 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
// Add information about the stub reference to MachOMMI so that the stub
// gets emitted by the asmprinter.
MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
- MachineModuleInfoImpl::StubValueTy &StubSym =
- GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) :
- MachOMMI.getGVStubEntry(SSym);
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
if (StubSym.getPointer() == 0) {
MCSymbol *Sym = Mang->getSymbol(GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
@@ -568,6 +646,11 @@ getCOFFSectionFlags(SectionKind K) {
COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isThreadLocal())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
else if (K.isReadOnly())
Flags |=
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -594,6 +677,8 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
return ".text$";
if (Kind.isBSS ())
return ".bss$";
+ if (Kind.isThreadLocal())
+ return ".tls$";
if (Kind.isWriteable())
return ".data$";
return ".rdata$";
@@ -603,7 +688,6 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
const MCSection *TargetLoweringObjectFileCOFF::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
- assert(!Kind.isThreadLocal() && "Doesn't support TLS");
// If this global is linkonce/weak and the target handles this by emitting it
// into a 'uniqued' section name, create and return the section now.
@@ -624,6 +708,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
if (Kind.isText())
return getTextSection();
+ if (Kind.isThreadLocal())
+ return getTLSDataSection();
+
return getDataSection();
}
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
new file mode 100644
index 000000000000..0f59d0169e18
--- /dev/null
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -0,0 +1,52 @@
+//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the methods in the TargetOptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+/// DisableFramePointerElim - This returns true if frame pointer elimination
+/// optimization should be disabled for the given machine function.
+bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
+ // Check to see if we should eliminate non-leaf frame pointers and then
+ // check to see if we should eliminate all frame pointers.
+ if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasCalls();
+ }
+
+ return NoFramePointerElim;
+}
+
+/// LessPreciseFPMAD - This flag returns true when the -enable-fp-mad option
+/// is specified on the command line. When this flag is off (default), the
+/// code generator is not allowed to generate mad (multiply add) if the
+/// result is "less precise" than doing those operations individually.
+bool TargetOptions::LessPreciseFPMAD() const {
+ return UnsafeFPMath || LessPreciseFPMADOption;
+}
+
+/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+/// that the rounding mode of the FPU can change from its default.
+bool TargetOptions::HonorSignDependentRoundingFPMath() const {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+}
+
+/// getTrapFunctionName - If this returns a non-empty string, this means isel
+/// should lower Intrinsic::trap to a call to the specified function name
+/// instead of an ISD::TRAP node.
+StringRef TargetOptions::getTrapFunctionName() const {
+ return TrapFuncName;
+}
+
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index d87937822280..c30b1333bb2a 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -56,14 +57,18 @@ STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
STATISTIC(NumReMats, "Number of instructions re-materialized");
STATISTIC(NumDeletes, "Number of dead instructions deleted");
+STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
+STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
MachineRegisterInfo *MRI;
LiveVariables *LV;
AliasAnalysis *AA;
+ CodeGenOpt::Level OptLevel;
// DistanceMap - Keep track the distance of a MI from the start of the
// current basic block.
@@ -120,6 +125,18 @@ namespace {
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi, unsigned Dist);
+ bool isDefTooClose(unsigned Reg, unsigned Dist,
+ MachineInstr *MI, MachineBasicBlock *MBB);
+
+ bool RescheduleMIBelowKill(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
+ bool RescheduleKillAboveMI(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
+
bool TryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi,
@@ -152,7 +169,6 @@ namespace {
AU.addPreserved<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
- AU.addPreservedID(PHIEliminationID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -225,12 +241,12 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
// appropriate location, we can try to sink the current instruction
// past it.
if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
- KillMI->getDesc().isTerminator())
+ KillMI->isTerminator())
return false;
// If any of the definitions are used by another instruction between the
// position and the kill use, then it's not safe to sink it.
- //
+ //
// FIXME: This can be sped up if there is an easy way to query whether an
// instruction is before or after another instruction. Then we can use
// MachineRegisterInfo def / use instead.
@@ -273,7 +289,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
KillMO->setIsKill(false);
KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
KillMO->setIsKill(true);
-
+
if (LV)
LV->replaceKillInstruction(SavedReg, KillMI, MI);
@@ -319,7 +335,7 @@ TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
continue; // Current use.
OtherUse = true;
// There is at least one other use in the MBB that will clobber the
- // register.
+ // register.
if (isTwoAddrUse(UseMI, Reg))
return true;
}
@@ -467,6 +483,32 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
return false;
}
+/// findLocalKill - Look for an instruction below MI in the MBB that kills the
+/// specified register. Returns null if there is any other use of Reg between
+/// the two instructions.
+static
+MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB,
+ MachineInstr *MI, MachineRegisterInfo *MRI,
+ DenseMap<MachineInstr*, unsigned> &DistanceMap) {
+ MachineInstr *KillMI = 0;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(Reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI == MI || UseMI->getParent() != MBB)
+ continue;
+ if (DistanceMap.count(UseMI))
+ continue;
+ if (!UI.getOperand().isKill())
+ return 0;
+ if (KillMI)
+ return 0; // -O0 kill markers cannot be trusted?
+ KillMI = UseMI;
+ }
+
+ return KillMI;
+}
+
/// findOnlyInterestingUse - Given a register, if has a single in-basic block
/// use, return the use instruction if it's a copy or a two-address use.
static
@@ -528,6 +570,9 @@ bool
TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
MachineInstr *MI, MachineBasicBlock *MBB,
unsigned Dist) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
// Determine if it's profitable to commute this two address instruction. In
// general, we want no uses between this instruction and the definition of
// the two-address register.
@@ -544,7 +589,7 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
// %reg1029<def> = MOV8rr %reg1028
// %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
// insert => %reg1030<def> = MOV8rr %reg1029
- // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+ // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
if (!MI->killsRegister(regC))
return false;
@@ -770,10 +815,9 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
static bool isSafeToDelete(MachineInstr *MI,
const TargetInstrInfo *TII,
SmallVector<unsigned, 4> &Kills) {
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayStore() || MCID.isCall())
+ if (MI->mayStore() || MI->isCall())
return false;
- if (MCID.isTerminator() || MI->hasUnmodeledSideEffects())
+ if (MI->isTerminator() || MI->hasUnmodeledSideEffects())
return false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -852,28 +896,316 @@ TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
return true;
}
+/// RescheduleMIBelowKill - If there is one other local instruction that reads
+/// 'Reg' and it kills 'Reg', consider moving the instruction below the kill
+/// instruction in order to eliminate the need for the copy.
+bool
+TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
+ MachineInstr *MI = &*mi;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ // Must be created from unfolded load. Don't waste time trying this.
+ return false;
+
+ MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
+ if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ // Don't mess with copies, they may be coalesced later.
+ return false;
+
+ if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() ||
+ KillMI->isBranch() || KillMI->isTerminator())
+    // Don't move past calls, etc.
+ return false;
+
+ unsigned DstReg;
+ if (isTwoAddrUse(*KillMI, Reg, DstReg))
+ return false;
+
+ bool SeenStore = true;
+ if (!MI->isSafeToMove(TII, AA, SeenStore))
+ return false;
+
+ if (TII->getInstrLatency(InstrItins, MI) > 1)
+ // FIXME: Needs more sophisticated heuristics.
+ return false;
+
+ SmallSet<unsigned, 2> Uses;
+ SmallSet<unsigned, 2> Kills;
+ SmallSet<unsigned, 2> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isDef())
+ Defs.insert(MOReg);
+ else {
+ Uses.insert(MOReg);
+ if (MO.isKill() && MOReg != Reg)
+ Kills.insert(MOReg);
+ }
+ }
+
+ // Move the copies connected to MI down as well.
+ MachineBasicBlock::iterator From = MI;
+ MachineBasicBlock::iterator To = llvm::next(From);
+ while (To->isCopy() && Defs.count(To->getOperand(1).getReg())) {
+ Defs.insert(To->getOperand(0).getReg());
+ ++To;
+ }
+
+  // Check if the reschedule will not break dependencies.
+ unsigned NumVisited = 0;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+ for (MachineBasicBlock::iterator I = To; I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
+ if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
+ OtherMI->isBranch() || OtherMI->isTerminator())
+      // Don't move past calls, etc.
+ return false;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isDef()) {
+ if (Uses.count(MOReg))
+ // Physical register use would be clobbered.
+ return false;
+ if (!MO.isDead() && Defs.count(MOReg))
+ // May clobber a physical register def.
+ // FIXME: This may be too conservative. It's ok if the instruction
+          // is sunk completely below the use.
+ return false;
+ } else {
+ if (Defs.count(MOReg))
+ return false;
+ if (MOReg != Reg &&
+ ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg)))
+ // Don't want to extend other live ranges and update kills.
+ return false;
+ }
+ }
+ }
+
+ // Move debug info as well.
+ while (From != MBB->begin() && llvm::prior(From)->isDebugValue())
+ --From;
+
+ // Copies following MI may have been moved as well.
+ nmi = To;
+ MBB->splice(KillPos, MBB, From, To);
+ DistanceMap.erase(DI);
+
+ if (LV) {
+ // Update live variables
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ } else {
+ for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = KillMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ MO.setIsKill(false);
+ }
+ MI->addRegisterKilled(Reg, 0);
+ }
+
+ return true;
+}
+
+/// isDefTooClose - Return true if the re-scheduling will put the given
+/// instruction too close to the defs of its register dependencies.
+bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) {
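+ // Walk every non-copy def of Reg in this block; rescheduling is rejected if
+ // Reg is defined by MI itself, is defined below MI, or is defined so close
+ // to MI that the def's latency is not yet covered.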
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ MachineInstr *DefMI = &*DI;
+ if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike())
+ continue;
+ if (DefMI == MI)
+ return true; // MI is defining something KillMI uses
+ DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(DefMI);
+ if (DDI == DistanceMap.end())
+ return true; // Below MI
+ unsigned DefDist = DDI->second;
+ assert(Dist > DefDist && "Visited def already?");
+ if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist))
+ return true;
+ }
+ return false;
+}
+
+/// RescheduleKillAboveMI - If there is one more local instruction that reads
+/// 'Reg' and it kills 'Reg', consider moving the kill instruction above the
+/// current two-address instruction in order to eliminate the need for the
+/// copy.
+bool
+TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
+ MachineInstr *MI = &*mi;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ // Must be created from unfolded load. Don't waste time trying this.
+ return false;
+
+ MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
+ if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ // Don't mess with copies; they may be coalesced later.
+ return false;
+
+ unsigned DstReg;
+ if (isTwoAddrUse(*KillMI, Reg, DstReg))
+ return false;
+
+ bool SeenStore = true;
+ if (!KillMI->isSafeToMove(TII, AA, SeenStore))
+ return false;
+
+ SmallSet<unsigned, 2> Uses;
+ SmallSet<unsigned, 2> Kills;
+ SmallSet<unsigned, 2> Defs;
+ SmallSet<unsigned, 2> LiveDefs;
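+ // Record KillMI's uses, kills and physical-register defs (and which of those
+ // defs are live) so we can check that hoisting KillMI above MI does not
+ // break any dependencies.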
+ for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = KillMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse()) {
+ if (!MOReg)
+ continue;
+ if (isDefTooClose(MOReg, DI->second, MI, MBB))
+ return false;
+ Uses.insert(MOReg);
+ if (MO.isKill() && MOReg != Reg)
+ Kills.insert(MOReg);
+ } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ Defs.insert(MOReg);
+ if (!MO.isDead())
+ LiveDefs.insert(MOReg);
+ }
+ }
+
+ // Check that the reschedule will not break dependencies.
+ unsigned NumVisited = 0;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
+ if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
+ OtherMI->isBranch() || OtherMI->isTerminator())
+ // Don't move past calls, etc.
+ return false;
+ SmallVector<unsigned, 2> OtherDefs;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse()) {
+ if (Defs.count(MOReg))
+ // Moving KillMI can clobber the physical register if the def has
+ // not been seen.
+ return false;
+ if (Kills.count(MOReg))
+ // Don't want to extend other live ranges and update kills.
+ return false;
+ } else {
+ OtherDefs.push_back(MOReg);
+ }
+ }
+
+ for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) {
+ unsigned MOReg = OtherDefs[i];
+ if (Uses.count(MOReg))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ LiveDefs.count(MOReg))
+ return false;
+ // Physical register def is seen.
+ Defs.erase(MOReg);
+ }
+ }
+
+ // Move the old kill above MI; don't forget to move debug info as well.
+ MachineBasicBlock::iterator InsertPos = mi;
+ while (InsertPos != MBB->begin() && llvm::prior(InsertPos)->isDebugValue())
+ --InsertPos;
+ MachineBasicBlock::iterator From = KillMI;
+ MachineBasicBlock::iterator To = llvm::next(From);
+ while (llvm::prior(From)->isDebugValue())
+ --From;
+ MBB->splice(InsertPos, MBB, From, To);
+
+ nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr.
+ DistanceMap.erase(DI);
+
+ if (LV) {
+ // Update live variables
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ } else {
+ for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = KillMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ MO.setIsKill(false);
+ }
+ MI->addRegisterKilled(Reg, 0);
+ }
+ return true;
+}
+
/// TryInstructionTransform - For the case where an instruction has a single
/// pair of tied register operands, attempt some transformations that may
/// either eliminate the tied operands or improve the opportunities for
-/// coalescing away the register copy. Returns true if the tied operands
-/// are eliminated altogether.
+/// coalescing away the register copy. Returns true if no copy needs to be
+/// inserted to untie mi's operands (either because they were untied, or
+/// because mi was rescheduled, and will be visited again later).
bool TwoAddressInstructionPass::
TryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi,
unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
SmallPtrSet<MachineInstr*, 8> &Processed) {
- const MCInstrDesc &MCID = mi->getDesc();
- unsigned regA = mi->getOperand(DstIdx).getReg();
- unsigned regB = mi->getOperand(SrcIdx).getReg();
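+ // At -O0, don't attempt any transformations; the caller will simply rewrite
+ // the tied operands with copies.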
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ MachineInstr &MI = *mi;
+ unsigned regA = MI.getOperand(DstIdx).getReg();
+ unsigned regB = MI.getOperand(SrcIdx).getReg();
assert(TargetRegisterInfo::isVirtualRegister(regB) &&
"cannot make instruction into two-address form");
// If regA is dead and the instruction can be deleted, just delete
// it so it doesn't clobber regB.
- bool regBKilled = isKilled(*mi, regB, MRI, TII);
- if (!regBKilled && mi->getOperand(DstIdx).isDead() &&
+ bool regBKilled = isKilled(MI, regB, MRI, TII);
+ if (!regBKilled && MI.getOperand(DstIdx).isDead() &&
DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
++NumDeletes;
return true; // Done with this instruction.
@@ -885,20 +1217,20 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
unsigned regCIdx = ~0U;
bool TryCommute = false;
bool AggressiveCommute = false;
- if (MCID.isCommutable() && mi->getNumOperands() >= 3 &&
- TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) {
+ if (MI.isCommutable() && MI.getNumOperands() >= 3 &&
+ TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) {
if (SrcIdx == SrcOp1)
regCIdx = SrcOp2;
else if (SrcIdx == SrcOp2)
regCIdx = SrcOp1;
if (regCIdx != ~0U) {
- regC = mi->getOperand(regCIdx).getReg();
- if (!regBKilled && isKilled(*mi, regC, MRI, TII))
+ regC = MI.getOperand(regCIdx).getReg();
+ if (!regBKilled && isKilled(MI, regC, MRI, TII))
// If C dies but B does not, swap the B and C operands.
// This makes the live ranges of A and C joinable.
TryCommute = true;
- else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) {
+ else if (isProfitableToCommute(regB, regC, &MI, mbbi, Dist)) {
TryCommute = true;
AggressiveCommute = true;
}
@@ -913,10 +1245,17 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
return false;
}
+ // If there is one more use of regB later in the same MBB, consider
+ // re-scheduling this MI below it.
+ if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) {
+ ++NumReSchedDowns;
+ return true;
+ }
+
if (TargetRegisterInfo::isVirtualRegister(regA))
ScanUses(regA, &*mbbi, Processed);
- if (MCID.isConvertibleTo3Addr()) {
+ if (MI.isConvertibleTo3Addr()) {
// This instruction is potentially convertible to a true
// three-address instruction. Check if it is profitable.
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
@@ -928,6 +1267,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
}
}
+ // If there is one more use of regB later in the same MBB, consider
+ // re-scheduling it before this MI if it's legal.
+ if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) {
+ ++NumReSchedUps;
+ return true;
+ }
+
// If this is an instruction with a load folded into it, try unfolding
// the load, e.g. avoid this:
// movq %rdx, %rcx
@@ -936,11 +1282,11 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// movq (%rax), %rcx
// addq %rdx, %rcx
// because it's preferable to schedule a load than a register copy.
- if (MCID.mayLoad() && !regBKilled) {
+ if (MI.mayLoad() && !regBKilled) {
// Determine if a load can be unfolded.
unsigned LoadRegIndex;
unsigned NewOpc =
- TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(),
+ TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
/*UnfoldLoad=*/true,
/*UnfoldStore=*/false,
&LoadRegIndex);
@@ -950,12 +1296,12 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineFunction &MF = *mbbi->getParent();
// Unfold the load.
- DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi);
+ DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
const TargetRegisterClass *RC =
TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI);
unsigned Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
- if (!TII->unfoldMemoryOperand(MF, mi, Reg,
+ if (!TII->unfoldMemoryOperand(MF, &MI, Reg,
/*UnfoldLoad=*/true,/*UnfoldStore=*/false,
NewMIs)) {
DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
@@ -986,21 +1332,21 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// Success, or at least we made an improvement. Keep the unfolded
// instructions and discard the original.
if (LV) {
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() &&
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
if (MO.isUse()) {
if (MO.isKill()) {
if (NewMIs[0]->killsRegister(MO.getReg()))
- LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]);
+ LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]);
else {
assert(NewMIs[1]->killsRegister(MO.getReg()) &&
"Kill missing after load unfold!");
- LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]);
+ LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]);
}
}
- } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) {
+ } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) {
if (NewMIs[1]->registerDefIsDead(MO.getReg()))
LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]);
else {
@@ -1013,7 +1359,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
}
LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
}
- mi->eraseFromParent();
+ MI.eraseFromParent();
mi = NewMIs[1];
if (TransformSuccess)
return true;
@@ -1035,18 +1381,19 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
/// runOnMachineFunction - Reduce two-address instructions to two operands.
///
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "Machine Function\n");
const TargetMachine &TM = MF.getTarget();
MRI = &MF.getRegInfo();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
+ InstrItins = TM.getInstrItineraryData();
LV = getAnalysisIfAvailable<LiveVariables>();
AA = &getAnalysis<AliasAnalysis>();
+ OptLevel = TM.getOptLevel();
bool MadeChange = false;
DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
- DEBUG(dbgs() << "********** Function: "
+ DEBUG(dbgs() << "********** Function: "
<< MF.getFunction()->getName() << '\n');
// This pass takes the function out of SSA form.
@@ -1177,7 +1524,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
// If it's safe and profitable, remat the definition instead of
// copying it.
if (DefMI &&
- DefMI->getDesc().isAsCheapAsAMove() &&
+ DefMI->isAsCheapAsAMove() &&
DefMI->isSafeToReMat(TII, AA, regB) &&
isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
@@ -1248,19 +1595,19 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
MadeChange = true;
DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
- }
- // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
- if (mi->isInsertSubreg()) {
- // From %reg = INSERT_SUBREG %reg, %subreg, subidx
- // To %reg:subidx = COPY %subreg
- unsigned SubIdx = mi->getOperand(3).getImm();
- mi->RemoveOperand(3);
- assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
- mi->getOperand(0).setSubReg(SubIdx);
- mi->RemoveOperand(1);
- mi->setDesc(TII->get(TargetOpcode::COPY));
- DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+ // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+ if (mi->isInsertSubreg()) {
+ // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+ // To %reg:subidx = COPY %subreg
+ unsigned SubIdx = mi->getOperand(3).getImm();
+ mi->RemoveOperand(3);
+ assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+ mi->getOperand(0).setSubReg(SubIdx);
+ mi->RemoveOperand(1);
+ mi->setDesc(TII->get(TargetOpcode::COPY));
+ DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+ }
}
// Clear TiedOperands here instead of at the top of the loop
@@ -1298,6 +1645,36 @@ static void UpdateRegSequenceSrcs(unsigned SrcReg,
}
}
+// Find the first def of Reg, assuming they are all in the same basic block.
+static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) {
+ SmallPtrSet<MachineInstr*, 8> Defs;
+ MachineInstr *First = 0;
+ for (MachineRegisterInfo::def_iterator RI = MRI->def_begin(Reg);
+ MachineInstr *MI = RI.skipInstruction(); Defs.insert(MI))
+ First = MI;
+ if (!First)
+ return 0;
+
+ MachineBasicBlock *MBB = First->getParent();
+ MachineBasicBlock::iterator A = First, B = First;
+ bool Moving;
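+ // Grow the window [A, B) outward from one def in both directions. Every def
+ // found while walking backwards becomes the new First, so once all defs have
+ // been visited, First is the earliest def in block order.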
+ do {
+ Moving = false;
+ if (A != MBB->begin()) {
+ Moving = true;
+ --A;
+ if (Defs.erase(A)) First = A;
+ }
+ if (B != MBB->end()) {
+ Defs.erase(B);
+ ++B;
+ Moving = true;
+ }
+ } while (Moving && !Defs.empty());
+ assert(Defs.empty() && "Instructions outside basic block!");
+ return First;
+}
+
/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are
/// EXTRACT_SUBREG from the same register and to the same virtual register
/// with different sub-register indices, attempt to combine the
@@ -1380,8 +1757,10 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
CanCoalesce = false;
break;
}
- // Keep track of one of the uses.
- SomeMI = UseMI;
+ // Keep track of one of the uses. Preferably the first one which has a
+ // <def,undef> flag.
+ if (!SomeMI || UseMI->getOperand(0).isUndef())
+ SomeMI = UseMI;
}
if (!CanCoalesce)
continue;
@@ -1390,7 +1769,9 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI,
SomeMI->getDebugLoc(),
TII->get(TargetOpcode::COPY))
- .addReg(DstReg, RegState::Define, NewDstSubIdx)
+ .addReg(DstReg, RegState::Define |
+ getUndefRegState(SomeMI->getOperand(0).isUndef()),
+ NewDstSubIdx)
.addReg(SrcReg, 0, NewSrcSubIdx);
// Remove all the old extract instructions.
@@ -1452,26 +1833,30 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
SmallSet<unsigned, 4> Seen;
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
unsigned SrcReg = MI->getOperand(i).getReg();
+ unsigned SrcSubIdx = MI->getOperand(i).getSubReg();
unsigned SubIdx = MI->getOperand(i+1).getImm();
- if (MI->getOperand(i).getSubReg() ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
- DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
- llvm_unreachable(0);
+ // DefMI of NULL means the value does not have a vreg in this block
+ // i.e., it's a physical register or a subreg.
+ // In either case we force a copy to be generated.
+ MachineInstr *DefMI = NULL;
+ if (!MI->getOperand(i).getSubReg() &&
+ !TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ DefMI = MRI->getVRegDef(SrcReg);
}
- MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
- if (DefMI->isImplicitDef()) {
+ if (DefMI && DefMI->isImplicitDef()) {
DefMI->eraseFromParent();
continue;
}
IsImpDef = false;
// Remember COPY sources. These might be candidate for coalescing.
- if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg())
+ if (DefMI && DefMI->isCopy() && DefMI->getOperand(1).getSubReg())
RealSrcs.push_back(DefMI->getOperand(1).getReg());
bool isKill = MI->getOperand(i).isKill();
- if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() ||
+ if (!DefMI || !Seen.insert(SrcReg) ||
+ MI->getParent() != DefMI->getParent() ||
!isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) ||
!TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg),
MRI->getRegClass(SrcReg), SubIdx)) {
@@ -1504,9 +1889,9 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
.addReg(DstReg, RegState::Define, SubIdx)
- .addReg(SrcReg, getKillRegState(isKill));
+ .addReg(SrcReg, getKillRegState(isKill), SrcSubIdx);
MI->getOperand(i).setReg(0);
- if (LV && isKill)
+ if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
LV->replaceKillInstruction(SrcReg, MI, CopyMI);
DEBUG(dbgs() << "Inserted: " << *CopyMI);
}
@@ -1519,11 +1904,27 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI);
}
+ // Set <def,undef> flags on the first DstReg def in the basic block.
+ // It marks the beginning of the live range. All the other defs are
+ // read-modify-write.
+ if (MachineInstr *Def = findFirstDef(DstReg, MRI)) {
+ for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = Def->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
+ MO.setIsUndef();
+ }
+ // Make sure there is a full non-subreg imp-def operand on the
+ // instruction. This shouldn't be necessary, but it seems that at least
+ // RAFast requires it.
+ Def->addRegisterDefined(DstReg, TRI);
+ DEBUG(dbgs() << "First def: " << *Def);
+ }
+
if (IsImpDef) {
DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
- MI->RemoveOperand(j);
+ MI->RemoveOperand(j);
} else {
DEBUG(dbgs() << "Eliminated: " << *MI);
MI->eraseFromParent();
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 8a1cdc01c494..3bab93bdc098 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -16,10 +16,9 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "virtregmap"
+#define DEBUG_TYPE "regalloc"
#include "VirtRegMap.h"
#include "llvm/Function.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -32,12 +31,8 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
#include <algorithm>
using namespace llvm;
@@ -58,34 +53,11 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
TRI = mf.getTarget().getRegisterInfo();
MF = &mf;
- ReMatId = MAX_STACK_SLOT+1;
- LowSpillSlot = HighSpillSlot = NO_STACK_SLOT;
-
Virt2PhysMap.clear();
Virt2StackSlotMap.clear();
- Virt2ReMatIdMap.clear();
Virt2SplitMap.clear();
- Virt2SplitKillMap.clear();
- ReMatMap.clear();
- ImplicitDefed.clear();
- SpillSlotToUsesMap.clear();
- MI2VirtMap.clear();
- SpillPt2VirtMap.clear();
- RestorePt2VirtMap.clear();
- EmergencySpillMap.clear();
- EmergencySpillSlots.clear();
-
- SpillSlotToUsesMap.resize(8);
- ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs());
-
- allocatableRCRegs.clear();
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I)
- allocatableRCRegs.insert(std::make_pair(*I,
- TRI->getAllocatableSet(mf, *I)));
grow();
-
return false;
}
@@ -93,24 +65,12 @@ void VirtRegMap::grow() {
unsigned NumRegs = MF->getRegInfo().getNumVirtRegs();
Virt2PhysMap.resize(NumRegs);
Virt2StackSlotMap.resize(NumRegs);
- Virt2ReMatIdMap.resize(NumRegs);
Virt2SplitMap.resize(NumRegs);
- Virt2SplitKillMap.resize(NumRegs);
- ReMatMap.resize(NumRegs);
- ImplicitDefed.resize(NumRegs);
}
unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
RC->getAlignment());
- if (LowSpillSlot == NO_STACK_SLOT)
- LowSpillSlot = SS;
- if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
- HighSpillSlot = SS;
- assert(SS >= LowSpillSlot && "Unexpected low spill slot");
- unsigned Idx = SS-LowSpillSlot;
- while (Idx >= SpillSlotToUsesMap.size())
- SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
++NumSpillSlots;
return SS;
}
@@ -144,118 +104,6 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
Virt2StackSlotMap[virtReg] = SS;
}
-int VirtRegMap::assignVirtReMatId(unsigned virtReg) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
- "attempt to assign re-mat id to already spilled register");
- Virt2ReMatIdMap[virtReg] = ReMatId;
- return ReMatId++;
-}
-
-void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
- "attempt to assign re-mat id to already spilled register");
- Virt2ReMatIdMap[virtReg] = id;
-}
-
-int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
- std::map<const TargetRegisterClass*, int>::iterator I =
- EmergencySpillSlots.find(RC);
- if (I != EmergencySpillSlots.end())
- return I->second;
- return EmergencySpillSlots[RC] = createSpillSlot(RC);
-}
-
-void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) {
- if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) {
- // If FI < LowSpillSlot, this stack reference was produced by
- // instruction selection and is not a spill
- if (FI >= LowSpillSlot) {
- assert(FI >= 0 && "Spill slot index should not be negative!");
- assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
- && "Invalid spill slot");
- SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI);
- }
- }
-}
-
-void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI,
- MachineInstr *NewMI, ModRef MRInfo) {
- // Move previous memory references folded to new instruction.
- MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI);
- for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI),
- E = MI2VirtMap.end(); I != E && I->first == OldMI; ) {
- MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second));
- MI2VirtMap.erase(I++);
- }
-
- // add new memory reference
- MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo)));
-}
-
-void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) {
- MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI);
- MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo)));
-}
-
-void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isFI())
- continue;
- int FI = MO.getIndex();
- if (MF->getFrameInfo()->isFixedObjectIndex(FI))
- continue;
- // This stack reference was produced by instruction selection and
- // is not a spill
- if (FI < LowSpillSlot)
- continue;
- assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
- && "Invalid spill slot");
- SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI);
- }
- MI2VirtMap.erase(MI);
- SpillPt2VirtMap.erase(MI);
- RestorePt2VirtMap.erase(MI);
- EmergencySpillMap.erase(MI);
-}
-
-/// FindUnusedRegisters - Gather a list of allocatable registers that
-/// have not been allocated to any virtual register.
-bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
- unsigned NumRegs = TRI->getNumRegs();
- UnusedRegs.reset();
- UnusedRegs.resize(NumRegs);
-
- BitVector Used(NumRegs);
- for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG)
- Used.set(Virt2PhysMap[Reg]);
- }
-
- BitVector Allocatable = TRI->getAllocatableSet(*MF);
- bool AnyUnused = false;
- for (unsigned Reg = 1; Reg < NumRegs; ++Reg) {
- if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) {
- bool ReallyUnused = true;
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- if (Used[*AS] || LIs->hasInterval(*AS)) {
- ReallyUnused = false;
- break;
- }
- }
- if (ReallyUnused) {
- AnyUnused = true;
- UnusedRegs.set(Reg);
- }
- }
- }
-
- return AnyUnused;
-}
-
void VirtRegMap::rewrite(SlotIndexes *Indexes) {
DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
<< "********** Function: "
@@ -264,23 +112,32 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
+#ifndef NDEBUG
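+ // Reserved is only used by the assertion below that no virtual register has
+ // been assigned to a reserved physical register.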
+ BitVector Reserved = TRI->getReservedRegs(*MF);
+#endif
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
DEBUG(MBBI->print(dbgs(), Indexes));
- for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end();
- MII != MIE;) {
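+ // Iterate over individual instructions (instr_iterator) so operands inside
+ // instruction bundles are rewritten as well.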
+ for (MachineBasicBlock::instr_iterator
+ MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
MachineInstr *MI = MII;
++MII;
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
MachineOperand &MO = *MOI;
+
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask())
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
unsigned VirtReg = MO.getReg();
unsigned PhysReg = getPhys(VirtReg);
assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg");
+ assert(!Reserved.test(PhysReg) && "Reserved register assignment");
// Preserve semantics of sub-register operands.
if (MO.getSubReg()) {
@@ -332,7 +189,6 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) {
++NumIdCopies;
if (MI->getNumOperands() == 2) {
DEBUG(dbgs() << "Deleting identity copy.\n");
- RemoveMachineInstrFromMaps(MI);
if (Indexes)
Indexes->removeMachineInstrFromMaps(MI);
// It's safe to erase MI because MII has already been incremented.
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index 03abff356934..8cac31137e3d 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -18,22 +18,14 @@
#define LLVM_CODEGEN_VIRTREGMAP_H
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include <map>
namespace llvm {
- class LiveIntervals;
class MachineInstr;
class MachineFunction;
class MachineRegisterInfo;
class TargetInstrInfo;
- class TargetRegisterInfo;
class raw_ostream;
class SlotIndexes;
@@ -45,18 +37,12 @@ namespace llvm {
MAX_STACK_SLOT = (1L << 18)-1
};
- enum ModRef { isRef = 1, isMod = 2, isModRef = 3 };
- typedef std::multimap<MachineInstr*,
- std::pair<unsigned, ModRef> > MI2VirtMapTy;
-
private:
MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineFunction *MF;
- DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs;
-
/// Virt2PhysMap - This is a virtual to physical register
/// mapping. Each virtual register is required to have an entry in
/// it; even spilled virtual registers (the register mapped to a
@@ -70,71 +56,10 @@ namespace llvm {
/// at.
IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
- /// Virt2ReMatIdMap - This is virtual register to rematerialization id
- /// mapping. Each spilled virtual register that should be remat'd has an
- /// entry in it which corresponds to the remat id.
- IndexedMap<int, VirtReg2IndexFunctor> Virt2ReMatIdMap;
-
/// Virt2SplitMap - This is virtual register to splitted virtual register
/// mapping.
IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
- /// Virt2SplitKillMap - This is splitted virtual register to its last use
- /// (kill) index mapping.
- IndexedMap<SlotIndex, VirtReg2IndexFunctor> Virt2SplitKillMap;
-
- /// ReMatMap - This is virtual register to re-materialized instruction
- /// mapping. Each virtual register whose definition is going to be
- /// re-materialized has an entry in it.
- IndexedMap<MachineInstr*, VirtReg2IndexFunctor> ReMatMap;
-
- /// MI2VirtMap - This is MachineInstr to virtual register
- /// mapping. In the case of memory spill code being folded into
- /// instructions, we need to know which virtual register was
- /// read/written by this instruction.
- MI2VirtMapTy MI2VirtMap;
-
- /// SpillPt2VirtMap - This records the virtual registers which should
- /// be spilled right after the MachineInstr due to live interval
- /// splitting.
- std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >
- SpillPt2VirtMap;
-
- /// RestorePt2VirtMap - This records the virtual registers which should
- /// be restored right before the MachineInstr due to live interval
- /// splitting.
- std::map<MachineInstr*, std::vector<unsigned> > RestorePt2VirtMap;
-
- /// EmergencySpillMap - This records the physical registers that should
- /// be spilled / restored around the MachineInstr since the register
- /// allocator has run out of registers.
- std::map<MachineInstr*, std::vector<unsigned> > EmergencySpillMap;
-
- /// EmergencySpillSlots - This records emergency spill slots used to
- /// spill physical registers when the register allocator runs out of
- /// registers. Ideally only one stack slot is used per function per
- /// register class.
- std::map<const TargetRegisterClass*, int> EmergencySpillSlots;
-
- /// ReMatId - Instead of assigning a stack slot to a to be rematerialized
- /// virtual register, an unique id is being assigned. This keeps track of
- /// the highest id used so far. Note, this starts at (1<<18) to avoid
- /// conflicts with stack slot numbers.
- int ReMatId;
-
- /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes.
- int LowSpillSlot, HighSpillSlot;
-
- /// SpillSlotToUsesMap - Records uses for each register spill slot.
- SmallVector<SmallPtrSet<MachineInstr*, 4>, 8> SpillSlotToUsesMap;
-
- /// ImplicitDefed - One bit for each virtual register. If set it indicates
- /// the register is implicitly defined.
- BitVector ImplicitDefed;
-
- /// UnusedRegs - A list of physical registers that have not been used.
- BitVector UnusedRegs;
-
/// createSpillSlot - Allocate a spill slot for RC from MFI.
unsigned createSpillSlot(const TargetRegisterClass *RC);
@@ -144,11 +69,7 @@ namespace llvm {
public:
static char ID;
VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
- Virt2StackSlotMap(NO_STACK_SLOT),
- Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
- Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL),
- ReMatId(MAX_STACK_SLOT+1),
- LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { }
+ Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -235,8 +156,7 @@ namespace llvm {
/// @brief returns true if the specified virtual register is not
/// mapped to a stack slot or rematerialized.
bool isAssignedReg(unsigned virtReg) const {
- if (getStackSlot(virtReg) == NO_STACK_SLOT &&
- getReMatId(virtReg) == NO_STACK_SLOT)
+ if (getStackSlot(virtReg) == NO_STACK_SLOT)
return true;
// Split register can be assigned a physical register as well as a
// stack slot or remat id.
@@ -250,13 +170,6 @@ namespace llvm {
return Virt2StackSlotMap[virtReg];
}
- /// @brief returns the rematerialization id mapped to the specified virtual
- /// register
- int getReMatId(unsigned virtReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- return Virt2ReMatIdMap[virtReg];
- }
-
/// @brief create a mapping for the specifed virtual register to
/// the next available stack slot
int assignVirt2StackSlot(unsigned virtReg);
@@ -264,250 +177,6 @@ namespace llvm {
/// the specified stack slot
void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
- /// @brief assign an unique re-materialization id to the specified
- /// virtual register.
- int assignVirtReMatId(unsigned virtReg);
- /// @brief assign an unique re-materialization id to the specified
- /// virtual register.
- void assignVirtReMatId(unsigned virtReg, int id);
-
- /// @brief returns true if the specified virtual register is being
- /// re-materialized.
- bool isReMaterialized(unsigned virtReg) const {
- return ReMatMap[virtReg] != NULL;
- }
-
- /// @brief returns the original machine instruction being re-issued
- /// to re-materialize the specified virtual register.
- MachineInstr *getReMaterializedMI(unsigned virtReg) const {
- return ReMatMap[virtReg];
- }
-
- /// @brief records the specified virtual register will be
- /// re-materialized and the original instruction which will be re-issed
- /// for this purpose. If parameter all is true, then all uses of the
- /// registers are rematerialized and it's safe to delete the definition.
- void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) {
- ReMatMap[virtReg] = def;
- }
-
- /// @brief record the last use (kill) of a split virtual register.
- void addKillPoint(unsigned virtReg, SlotIndex index) {
- Virt2SplitKillMap[virtReg] = index;
- }
-
- SlotIndex getKillPoint(unsigned virtReg) const {
- return Virt2SplitKillMap[virtReg];
- }
-
- /// @brief remove the last use (kill) of a split virtual register.
- void removeKillPoint(unsigned virtReg) {
- Virt2SplitKillMap[virtReg] = SlotIndex();
- }
-
- /// @brief returns true if the specified MachineInstr is a spill point.
- bool isSpillPt(MachineInstr *Pt) const {
- return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end();
- }
-
- /// @brief returns the virtual registers that should be spilled due to
- /// splitting right after the specified MachineInstr.
- std::vector<std::pair<unsigned,bool> > &getSpillPtSpills(MachineInstr *Pt) {
- return SpillPt2VirtMap[Pt];
- }
-
- /// @brief records the specified MachineInstr as a spill point for virtReg.
- void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) {
- std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
- I = SpillPt2VirtMap.find(Pt);
- if (I != SpillPt2VirtMap.end())
- I->second.push_back(std::make_pair(virtReg, isKill));
- else {
- std::vector<std::pair<unsigned,bool> > Virts;
- Virts.push_back(std::make_pair(virtReg, isKill));
- SpillPt2VirtMap.insert(std::make_pair(Pt, Virts));
- }
- }
-
- /// @brief - transfer spill point information from one instruction to
- /// another.
- void transferSpillPts(MachineInstr *Old, MachineInstr *New) {
- std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
- I = SpillPt2VirtMap.find(Old);
- if (I == SpillPt2VirtMap.end())
- return;
- while (!I->second.empty()) {
- unsigned virtReg = I->second.back().first;
- bool isKill = I->second.back().second;
- I->second.pop_back();
- addSpillPoint(virtReg, isKill, New);
- }
- SpillPt2VirtMap.erase(I);
- }
-
- /// @brief returns true if the specified MachineInstr is a restore point.
- bool isRestorePt(MachineInstr *Pt) const {
- return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end();
- }
-
- /// @brief returns the virtual registers that should be restoreed due to
- /// splitting right after the specified MachineInstr.
- std::vector<unsigned> &getRestorePtRestores(MachineInstr *Pt) {
- return RestorePt2VirtMap[Pt];
- }
-
- /// @brief records the specified MachineInstr as a restore point for virtReg.
- void addRestorePoint(unsigned virtReg, MachineInstr *Pt) {
- std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
- RestorePt2VirtMap.find(Pt);
- if (I != RestorePt2VirtMap.end())
- I->second.push_back(virtReg);
- else {
- std::vector<unsigned> Virts;
- Virts.push_back(virtReg);
- RestorePt2VirtMap.insert(std::make_pair(Pt, Virts));
- }
- }
-
- /// @brief - transfer restore point information from one instruction to
- /// another.
- void transferRestorePts(MachineInstr *Old, MachineInstr *New) {
- std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
- RestorePt2VirtMap.find(Old);
- if (I == RestorePt2VirtMap.end())
- return;
- while (!I->second.empty()) {
- unsigned virtReg = I->second.back();
- I->second.pop_back();
- addRestorePoint(virtReg, New);
- }
- RestorePt2VirtMap.erase(I);
- }
-
- /// @brief records that the specified physical register must be spilled
- /// around the specified machine instr.
- void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) {
- if (EmergencySpillMap.find(MI) != EmergencySpillMap.end())
- EmergencySpillMap[MI].push_back(PhysReg);
- else {
- std::vector<unsigned> PhysRegs;
- PhysRegs.push_back(PhysReg);
- EmergencySpillMap.insert(std::make_pair(MI, PhysRegs));
- }
- }
-
- /// @brief returns true if one or more physical registers must be spilled
- /// around the specified instruction.
- bool hasEmergencySpills(MachineInstr *MI) const {
- return EmergencySpillMap.find(MI) != EmergencySpillMap.end();
- }
-
- /// @brief returns the physical registers to be spilled and restored around
- /// the instruction.
- std::vector<unsigned> &getEmergencySpills(MachineInstr *MI) {
- return EmergencySpillMap[MI];
- }
-
- /// @brief - transfer emergency spill information from one instruction to
- /// another.
- void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) {
- std::map<MachineInstr*,std::vector<unsigned> >::iterator I =
- EmergencySpillMap.find(Old);
- if (I == EmergencySpillMap.end())
- return;
- while (!I->second.empty()) {
- unsigned virtReg = I->second.back();
- I->second.pop_back();
- addEmergencySpill(virtReg, New);
- }
- EmergencySpillMap.erase(I);
- }
-
- /// @brief return or get a emergency spill slot for the register class.
- int getEmergencySpillSlot(const TargetRegisterClass *RC);
-
- /// @brief Return lowest spill slot index.
- int getLowSpillSlot() const {
- return LowSpillSlot;
- }
-
- /// @brief Return highest spill slot index.
- int getHighSpillSlot() const {
- return HighSpillSlot;
- }
-
- /// @brief Records a spill slot use.
- void addSpillSlotUse(int FrameIndex, MachineInstr *MI);
-
- /// @brief Returns true if spill slot has been used.
- bool isSpillSlotUsed(int FrameIndex) const {
- assert(FrameIndex >= 0 && "Spill slot index should not be negative!");
- return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty();
- }
-
- /// @brief Mark the specified register as being implicitly defined.
- void setIsImplicitlyDefined(unsigned VirtReg) {
- ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg));
- }
-
- /// @brief Returns true if the virtual register is implicitly defined.
- bool isImplicitlyDefined(unsigned VirtReg) const {
- return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)];
- }
-
- /// @brief Updates information about the specified virtual register's value
- /// folded into newMI machine instruction.
- void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI,
- ModRef MRInfo);
-
- /// @brief Updates information about the specified virtual register's value
- /// folded into the specified machine instruction.
- void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo);
-
- /// @brief returns the virtual registers' values folded in memory
- /// operands of this instruction
- std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator>
- getFoldedVirts(MachineInstr* MI) const {
- return MI2VirtMap.equal_range(MI);
- }
-
- /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the
- /// the folded instruction map and spill point map.
- void RemoveMachineInstrFromMaps(MachineInstr *MI);
-
- /// FindUnusedRegisters - Gather a list of allocatable registers that
- /// have not been allocated to any virtual register.
- bool FindUnusedRegisters(LiveIntervals* LIs);
-
- /// HasUnusedRegisters - Return true if there are any allocatable registers
- /// that have not been allocated to any virtual register.
- bool HasUnusedRegisters() const {
- return !UnusedRegs.none();
- }
-
- /// setRegisterUsed - Remember the physical register is now used.
- void setRegisterUsed(unsigned Reg) {
- UnusedRegs.reset(Reg);
- }
-
- /// isRegisterUnused - Return true if the physical register has not been
- /// used.
- bool isRegisterUnused(unsigned Reg) const {
- return UnusedRegs[Reg];
- }
-
- /// getFirstUnusedRegister - Return the first physical register that has not
- /// been used.
- unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) {
- int Reg = UnusedRegs.find_first();
- while (Reg != -1) {
- if (allocatableRCRegs[RC][Reg])
- return (unsigned)Reg;
- Reg = UnusedRegs.find_next(Reg);
- }
- return 0;
- }
-
/// rewrite - Rewrite all instructions in MF to use only physical registers
/// by mapping all virtual register operands to their assigned physical
/// registers.
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
deleted file mode 100644
index a5ec797b27db..000000000000
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ /dev/null
@@ -1,2633 +0,0 @@
-//===-- llvm/CodeGen/Rewriter.cpp - Rewriter -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "virtregrewriter"
-#include "VirtRegRewriter.h"
-#include "VirtRegMap.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-STATISTIC(NumDSE , "Number of dead stores elided");
-STATISTIC(NumDSS , "Number of dead spill slots removed");
-STATISTIC(NumCommutes, "Number of instructions commuted");
-STATISTIC(NumDRM , "Number of re-materializable defs elided");
-STATISTIC(NumStores , "Number of stores added");
-STATISTIC(NumPSpills , "Number of physical register spills");
-STATISTIC(NumOmitted , "Number of reloads omitted");
-STATISTIC(NumAvoided , "Number of reloads deemed unnecessary");
-STATISTIC(NumCopified, "Number of available reloads turned into copies");
-STATISTIC(NumReMats , "Number of re-materialization");
-STATISTIC(NumLoads , "Number of loads added");
-STATISTIC(NumReused , "Number of values reused");
-STATISTIC(NumDCE , "Number of copies elided");
-STATISTIC(NumSUnfold , "Number of stores unfolded");
-STATISTIC(NumModRefUnfold, "Number of modref unfolded");
-
-namespace {
- enum RewriterName { local, trivial };
-}
-
-static cl::opt<RewriterName>
-RewriterOpt("rewriter",
- cl::desc("Rewriter to use (default=local)"),
- cl::Prefix,
- cl::values(clEnumVal(local, "local rewriter"),
- clEnumVal(trivial, "trivial rewriter"),
- clEnumValEnd),
- cl::init(local));
-
-static cl::opt<bool>
-ScheduleSpills("schedule-spills",
- cl::desc("Schedule spill code"),
- cl::init(false));
-
-VirtRegRewriter::~VirtRegRewriter() {}
-
-/// substitutePhysReg - Replace virtual register in MachineOperand with a
-/// physical register. Do the right thing with the sub-register index.
-/// Note that operands may be added, so the MO reference is no longer valid.
-static void substitutePhysReg(MachineOperand &MO, unsigned Reg,
- const TargetRegisterInfo &TRI) {
- if (MO.getSubReg()) {
- MO.substPhysReg(Reg, TRI);
-
- // Any kill flags apply to the full virtual register, so they also apply to
- // the full physical register.
- // We assume that partial defs have already been decorated with a super-reg
- // <imp-def> operand by LiveIntervals.
- MachineInstr &MI = *MO.getParent();
- if (MO.isUse() && !MO.isUndef() &&
- (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
- MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true);
- } else {
- MO.setReg(Reg);
- }
-}
-
-namespace {
-
-/// This class is intended for use with the new spilling framework only. It
-/// rewrites vreg def/uses to use the assigned preg, but does not insert any
-/// spill code.
-struct TrivialRewriter : public VirtRegRewriter {
-
- bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
- LiveIntervals* LIs) {
- DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n");
- DEBUG(dbgs() << "********** Function: "
- << MF.getFunction()->getName() << '\n');
- DEBUG(dbgs() << "**** Machine Instrs"
- << "(NOTE! Does not include spills and reloads!) ****\n");
- DEBUG(MF.dump());
-
- MachineRegisterInfo *mri = &MF.getRegInfo();
- const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo();
-
- bool changed = false;
-
- for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end();
- liItr != liEnd; ++liItr) {
-
- const LiveInterval *li = liItr->second;
- unsigned reg = li->reg;
-
- if (TargetRegisterInfo::isPhysicalRegister(reg)) {
- if (!li->empty())
- mri->setPhysRegUsed(reg);
- }
- else {
- if (!VRM.hasPhys(reg))
- continue;
- unsigned pReg = VRM.getPhys(reg);
- mri->setPhysRegUsed(pReg);
- // Copy the register use-list before traversing it.
- SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist;
- for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg),
- E = mri->reg_end(); I != E; ++I)
- reglist.push_back(std::make_pair(&*I, I.getOperandNo()));
- for (unsigned N=0; N != reglist.size(); ++N)
- substitutePhysReg(reglist[N].first->getOperand(reglist[N].second),
- pReg, *tri);
- changed |= !reglist.empty();
- }
- }
-
- DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
- DEBUG(MF.dump());
-
- return changed;
- }
-
-};
-
-}
-
-// ************************************************************************ //
-
-namespace {
-
-/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB
-/// from top down, keep track of which spill slots or remat are available in
-/// each register.
-///
-/// Note that not all physregs are created equal here. In particular, some
-/// physregs are reloads that we are allowed to clobber or ignore at any time.
-/// Other physregs are values that the register allocated program is using
-/// that we cannot CHANGE, but we can read if we like. We keep track of this
-/// on a per-stack-slot / remat id basis as the low bit in the value of the
-/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks
-/// this bit and addAvailable sets it if.
-class AvailableSpills {
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
-
- // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled
- // or remat'ed virtual register values that are still available, due to
- // being loaded or stored to, but not invalidated yet.
- std::map<int, unsigned> SpillSlotsOrReMatsAvailable;
-
- // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable,
- // indicating which stack slot values are currently held by a physreg. This
- // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a
- // physreg is modified.
- std::multimap<unsigned, int> PhysRegsAvailable;
-
- void disallowClobberPhysRegOnly(unsigned PhysReg);
-
- void ClobberPhysRegOnly(unsigned PhysReg);
-public:
- AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii)
- : TRI(tri), TII(tii) {
- }
-
- /// clear - Reset the state.
- void clear() {
- SpillSlotsOrReMatsAvailable.clear();
- PhysRegsAvailable.clear();
- }
-
- const TargetRegisterInfo *getRegInfo() const { return TRI; }
-
- /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is
- /// available in a physical register, return that PhysReg, otherwise
- /// return 0.
- unsigned getSpillSlotOrReMatPhysReg(int Slot) const {
- std::map<int, unsigned>::const_iterator I =
- SpillSlotsOrReMatsAvailable.find(Slot);
- if (I != SpillSlotsOrReMatsAvailable.end()) {
- return I->second >> 1; // Remove the CanClobber bit.
- }
- return 0;
- }
-
- /// addAvailable - Mark that the specified stack slot / remat is available
- /// in the specified physreg. If CanClobber is true, the physreg can be
- /// modified at any time without changing the semantics of the program.
- void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
- // If this stack slot is thought to be available in some other physreg,
- // remove its record.
- ModifyStackSlotOrReMat(SlotOrReMat);
-
- PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat));
- SpillSlotsOrReMatsAvailable[SlotOrReMat]= (Reg << 1) |
- (unsigned)CanClobber;
-
- if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Remembering RM#"
- << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
- DEBUG(dbgs() << " in physreg " << TRI->getName(Reg)
- << (CanClobber ? " canclobber" : "") << "\n");
- }
-
- /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
- /// the value of the specified stackslot register if it desires. The
- /// specified stack slot must be available in a physreg for this query to
- /// make sense.
- bool canClobberPhysRegForSS(int SlotOrReMat) const {
- assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) &&
- "Value not available!");
- return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1;
- }
-
- /// canClobberPhysReg - Return true if the spiller is allowed to clobber the
- /// physical register where values for some stack slot(s) might be
- /// available.
- bool canClobberPhysReg(unsigned PhysReg) const {
- std::multimap<unsigned, int>::const_iterator I =
- PhysRegsAvailable.lower_bound(PhysReg);
- while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
- int SlotOrReMat = I->second;
- I++;
- if (!canClobberPhysRegForSS(SlotOrReMat))
- return false;
- }
- return true;
- }
-
- /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
- /// stackslot register. The register is still available but is no longer
- /// allowed to be modifed.
- void disallowClobberPhysReg(unsigned PhysReg);
-
- /// ClobberPhysReg - This is called when the specified physreg changes
- /// value. We use this to invalidate any info about stuff that lives in
- /// it and any of its aliases.
- void ClobberPhysReg(unsigned PhysReg);
-
- /// ModifyStackSlotOrReMat - This method is called when the value in a stack
- /// slot changes. This removes information about which register the
- /// previous value for this slot lives in (as the previous value is dead
- /// now).
- void ModifyStackSlotOrReMat(int SlotOrReMat);
-
- /// ClobberSharingStackSlots - When a register mapped to a stack slot changes,
- /// other stack slots sharing the same register are no longer valid.
- void ClobberSharingStackSlots(int StackSlot);
-
- /// AddAvailableRegsToLiveIn - Availability information is being kept coming
- /// into the specified MBB. Add available physical registers as potential
- /// live-in's. If they are reused in the MBB, they will be added to the
- /// live-in set to make register scavenger and post-allocation scheduler.
- void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-};
-
-}
-
-// ************************************************************************ //
-
-// Given a location where a reload of a spilled register or a remat of
-// a constant is to be inserted, attempt to find a safe location to
-// insert the load at an earlier point in the basic-block, to hide
-// latency of the load and to avoid address-generation interlock
-// issues.
-static MachineBasicBlock::iterator
-ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc,
- MachineBasicBlock::iterator const Begin,
- unsigned PhysReg,
- const TargetRegisterInfo *TRI,
- bool DoReMat,
- int SSorRMId,
- const TargetInstrInfo *TII,
- const MachineFunction &MF)
-{
- if (!ScheduleSpills)
- return InsertLoc;
-
- // Spill backscheduling is of primary interest to addresses, so
- // don't do anything if the register isn't in the register class
- // used for pointers.
-
- const TargetLowering *TL = MF.getTarget().getTargetLowering();
-
- if (!TL->isTypeLegal(TL->getPointerTy()))
- // Believe it or not, this is true on 16-bit targets like PIC16.
- return InsertLoc;
-
- const TargetRegisterClass *ptrRegClass =
- TL->getRegClassFor(TL->getPointerTy());
- if (!ptrRegClass->contains(PhysReg))
- return InsertLoc;
-
- // Scan upwards through the preceding instructions. If an instruction doesn't
- // reference the stack slot or the register we're loading, we can
- // backschedule the reload up past it.
- MachineBasicBlock::iterator NewInsertLoc = InsertLoc;
- while (NewInsertLoc != Begin) {
- MachineBasicBlock::iterator Prev = prior(NewInsertLoc);
- for (unsigned i = 0; i < Prev->getNumOperands(); ++i) {
- MachineOperand &Op = Prev->getOperand(i);
- if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId)
- goto stop;
- }
- if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 ||
- Prev->findRegisterDefOperand(PhysReg))
- goto stop;
- for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias)
- if (Prev->findRegisterUseOperandIdx(*Alias) != -1 ||
- Prev->findRegisterDefOperand(*Alias))
- goto stop;
- NewInsertLoc = Prev;
- }
-stop:;
-
- // If we made it to the beginning of the block, turn around and move back
- // down just past any existing reloads. They're likely to be reloads/remats
- // for instructions earlier than what our current reload/remat is for, so
- // they should be scheduled earlier.
- if (NewInsertLoc == Begin) {
- int FrameIdx;
- while (InsertLoc != NewInsertLoc &&
- (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) ||
- TII->isTriviallyReMaterializable(NewInsertLoc)))
- ++NewInsertLoc;
- }
-
- return NewInsertLoc;
-}
-
-namespace {
-
-// ReusedOp - For each reused operand, we keep track of a bit of information,
-// in case we need to rollback upon processing a new operand. See comments
-// below.
-struct ReusedOp {
- // The MachineInstr operand that reused an available value.
- unsigned Operand;
-
- // StackSlotOrReMat - The spill slot or remat id of the value being reused.
- unsigned StackSlotOrReMat;
-
- // PhysRegReused - The physical register the value was available in.
- unsigned PhysRegReused;
-
- // AssignedPhysReg - The physreg that was assigned for use by the reload.
- unsigned AssignedPhysReg;
-
- // VirtReg - The virtual register itself.
- unsigned VirtReg;
-
- ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr,
- unsigned vreg)
- : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr),
- AssignedPhysReg(apr), VirtReg(vreg) {}
-};
-
-/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that
-/// is reused instead of reloaded.
-class ReuseInfo {
- MachineInstr &MI;
- std::vector<ReusedOp> Reuses;
- BitVector PhysRegsClobbered;
-public:
- ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) {
- PhysRegsClobbered.resize(tri->getNumRegs());
- }
-
- bool hasReuses() const {
- return !Reuses.empty();
- }
-
- /// addReuse - If we choose to reuse a virtual register that is already
- /// available instead of reloading it, remember that we did so.
- void addReuse(unsigned OpNo, unsigned StackSlotOrReMat,
- unsigned PhysRegReused, unsigned AssignedPhysReg,
- unsigned VirtReg) {
- // If the reload is to the assigned register anyway, no undo will be
- // required.
- if (PhysRegReused == AssignedPhysReg) return;
-
- // Otherwise, remember this.
- Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused,
- AssignedPhysReg, VirtReg));
- }
-
- void markClobbered(unsigned PhysReg) {
- PhysRegsClobbered.set(PhysReg);
- }
-
- bool isClobbered(unsigned PhysReg) const {
- return PhysRegsClobbered.test(PhysReg);
- }
-
- /// GetRegForReload - We are about to emit a reload into PhysReg. If there
- /// is some other operand that is using the specified register, either pick
- /// a new register to use, or evict the previous reload and use this reg.
- unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg,
- MachineFunction &MF, MachineInstr *MI,
- AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- SmallSet<unsigned, 8> &Rejected,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- VirtRegMap &VRM);
-
- /// GetRegForReload - Helper for the above GetRegForReload(). Add a
- /// 'Rejected' set to remember which registers have been considered and
- /// rejected for the reload. This avoids infinite looping in cases like
- /// this:
- /// t1 := op t2, t3
- /// t2 <- assigned r0 for use by the reload but ended up reusing r1
- /// t3 <- assigned r1 for use by the reload but ended up reusing r0
- /// t1 <- desires r1
- /// sees r1 is taken by t2, tries t2's reload register r0
- /// sees r0 is taken by t3, tries t3's reload register r1
- /// sees r1 is taken by t2, tries t2's reload register r0 ...
- unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI,
- AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- VirtRegMap &VRM) {
- SmallSet<unsigned, 8> Rejected;
- MachineFunction &MF = *MI->getParent()->getParent();
- const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg);
- return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores,
- Rejected, RegKills, KillOps, VRM);
- }
-};
-
-}
-
-// ****************** //
-// Utility Functions //
-// ****************** //
-
-/// findSinglePredSuccessor - Return via reference a vector of machine basic
-/// blocks each of which is a successor of the specified BB and has no other
-/// predecessor.
-static void findSinglePredSuccessor(MachineBasicBlock *MBB,
- SmallVectorImpl<MachineBasicBlock *> &Succs){
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock *SuccMBB = *SI;
- if (SuccMBB->pred_size() == 1)
- Succs.push_back(SuccMBB);
- }
-}
-
-/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed
-/// but not re-defined and it's being reused. Remove the kill flag for the
- /// register and clear its entries in RegKills and KillOps.
-static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n");
-
- MachineOperand *KillOp = KillOps[Reg];
- KillOp->setIsKill(false);
- // KillOps[Reg] might be a def of a super-register.
- unsigned KReg = KillOp->getReg();
- if (!RegKills[KReg])
- return;
-
- assert(KillOps[KReg]->getParent() == KillOp->getParent() &&
- "invalid superreg kill flags");
- KillOps[KReg] = NULL;
- RegKills.reset(KReg);
-
- // If it's a def of a super-register, its other sub-registers are no
- // longer killed either.
- for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
- DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n");
-
- assert(KillOps[*SR]->getParent() == KillOp->getParent() &&
- "invalid subreg kill flags");
- KillOps[*SR] = NULL;
- RegKills.reset(*SR);
- }
-}
-
-/// ResurrectKill - Invalidate kill info associated with a previous MI. An
-/// optimization may have decided that it's safe to reuse a previously killed
-/// register. If we fail to erase the invalid kill flags, then the register
-/// scavenger may later clobber the register used by this MI. Note that this
-/// must be done even if this MI is being deleted! Consider:
-///
-/// USE $r1 (vreg1) <kill>
-/// ...
-/// $r1(vreg3) = COPY $r1 (vreg2)
-///
-/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially
-/// vreg1's only use is a kill. The rewriter doesn't know it should be live
- /// until it rewrites vreg2. At that point it sees that the copy is dead and
-/// deletes it. However, deleting the copy implicitly forwards liveness of $r1
-/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at
-/// vreg1 before deleting the copy.
-static void ResurrectKill(MachineInstr &MI, unsigned Reg,
- const TargetRegisterInfo* TRI, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
- ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
- return;
- }
- // No previous kill for this reg. Check for subreg kills as well.
- // d4 =
- // store d4, fi#0
- // ...
- // = s8<kill>
- // ...
- // = d4 <avoiding reload>
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- unsigned SReg = *SR;
- if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI)
- ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps);
- }
-}
-
-/// InvalidateKills - MI is going to be deleted. If any of its operands are
-/// marked kill, then invalidate the information.
-static void InvalidateKills(MachineInstr &MI,
- const TargetRegisterInfo* TRI,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- SmallVector<unsigned, 2> *KillRegs = NULL) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef())
- continue;
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
- if (KillRegs)
- KillRegs->push_back(Reg);
- assert(Reg < KillOps.size());
- if (KillOps[Reg] == &MO) {
- // This operand was the kill, now no longer.
- KillOps[Reg] = NULL;
- RegKills.reset(Reg);
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- if (RegKills[*SR]) {
- assert(KillOps[*SR] == &MO && "bad subreg kill flags");
- KillOps[*SR] = NULL;
- RegKills.reset(*SR);
- }
- }
- }
- else {
- // This operand may have reused a previously killed reg. Keep it live in
- // case it continues to be used after erasing this instruction.
- ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
- }
- }
-}
-
-/// InvalidateRegDef - If the def operand of the specified def MI is now dead
- /// (since its spill instruction is removed), mark it isDead. Also checks
- /// whether the def MI has other definition operands that are not dead; this
- /// is returned by reference in HasLiveDef.
-static bool InvalidateRegDef(MachineBasicBlock::iterator I,
- MachineInstr &NewDef, unsigned Reg,
- bool &HasLiveDef,
- const TargetRegisterInfo *TRI) {
- // Due to remat, it's possible this reg isn't being reused. That is,
- // the def of this reg (by prev MI) is now dead.
- MachineInstr *DefMI = I;
- MachineOperand *DefOp = NULL;
- for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = DefMI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef())
- continue;
- if (MO.getReg() == Reg)
- DefOp = &MO;
- else if (!MO.isDead())
- HasLiveDef = true;
- }
- if (!DefOp)
- return false;
-
- bool FoundUse = false, Done = false;
- MachineBasicBlock::iterator E = &NewDef;
- ++I; ++E;
- for (; !Done && I != E; ++I) {
- MachineInstr *NMI = I;
- for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) {
- MachineOperand &MO = NMI->getOperand(j);
- if (!MO.isReg() || MO.getReg() == 0 ||
- (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg())))
- continue;
- if (MO.isUse())
- FoundUse = true;
- Done = true; // Stop after scanning all the operands of this MI.
- }
- }
- if (!FoundUse) {
- // Def is dead!
- DefOp->setIsDead();
- return true;
- }
- return false;
-}
-
- /// UpdateKills - Track and update kill info. If an MI reads a register that is
-/// marked kill, then it must be due to register reuse. Transfer the kill info
-/// over.
-static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- // These do not affect kill info at all.
- if (MI.isDebugValue())
- return;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.isUse() || MO.isUndef())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
-
- // This operand may have reused a previously killed reg. Keep it live.
- ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
-
- if (MO.isKill()) {
- RegKills.set(Reg);
- KillOps[Reg] = &MO;
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- RegKills.set(*SR);
- KillOps[*SR] = &MO;
- }
- }
- }
-
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.getReg() || !MO.isDef())
- continue;
- unsigned Reg = MO.getReg();
- RegKills.reset(Reg);
- KillOps[Reg] = NULL;
- // It also defines (or partially defines) its aliases.
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- RegKills.reset(*SR);
- KillOps[*SR] = NULL;
- }
- for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) {
- RegKills.reset(*SR);
- KillOps[*SR] = NULL;
- }
- }
-}
-
-/// ReMaterialize - Re-materialize definition for Reg targeting DestReg.
-///
-static void ReMaterialize(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MII,
- unsigned DestReg, unsigned Reg,
- const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI,
- VirtRegMap &VRM) {
- MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg);
-#ifndef NDEBUG
- const MCInstrDesc &MCID = ReMatDefMI->getDesc();
- assert(MCID.getNumDefs() == 1 &&
- "Don't know how to remat instructions that define > 1 values!");
-#endif
- TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI);
- MachineInstr *NewMI = prior(MII);
- for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = NewMI->getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
- unsigned VirtReg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(VirtReg))
- continue;
- assert(MO.isUse());
- unsigned Phys = VRM.getPhys(VirtReg);
- assert(Phys && "Virtual register is not assigned a register?");
- substitutePhysReg(MO, Phys, *TRI);
- }
- ++NumReMats;
-}
-
- /// findSuperReg - Find the super-register in the given register class whose
- /// SubIdx sub-register is SubReg.
-static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg,
- unsigned SubIdx, const TargetRegisterInfo *TRI) {
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
- I != E; ++I) {
- unsigned Reg = *I;
- if (TRI->getSubReg(Reg, SubIdx) == SubReg)
- return Reg;
- }
- return 0;
-}
-
-// ******************************** //
-// Available Spills Implementation //
-// ******************************** //
-
-/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified
-/// stackslot register. The register is still available but is no longer
- /// allowed to be modified.
-void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
- std::multimap<unsigned, int>::iterator I =
- PhysRegsAvailable.lower_bound(PhysReg);
- while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
- int SlotOrReMat = I->second;
- I++;
- assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
- "Bidirectional map mismatch!");
- SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
- DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
- << " copied, it is available for use but can no longer be modified\n");
- }
-}
-
-/// disallowClobberPhysReg - Unset the CanClobber bit of the specified
-/// stackslot register and its aliases. The register and its aliases may
- /// still be available but are no longer allowed to be modified.
-void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) {
- for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
- disallowClobberPhysRegOnly(*AS);
- disallowClobberPhysRegOnly(PhysReg);
-}
-
-/// ClobberPhysRegOnly - This is called when the specified physreg changes
- /// value. We use this to invalidate any info about stuff we think lives in it.
-void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
- std::multimap<unsigned, int>::iterator I =
- PhysRegsAvailable.lower_bound(PhysReg);
- while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
- int SlotOrReMat = I->second;
- PhysRegsAvailable.erase(I++);
- assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
- "Bidirectional map mismatch!");
- SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
- DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
- << " clobbered, invalidating ");
- if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n");
- else
- DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n");
- }
-}
-
-/// ClobberPhysReg - This is called when the specified physreg changes
- /// value. We use this to invalidate any info about stuff we think lives in
-/// it and any of its aliases.
-void AvailableSpills::ClobberPhysReg(unsigned PhysReg) {
- for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
- ClobberPhysRegOnly(*AS);
- ClobberPhysRegOnly(PhysReg);
-}
-
-/// AddAvailableRegsToLiveIn - Availability information is being kept coming
-/// into the specified MBB. Add available physical registers as potential
-/// live-in's. If they are reused in the MBB, they will be added to the
- /// live-in set so that the register scavenger and post-allocation scheduler
- /// see correct liveness.
-void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- std::set<unsigned> NotAvailable;
- for (std::multimap<unsigned, int>::iterator
- I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end();
- I != E; ++I) {
- unsigned Reg = I->first;
- const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg);
- // FIXME: A temporary workaround. We can't reuse an available value if it's
- // not safe to move defs of the virtual register's class. e.g.
- // X86::RFP* register classes. Do not add it as a live-in.
- if (!TII->isSafeToMoveRegClassDefs(RC))
- // This is no longer available.
- NotAvailable.insert(Reg);
- else {
- MBB.addLiveIn(Reg);
- if (RegKills[Reg])
- ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
- }
-
- // Skip over the same register.
- std::multimap<unsigned, int>::iterator NI = llvm::next(I);
- while (NI != E && NI->first == Reg) {
- ++I;
- ++NI;
- }
- }
-
- for (std::set<unsigned>::iterator I = NotAvailable.begin(),
- E = NotAvailable.end(); I != E; ++I) {
- ClobberPhysReg(*I);
- for (const unsigned *SubRegs = TRI->getSubRegisters(*I);
- *SubRegs; ++SubRegs)
- ClobberPhysReg(*SubRegs);
- }
-}
-
-/// ModifyStackSlotOrReMat - This method is called when the value in a stack
-/// slot changes. This removes information about which register the previous
-/// value for this slot lives in (as the previous value is dead now).
-void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) {
- std::map<int, unsigned>::iterator It =
- SpillSlotsOrReMatsAvailable.find(SlotOrReMat);
- if (It == SpillSlotsOrReMatsAvailable.end()) return;
- unsigned Reg = It->second >> 1;
- SpillSlotsOrReMatsAvailable.erase(It);
-
- // This register may hold the value of multiple stack slots, so only remove
- // this stack slot from the set of values the register contains.
- std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
- for (; ; ++I) {
- assert(I != PhysRegsAvailable.end() && I->first == Reg &&
- "Map inverse broken!");
- if (I->second == SlotOrReMat) break;
- }
- PhysRegsAvailable.erase(I);
-}
-
-void AvailableSpills::ClobberSharingStackSlots(int StackSlot) {
- std::map<int, unsigned>::iterator It =
- SpillSlotsOrReMatsAvailable.find(StackSlot);
- if (It == SpillSlotsOrReMatsAvailable.end()) return;
- unsigned Reg = It->second >> 1;
-
- // Erase entries in PhysRegsAvailable for other stack slots.
- std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
- while (I != PhysRegsAvailable.end() && I->first == Reg) {
- std::multimap<unsigned, int>::iterator NextI = llvm::next(I);
- if (I->second != StackSlot) {
- DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in "
- << PrintReg(Reg, TRI) << '\n');
- SpillSlotsOrReMatsAvailable.erase(I->second);
- PhysRegsAvailable.erase(I);
- }
- I = NextI;
- }
-}
-
-// ************************** //
-// Reuse Info Implementation //
-// ************************** //
-
-/// GetRegForReload - We are about to emit a reload into PhysReg. If there
-/// is some other operand that is using the specified register, either pick
-/// a new register to use, or evict the previous reload and use this reg.
-unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
- unsigned PhysReg,
- MachineFunction &MF,
- MachineInstr *MI, AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- SmallSet<unsigned, 8> &Rejected,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- VirtRegMap &VRM) {
- const TargetInstrInfo* TII = MF.getTarget().getInstrInfo();
- const TargetRegisterInfo *TRI = Spills.getRegInfo();
-
- if (Reuses.empty()) return PhysReg; // This is most often empty.
-
- for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) {
- ReusedOp &Op = Reuses[ro];
- // If we find some other reuse that was supposed to use this register
- // exactly for its reload, we can change this reload to use ITS reload
- // register. That is, unless its reload register has already been
- // considered and subsequently rejected because it has also been reused
- // by another operand.
- if (Op.PhysRegReused == PhysReg &&
- Rejected.count(Op.AssignedPhysReg) == 0 &&
- RC->contains(Op.AssignedPhysReg)) {
- // Yup, use the reload register that we didn't use before.
- unsigned NewReg = Op.AssignedPhysReg;
- Rejected.insert(PhysReg);
- return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores,
- Rejected, RegKills, KillOps, VRM);
- } else {
- // Otherwise, we might also have a problem if a previously reused
- // value aliases the new register. If so, codegen the previous reload
- // and use this one.
- unsigned PRRU = Op.PhysRegReused;
- if (TRI->regsOverlap(PRRU, PhysReg)) {
- // Okay, we found out that an alias of a reused register
- // was used. This isn't good because it means we have
- // to undo a previous reuse.
- MachineBasicBlock *MBB = MI->getParent();
- const TargetRegisterClass *AliasRC =
- MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg);
-
- // Copy Op out of the vector and remove it, we're going to insert an
- // explicit load for it.
- ReusedOp NewOp = Op;
- Reuses.erase(Reuses.begin()+ro);
-
- // MI may be using only a sub-register of PhysRegUsed.
- unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg();
- unsigned SubIdx = 0;
- assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) &&
- "A reuse cannot be a virtual register");
- if (PRRU != RealPhysRegUsed) {
- // What was the sub-register index?
- SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed);
- assert(SubIdx &&
- "Operand physreg is not a sub-register of PhysRegUsed");
- }
-
- // Ok, we're going to try to reload the assigned physreg into the
- // slot that we were supposed to in the first place. However, that
- // register could hold a reuse. Check to see if it conflicts or
- // would prefer us to use a different register.
- unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg,
- MF, MI, Spills, MaybeDeadStores,
- Rejected, RegKills, KillOps, VRM);
-
- bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT;
- int SSorRMId = DoReMat
- ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat;
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI,
- DoReMat, SSorRMId, TII, MF);
-
- if (DoReMat) {
- ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII,
- TRI, VRM);
- } else {
- TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg,
- NewOp.StackSlotOrReMat, AliasRC, TRI);
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
- // Any stores to this stack slot are not dead anymore.
- MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;
- ++NumLoads;
- }
- Spills.ClobberPhysReg(NewPhysReg);
- Spills.ClobberPhysReg(NewOp.PhysRegReused);
-
- unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) :NewPhysReg;
- MI->getOperand(NewOp.Operand).setReg(RReg);
- MI->getOperand(NewOp.Operand).setSubReg(0);
-
- Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
- UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(dbgs() << '\t' << *prior(InsertLoc));
-
- DEBUG(dbgs() << "Reuse undone!\n");
- --NumReused;
-
- // Finally, PhysReg is now available, go ahead and use it.
- return PhysReg;
- }
- }
- }
- return PhysReg;
-}
-
-// ************************************************************************ //
-
-/// FoldsStackSlotModRef - Return true if the specified MI folds the specified
-/// stack slot mod/ref. It also checks if it's possible to unfold the
-/// instruction by having it define a specified physical register instead.
-static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg,
- const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI,
- VirtRegMap &VRM) {
- if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI))
- return false;
-
- bool Found = false;
- VirtRegMap::MI2VirtMapTy::const_iterator I, End;
- for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
- unsigned VirtReg = I->second.first;
- VirtRegMap::ModRef MR = I->second.second;
- if (MR & VirtRegMap::isModRef)
- if (VRM.getStackSlot(VirtReg) == SS) {
- Found= TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0;
- break;
- }
- }
- if (!Found)
- return false;
-
- // Does the instruction use a register that overlaps the scratch register?
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- if (!VRM.hasPhys(Reg))
- continue;
- Reg = VRM.getPhys(Reg);
- }
- if (TRI->regsOverlap(PhysReg, Reg))
- return false;
- }
- return true;
-}
-
-/// FindFreeRegister - Find a free register of a given register class by looking
-/// at (at most) the last two machine instructions.
-static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
- MachineBasicBlock &MBB,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI,
- BitVector &AllocatableRegs) {
- BitVector Defs(TRI->getNumRegs());
- BitVector Uses(TRI->getNumRegs());
- SmallVector<unsigned, 4> LocalUses;
- SmallVector<unsigned, 4> Kills;
-
- // Take a look at 2 instructions at most.
- unsigned Count = 0;
- while (Count < 2) {
- if (MII == MBB.begin())
- break;
- MachineInstr *PrevMI = prior(MII);
- MII = PrevMI;
-
- if (PrevMI->isDebugValue())
- continue; // Skip over dbg_value instructions.
- ++Count;
-
- for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = PrevMI->getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
- unsigned Reg = MO.getReg();
- if (MO.isDef()) {
- Defs.set(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- Defs.set(*AS);
- } else {
- LocalUses.push_back(Reg);
- if (MO.isKill() && AllocatableRegs[Reg])
- Kills.push_back(Reg);
- }
- }
-
- for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
- unsigned Kill = Kills[i];
- if (!Defs[Kill] && !Uses[Kill] &&
- RC->contains(Kill))
- return Kill;
- }
- for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
- unsigned Reg = LocalUses[i];
- Uses.set(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- Uses.set(*AS);
- }
- }
-
- return 0;
-}
-
-static
-void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg,
- const TargetRegisterInfo &TRI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == VirtReg)
- substitutePhysReg(MO, PhysReg, TRI);
- }
-}
-
-namespace {
-
-struct RefSorter {
- bool operator()(const std::pair<MachineInstr*, int> &A,
- const std::pair<MachineInstr*, int> &B) {
- return A.second < B.second;
- }
-};
-
-// ***************************** //
-// Local Spiller Implementation //
-// ***************************** //
-
-class LocalRewriter : public VirtRegRewriter {
- MachineRegisterInfo *MRI;
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- VirtRegMap *VRM;
- LiveIntervals *LIs;
- BitVector AllocatableRegs;
- DenseMap<MachineInstr*, unsigned> DistanceMap;
- DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues;
-
- MachineBasicBlock *MBB; // Basic block currently being processed.
-
-public:
-
- bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
- LiveIntervals* LIs);
-
-private:
- void EraseInstr(MachineInstr *MI) {
- VRM->RemoveMachineInstrFromMaps(MI);
- LIs->RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- }
-
- bool OptimizeByUnfold2(unsigned VirtReg, int SS,
- MachineBasicBlock::iterator &MII,
- std::vector<MachineInstr*> &MaybeDeadStores,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- bool OptimizeByUnfold(MachineBasicBlock::iterator &MII,
- std::vector<MachineInstr*> &MaybeDeadStores,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- bool CommuteToFoldReload(MachineBasicBlock::iterator &MII,
- unsigned VirtReg, unsigned SrcReg, int SS,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- const TargetRegisterInfo *TRI);
-
- void SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
- int Idx, unsigned PhysReg, int StackSlot,
- const TargetRegisterClass *RC,
- bool isAvailable, MachineInstr *&LastStore,
- AvailableSpills &Spills,
- SmallSet<MachineInstr*, 4> &ReMatDefs,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- void TransferDeadness(unsigned Reg, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- bool InsertEmergencySpills(MachineInstr *MI);
-
- bool InsertRestores(MachineInstr *MI,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- bool InsertSpills(MachineInstr *MI);
-
- void ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- BitVector &RegKills,
- ReuseInfo &ReusedOperands,
- std::vector<MachineOperand*> &KillOps);
-
- void RewriteMBB(LiveIntervals *LIs,
- AvailableSpills &Spills, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-};
-}
-
-bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
- LiveIntervals* lis) {
- MRI = &MF.getRegInfo();
- TRI = MF.getTarget().getRegisterInfo();
- TII = MF.getTarget().getInstrInfo();
- VRM = &vrm;
- LIs = lis;
- AllocatableRegs = TRI->getAllocatableSet(MF);
- DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
- << MF.getFunction()->getName() << "':\n");
- DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
- " reloads!) ****\n");
- DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
-
- // Spills - Keep track of which spilled values are available in physregs
- // so that we can choose to reuse the physregs instead of emitting
- // reloads. This is usually refreshed per basic block.
- AvailableSpills Spills(TRI, TII);
-
- // Keep track of kill information.
- BitVector RegKills(TRI->getNumRegs());
- std::vector<MachineOperand*> KillOps;
- KillOps.resize(TRI->getNumRegs(), NULL);
-
- // SinglePredSuccs - Successor blocks which have a single predecessor.
- SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
- SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
-
- // Traverse the basic blocks depth first.
- MachineBasicBlock *Entry = MF.begin();
- SmallPtrSet<MachineBasicBlock*,16> Visited;
- for (df_ext_iterator<MachineBasicBlock*,
- SmallPtrSet<MachineBasicBlock*,16> >
- DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
- DFI != E; ++DFI) {
- MBB = *DFI;
- if (!EarlyVisited.count(MBB))
- RewriteMBB(LIs, Spills, RegKills, KillOps);
-
- // If this MBB is the only predecessor of a successor, keep the
- // availability information and visit it next.
- do {
- // Keep visiting single predecessor successor as long as possible.
- SinglePredSuccs.clear();
- findSinglePredSuccessor(MBB, SinglePredSuccs);
- if (SinglePredSuccs.empty())
- MBB = 0;
- else {
- // FIXME: More than one successor, each of which has MBB as
- // its only predecessor.
- MBB = SinglePredSuccs[0];
- if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
- Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
- RewriteMBB(LIs, Spills, RegKills, KillOps);
- }
- }
- } while (MBB);
-
- // Clear the availability info.
- Spills.clear();
- }
-
- DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
- DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
-
- // Mark unused spill slots.
- MachineFrameInfo *MFI = MF.getFrameInfo();
- int SS = VRM->getLowSpillSlot();
- if (SS != VirtRegMap::NO_STACK_SLOT) {
- for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) {
- SmallVector<MachineInstr*, 4> &DbgValues = Slot2DbgValues[SS];
- if (!VRM->isSpillSlotUsed(SS)) {
- MFI->RemoveStackObject(SS);
- for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) {
- MachineInstr *DVMI = DbgValues[j];
- DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n');
- EraseInstr(DVMI);
- }
- ++NumDSS;
- }
- DbgValues.clear();
- }
- }
- Slot2DbgValues.clear();
-
- return true;
-}
-
-/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
-/// a scratch register is available.
-/// xorq %r12<kill>, %r13
-/// addq %rax, -184(%rbp)
-/// addq %r13, -184(%rbp)
-/// ==>
-/// xorq %r12<kill>, %r13
-/// movq -184(%rbp), %r12
-/// addq %rax, %r12
-/// addq %r13, %r12
-/// movq %r12, -184(%rbp)
-bool LocalRewriter::
-OptimizeByUnfold2(unsigned VirtReg, int SS,
- MachineBasicBlock::iterator &MII,
- std::vector<MachineInstr*> &MaybeDeadStores,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
-
- MachineBasicBlock::iterator NextMII = llvm::next(MII);
- // Skip over dbg_value instructions.
- while (NextMII != MBB->end() && NextMII->isDebugValue())
- NextMII = llvm::next(NextMII);
- if (NextMII == MBB->end())
- return false;
-
- if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
- return false;
-
- // Now let's see if the last couple of instructions happen to have freed up
- // a register.
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- unsigned PhysReg = FindFreeRegister(MII, *MBB, RC, TRI, AllocatableRegs);
- if (!PhysReg)
- return false;
-
- MachineFunction &MF = *MBB->getParent();
- TRI = MF.getTarget().getRegisterInfo();
- MachineInstr &MI = *MII;
- if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, *VRM))
- return false;
-
- // If the next instruction also folds the same SS modref and can be unfolded,
- // then it's worthwhile to issue a load from SS into the free register and
- // then unfold these instructions.
- if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM))
- return false;
-
- // Back-schedule reloads and remats.
- ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF);
-
- // Load from SS to the spare physical register.
- TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI);
- // This invalidates Phys.
- Spills.ClobberPhysReg(PhysReg);
- // Remember it's available.
- Spills.addAvailable(SS, PhysReg);
- MaybeDeadStores[SS] = NULL;
-
- // Unfold current MI.
- SmallVector<MachineInstr*, 4> NewMIs;
- if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
- llvm_unreachable("Unable unfold the load / store folding instruction!");
- assert(NewMIs.size() == 1);
- AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
- VRM->transferRestorePts(&MI, NewMIs[0]);
- MII = MBB->insert(MII, NewMIs[0]);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- ++NumModRefUnfold;
-
- // Unfold next instructions that fold the same SS.
- do {
- MachineInstr &NextMI = *NextMII;
- NextMII = llvm::next(NextMII);
- NewMIs.clear();
- if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
- llvm_unreachable("Unable unfold the load / store folding instruction!");
- assert(NewMIs.size() == 1);
- AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
- VRM->transferRestorePts(&NextMI, NewMIs[0]);
- MBB->insert(NextMII, NewMIs[0]);
- InvalidateKills(NextMI, TRI, RegKills, KillOps);
- EraseInstr(&NextMI);
- ++NumModRefUnfold;
- // Skip over dbg_value instructions.
- while (NextMII != MBB->end() && NextMII->isDebugValue())
- NextMII = llvm::next(NextMII);
- if (NextMII == MBB->end())
- break;
- } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM));
-
- // Store the value back into SS.
- TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI);
- MachineInstr *StoreMI = prior(NextMII);
- VRM->addSpillSlotUse(SS, StoreMI);
- VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
-
- return true;
-}
-
-/// OptimizeByUnfold - Turn a store folding instruction into a load folding
-/// instruction. e.g.
-/// xorl %edi, %eax
-/// movl %eax, -32(%ebp)
-/// movl -36(%ebp), %eax
-/// orl %eax, -32(%ebp)
-/// ==>
-/// xorl %edi, %eax
-/// orl -36(%ebp), %eax
-/// mov %eax, -32(%ebp)
-/// This enables unfolding optimization for a subsequent instruction which will
-/// also eliminate the newly introduced store instruction.
-bool LocalRewriter::
-OptimizeByUnfold(MachineBasicBlock::iterator &MII,
- std::vector<MachineInstr*> &MaybeDeadStores,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- MachineFunction &MF = *MBB->getParent();
- MachineInstr &MI = *MII;
- unsigned UnfoldedOpc = 0;
- unsigned UnfoldPR = 0;
- unsigned UnfoldVR = 0;
- int FoldedSS = VirtRegMap::NO_STACK_SLOT;
- VirtRegMap::MI2VirtMapTy::const_iterator I, End;
- for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) {
- // Only transform an MI that folds a single register.
- if (UnfoldedOpc)
- return false;
- UnfoldVR = I->second.first;
- VirtRegMap::ModRef MR = I->second.second;
- // MI2VirtMap can be updated, which invalidates the iterator.
- // Increment the iterator first.
- ++I;
- if (VRM->isAssignedReg(UnfoldVR))
- continue;
- // If this reference is not a use, any previous store is now dead.
- // Otherwise, the store to this stack slot is not dead anymore.
- FoldedSS = VRM->getStackSlot(UnfoldVR);
- MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
- if (DeadStore && (MR & VirtRegMap::isModRef)) {
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
- if (!PhysReg || !DeadStore->readsRegister(PhysReg))
- continue;
- UnfoldPR = PhysReg;
- UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
- false, true);
- }
- }
-
- if (!UnfoldedOpc) {
- if (!UnfoldVR)
- return false;
-
- // Look for other unfolding opportunities.
- return OptimizeByUnfold2(UnfoldVR, FoldedSS, MII, MaybeDeadStores, Spills,
- RegKills, KillOps);
- }
-
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse())
- continue;
- unsigned VirtReg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg())
- continue;
- if (VRM->isAssignedReg(VirtReg)) {
- unsigned PhysReg = VRM->getPhys(VirtReg);
- if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR))
- return false;
- } else if (VRM->isReMaterialized(VirtReg))
- continue;
- int SS = VRM->getStackSlot(VirtReg);
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
- if (PhysReg) {
- if (TRI->regsOverlap(PhysReg, UnfoldPR))
- return false;
- continue;
- }
- if (VRM->hasPhys(VirtReg)) {
- PhysReg = VRM->getPhys(VirtReg);
- if (!TRI->regsOverlap(PhysReg, UnfoldPR))
- continue;
- }
-
- // Ok, we'll need to reload the value into a register which makes
- // it impossible to perform the store unfolding optimization later.
- // Let's see if it is possible to fold the load if the store is
- // unfolded. This allows us to perform the store unfolding
- // optimization.
- SmallVector<MachineInstr*, 4> NewMIs;
- if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
- assert(NewMIs.size() == 1);
- MachineInstr *NewMI = NewMIs.back();
- MBB->insert(MII, NewMI);
- NewMIs.clear();
- int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
- assert(Idx != -1);
- SmallVector<unsigned, 1> Ops;
- Ops.push_back(Idx);
- MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS);
- NewMI->eraseFromParent();
- if (FoldedMI) {
- VRM->addSpillSlotUse(SS, FoldedMI);
- if (!VRM->hasPhys(UnfoldVR))
- VRM->assignVirt2Phys(UnfoldVR, UnfoldPR);
- VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
- MII = FoldedMI;
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- return true;
- }
- }
- }
-
- return false;
-}
-
- /// CommuteChangesDestination - We are looking for r0 = op r1, r2,
-/// where SrcReg is r1 and it is tied to r0. Return true if after
-/// commuting this instruction it will be r0 = op r2, r1.
-static bool CommuteChangesDestination(MachineInstr *DefMI,
- const MCInstrDesc &MCID,
- unsigned SrcReg,
- const TargetInstrInfo *TII,
- unsigned &DstIdx) {
- if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3)
- return false;
- if (!DefMI->getOperand(1).isReg() ||
- DefMI->getOperand(1).getReg() != SrcReg)
- return false;
- unsigned DefIdx;
- if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0)
- return false;
- unsigned SrcIdx1, SrcIdx2;
- if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2))
- return false;
- if (SrcIdx1 == 1 && SrcIdx2 == 2) {
- DstIdx = 2;
- return true;
- }
- return false;
-}
-
-/// CommuteToFoldReload -
-/// Look for
-/// r1 = load fi#1
-/// r1 = op r1, r2<kill>
-/// store r1, fi#1
-///
-/// If op is commutable and r2 is killed, then we can xform these to
-/// r2 = op r2, fi#1
-/// store r2, fi#1
-bool LocalRewriter::
-CommuteToFoldReload(MachineBasicBlock::iterator &MII,
- unsigned VirtReg, unsigned SrcReg, int SS,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- const TargetRegisterInfo *TRI) {
- if (MII == MBB->begin() || !MII->killsRegister(SrcReg))
- return false;
-
- MachineInstr &MI = *MII;
- MachineBasicBlock::iterator DefMII = prior(MII);
- MachineInstr *DefMI = DefMII;
- const MCInstrDesc &MCID = DefMI->getDesc();
- unsigned NewDstIdx;
- if (DefMII != MBB->begin() &&
- MCID.isCommutable() &&
- CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) {
- MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
- unsigned NewReg = NewDstMO.getReg();
- if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
- return false;
- MachineInstr *ReloadMI = prior(DefMII);
- int FrameIdx;
- unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
- if (DestReg != SrcReg || FrameIdx != SS)
- return false;
- int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
- if (UseIdx == -1)
- return false;
- unsigned DefIdx;
- if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx))
- return false;
- assert(DefMI->getOperand(DefIdx).isReg() &&
- DefMI->getOperand(DefIdx).getReg() == SrcReg);
-
- // Now commute def instruction.
- MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
- if (!CommutedMI)
- return false;
- MBB->insert(MII, CommutedMI);
- SmallVector<unsigned, 1> Ops;
- Ops.push_back(NewDstIdx);
- MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS);
- // Not needed since foldMemoryOperand returns new MI.
- CommutedMI->eraseFromParent();
- if (!FoldedMI)
- return false;
-
- VRM->addSpillSlotUse(SS, FoldedMI);
- VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
- // Insert new def MI and spill MI.
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI);
- MII = prior(MII);
- MachineInstr *StoreMI = MII;
- VRM->addSpillSlotUse(SS, StoreMI);
- VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
- MII = FoldedMI; // Update MII to backtrack.
-
- // Delete all 3 old instructions.
- InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
- EraseInstr(ReloadMI);
- InvalidateKills(*DefMI, TRI, RegKills, KillOps);
- EraseInstr(DefMI);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
-
- // If NewReg was previously holding the value of some SS, it's now clobbered.
- // This has to be done now because it's a physical register. When this
- // instruction is re-visited, it's ignored.
- Spills.ClobberPhysReg(NewReg);
-
- ++NumCommutes;
- return true;
- }
-
- return false;
-}
-
-/// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if
-/// the last store to the same slot is now dead. If so, remove the last store.
-void LocalRewriter::
-SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
- int Idx, unsigned PhysReg, int StackSlot,
- const TargetRegisterClass *RC,
- bool isAvailable, MachineInstr *&LastStore,
- AvailableSpills &Spills,
- SmallSet<MachineInstr*, 4> &ReMatDefs,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
-
- MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
- TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC,
- TRI);
- MachineInstr *StoreMI = prior(oldNextMII);
- VRM->addSpillSlotUse(StackSlot, StoreMI);
- DEBUG(dbgs() << "Store:\t" << *StoreMI);
-
- // If there is a dead store to this stack slot, nuke it now.
- if (LastStore) {
- DEBUG(dbgs() << "Removed dead store:\t" << *LastStore);
- ++NumDSE;
- SmallVector<unsigned, 2> KillRegs;
- InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
- MachineBasicBlock::iterator PrevMII = LastStore;
- bool CheckDef = PrevMII != MBB->begin();
- if (CheckDef)
- --PrevMII;
- EraseInstr(LastStore);
- if (CheckDef) {
- // Look at defs of killed registers on the store. Mark the defs
- // as dead since the store has been deleted and they aren't
- // being reused.
- for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
- bool HasOtherDef = false;
- if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
- MachineInstr *DeadDef = PrevMII;
- if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
- // FIXME: This assumes a remat def does not have side effects.
- EraseInstr(DeadDef);
- ++NumDRM;
- }
- }
- }
- }
- }
-
- // Allow for multi-instruction spill sequences, as on PPC Altivec. Presume
- // the last of multiple instructions is the actual store.
- LastStore = prior(oldNextMII);
-
- // If the stack slot value was previously available in some other
- // register, change it now. Otherwise, make the register available,
- // in PhysReg.
- Spills.ModifyStackSlotOrReMat(StackSlot);
- Spills.ClobberPhysReg(PhysReg);
- Spills.addAvailable(StackSlot, PhysReg, isAvailable);
- ++NumStores;
-}
-
-/// isSafeToDelete - Return true if this instruction doesn't produce any side
- /// effects and all of its defs are dead.
-static bool isSafeToDelete(MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() ||
- MCID.isCall() || MCID.isBarrier() || MCID.isReturn() ||
- MI.isLabel() || MI.isDebugValue() ||
- MI.hasUnmodeledSideEffects())
- return false;
-
- // Technically speaking inline asm without side effects and no defs can still
- // be deleted. But there is so much bad inline asm code out there, we should
- // let them be.
- if (MI.isInlineAsm())
- return false;
-
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.getReg())
- continue;
- if (MO.isDef() && !MO.isDead())
- return false;
- if (MO.isUse() && MO.isKill())
- // FIXME: We can't remove kill markers or else the scavenger will assert.
- // An alternative is to add an ADD pseudo instruction to replace kill
- // markers.
- return false;
- }
- return true;
-}
-
- /// TransferDeadness - An identity copy definition is dead and it's being
-/// removed. Find the last def or use and mark it as dead / kill.
-void LocalRewriter::
-TransferDeadness(unsigned Reg, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- SmallPtrSet<MachineInstr*, 4> Seens;
- SmallVector<std::pair<MachineInstr*, int>,8> Refs;
- for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
- RE = MRI->reg_end(); RI != RE; ++RI) {
- MachineInstr *UDMI = &*RI;
- if (UDMI->isDebugValue() || UDMI->getParent() != MBB)
- continue;
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
- if (DI == DistanceMap.end())
- continue;
- if (Seens.insert(UDMI))
- Refs.push_back(std::make_pair(UDMI, DI->second));
- }
-
- if (Refs.empty())
- return;
- std::sort(Refs.begin(), Refs.end(), RefSorter());
-
- while (!Refs.empty()) {
- MachineInstr *LastUDMI = Refs.back().first;
- Refs.pop_back();
-
- MachineOperand *LastUD = NULL;
- for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = LastUDMI->getOperand(i);
- if (!MO.isReg() || MO.getReg() != Reg)
- continue;
- if (!LastUD || (LastUD->isUse() && MO.isDef()))
- LastUD = &MO;
- if (LastUDMI->isRegTiedToDefOperand(i))
- break;
- }
- if (LastUD->isDef()) {
- // If the instruction has no side effects, delete it and propagate
- // backward further. Otherwise, mark it dead and we are done.
- if (!isSafeToDelete(*LastUDMI)) {
- LastUD->setIsDead();
- break;
- }
- EraseInstr(LastUDMI);
- } else {
- LastUD->setIsKill();
- RegKills.set(Reg);
- KillOps[Reg] = LastUD;
- break;
- }
- }
-}
-
-/// InsertEmergencySpills - Insert emergency spills before MI if requested by
-/// VRM. Return true if spills were inserted.
-bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
- if (!VRM->hasEmergencySpills(MI))
- return false;
- MachineBasicBlock::iterator MII = MI;
- SmallSet<int, 4> UsedSS;
- std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI);
- for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
- unsigned PhysReg = EmSpills[i];
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
- assert(RC && "Unable to determine register class!");
- int SS = VRM->getEmergencySpillSlot(RC);
- if (UsedSS.count(SS))
- llvm_unreachable("Need to spill more than one physical registers!");
- UsedSS.insert(SS);
- TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI);
- MachineInstr *StoreMI = prior(MII);
- VRM->addSpillSlotUse(SS, StoreMI);
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS,
- TII, *MBB->getParent());
-
- TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI);
-
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM->addSpillSlotUse(SS, LoadMI);
- ++NumPSpills;
- DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
- }
- return true;
-}
-
- /// InsertRestores - Restore registers before MI if requested by VRM. Return
- /// true if any instructions were inserted.
-bool LocalRewriter::InsertRestores(MachineInstr *MI,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- if (!VRM->isRestorePt(MI))
- return false;
- MachineBasicBlock::iterator MII = MI;
- std::vector<unsigned> &RestoreRegs = VRM->getRestorePtRestores(MI);
- for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
- unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order.
- if (!VRM->getPreSplitReg(VirtReg))
- continue; // Split interval spilled again.
- unsigned Phys = VRM->getPhys(VirtReg);
- MRI->setPhysRegUsed(Phys);
-
- // Check if the value being restored is available. If so, it must be
- // from a predecessor BB that falls through into this BB. We do not
- // expect:
- // BB1:
- // r1 = load fi#1
- // ...
- // = r1<kill>
- // ... # r1 not clobbered
- // ...
- // = load fi#1
- bool DoReMat = VRM->isReMaterialized(VirtReg);
- int SSorRMId = DoReMat
- ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
- unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
- if (InReg == Phys) {
- // If the value is already available in the expected register, save
- // a reload / remat.
- if (SSorRMId)
- DEBUG(dbgs() << "Reusing RM#"
- << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
- DEBUG(dbgs() << " from physreg "
- << TRI->getName(InReg) << " for " << PrintReg(VirtReg)
- <<" instead of reloading into physreg "
- << TRI->getName(Phys) << '\n');
-
- // Reusing a physreg may resurrect it. But we expect ProcessUses to update
- // the kill flags for the current instruction after processing it.
-
- ++NumOmitted;
- continue;
- } else if (InReg && InReg != Phys) {
- if (SSorRMId)
- DEBUG(dbgs() << "Reusing RM#"
- << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
- DEBUG(dbgs() << " from physreg "
- << TRI->getName(InReg) << " for " << PrintReg(VirtReg)
- <<" by copying it into physreg "
- << TRI->getName(Phys) << '\n');
-
- // If the reloaded / remat value is available in another register,
- // copy it to the desired register.
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
- *MBB->getParent());
- MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), Phys)
- .addReg(InReg, RegState::Kill);
-
- // This invalidates Phys.
- Spills.ClobberPhysReg(Phys);
- // Remember it's available.
- Spills.addAvailable(SSorRMId, Phys);
-
- CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- UpdateKills(*CopyMI, TRI, RegKills, KillOps);
-
- DEBUG(dbgs() << '\t' << *CopyMI);
- ++NumCopified;
- continue;
- }
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
- *MBB->getParent());
-
- if (VRM->isReMaterialized(VirtReg)) {
- ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM);
- } else {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI);
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM->addSpillSlotUse(SSorRMId, LoadMI);
- ++NumLoads;
- DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
- }
-
- // This invalidates Phys.
- Spills.ClobberPhysReg(Phys);
- // Remember it's available.
- Spills.addAvailable(SSorRMId, Phys);
-
- UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(dbgs() << '\t' << *prior(MII));
- }
- return true;
-}
-
-/// InsertSpills - Insert spills after MI if requested by VRM. Return
-/// true if spills were inserted.
-bool LocalRewriter::InsertSpills(MachineInstr *MI) {
- if (!VRM->isSpillPt(MI))
- return false;
- MachineBasicBlock::iterator MII = MI;
- std::vector<std::pair<unsigned,bool> > &SpillRegs =
- VRM->getSpillPtSpills(MI);
- for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
- unsigned VirtReg = SpillRegs[i].first;
- bool isKill = SpillRegs[i].second;
- if (!VRM->getPreSplitReg(VirtReg))
- continue; // Split interval spilled again.
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
- unsigned Phys = VRM->getPhys(VirtReg);
- int StackSlot = VRM->getStackSlot(VirtReg);
- MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
- TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot,
- RC, TRI);
- MachineInstr *StoreMI = prior(oldNextMII);
- VRM->addSpillSlotUse(StackSlot, StoreMI);
- DEBUG(dbgs() << "Store:\t" << *StoreMI);
- VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
- }
- return true;
-}
-
-
-/// ProcessUses - Process all of MI's spilled operands and all available
-/// operands.
-void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- BitVector &RegKills,
- ReuseInfo &ReusedOperands,
- std::vector<MachineOperand*> &KillOps) {
- // Clear kill info.
- SmallSet<unsigned, 2> KilledMIRegs;
- SmallVector<unsigned, 4> VirtUseOps;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue; // Ignore non-register operands.
-
- unsigned VirtReg = MO.getReg();
-
- if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
- // Ignore this physreg for spilling, but remember that it is used by this
- // function.
- MRI->setPhysRegUsed(VirtReg);
- continue;
- }
-
- // We want to process implicit virtual register uses first.
- if (MO.isImplicit())
- // If the virtual register is implicitly defined, emit an implicit_def
- // before it so the scavenger knows it's "defined".
- // FIXME: This is a horrible hack done by the register allocator to
- // remat a definition with a virtual register operand.
- VirtUseOps.insert(VirtUseOps.begin(), i);
- else
- VirtUseOps.push_back(i);
-
- // A partial def causes problems because the same operand both reads and
- // writes the register. This rewriter is designed to rewrite uses and defs
- // separately, so a partial def would already have been rewritten to a
- // physreg by the time we get to processing defs.
- // Add an implicit use operand to model the partial def.
- if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) &&
- MI.findRegisterUseOperandIdx(VirtReg) == -1) {
- VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands());
- MI.addOperand(MachineOperand::CreateReg(VirtReg,
- false, // isDef
- true)); // isImplicit
- DEBUG(dbgs() << "Partial redef: " << MI);
- }
- }
-
- // Process all of the spilled uses and all non-spilled reg references.
- SmallVector<int, 2> PotentialDeadStoreSlots;
- KilledMIRegs.clear();
- for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
- unsigned i = VirtUseOps[j];
- unsigned VirtReg = MI.getOperand(i).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "Not a virtual register?");
-
- unsigned SubIdx = MI.getOperand(i).getSubReg();
- if (VRM->isAssignedReg(VirtReg)) {
- // This virtual register was assigned a physreg!
- unsigned Phys = VRM->getPhys(VirtReg);
- MRI->setPhysRegUsed(Phys);
- if (MI.getOperand(i).isDef())
- ReusedOperands.markClobbered(Phys);
- substitutePhysReg(MI.getOperand(i), Phys, *TRI);
- if (VRM->isImplicitlyDefined(VirtReg))
- // FIXME: Is this needed?
- BuildMI(*MBB, &MI, MI.getDebugLoc(),
- TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
- continue;
- }
-
- // This virtual register is now known to be a spilled value.
- if (!MI.getOperand(i).isUse())
- continue; // Handle defs in the loop below (handle use&def here though)
-
- bool AvoidReload = MI.getOperand(i).isUndef();
- // Check if it is defined by an implicit def. It should not be spilled.
- // Note, this is for correctness reasons. e.g.
- // 8 %reg1024<def> = IMPLICIT_DEF
- // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
- // The live range [12, 14) is not part of the r1024 live interval since
- // it's defined by an implicit def. It will not conflict with the live
- // interval of r1025. Now suppose both registers are spilled; you can
- // easily see a situation where both registers are reloaded before
- // the INSERT_SUBREG and both target registers would overlap.
- bool DoReMat = VRM->isReMaterialized(VirtReg);
- int SSorRMId = DoReMat
- ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
- int ReuseSlot = SSorRMId;
-
- // Check to see if this stack slot is available.
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
-
- // If this is a sub-register use, make sure the reuse register is in the
- // right register class. For example, for x86 not all of the 32-bit
- // registers have accessible sub-registers.
- // Similarly so for EXTRACT_SUBREG. Consider this:
- // EDI = op
- // MOV32_mr fi#1, EDI
- // ...
- // = EXTRACT_SUBREG fi#1
- // fi#1 is available in EDI, but it cannot be reused because it's not in
- // the right register file.
- if (PhysReg && !AvoidReload && SubIdx) {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- if (!RC->contains(PhysReg))
- PhysReg = 0;
- }
-
- if (PhysReg && !AvoidReload) {
- // This spilled operand might be part of a two-address operand. If this
- // is the case, then changing it will necessarily require changing the
- // def part of the instruction as well. However, in some cases, we
- // aren't allowed to modify the reused register. If none of these cases
- // apply, reuse it.
- bool CanReuse = true;
- bool isTied = MI.isRegTiedToDefOperand(i);
- if (isTied) {
- // Okay, we have a two address operand. We can reuse this physreg as
- // long as we are allowed to clobber the value and there isn't an
- // earlier def that has already clobbered the physreg.
- CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
- Spills.canClobberPhysReg(PhysReg);
- }
- // If this is an asm, and a PhysReg alias is used elsewhere as an
- // earlyclobber operand, we can't also use it as an input.
- if (MI.isInlineAsm()) {
- for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
- MachineOperand &MOk = MI.getOperand(k);
- if (MOk.isReg() && MOk.isEarlyClobber() &&
- TRI->regsOverlap(MOk.getReg(), PhysReg)) {
- CanReuse = false;
- DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
- << " for " << PrintReg(VirtReg) << ": " << MOk
- << '\n');
- break;
- }
- }
- }
-
- if (CanReuse) {
- // If this stack slot value is already available, reuse it!
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Reusing RM#"
- << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
- DEBUG(dbgs() << " from physreg "
- << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg)
- << " instead of reloading into "
- << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n');
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
-
- // Reusing a physreg may resurrect it. But we expect ProcessUses to
- // update the kill flags for the current instr after processing it.
-
- // The only technical detail we have is that we don't know that
- // PhysReg won't be clobbered by a reloaded stack slot that occurs
- // later in the instruction. In particular, consider 'op V1, V2'.
- // If V1 is available in physreg R0, we would choose to reuse it
- // here, instead of reloading it into the register the allocator
- // indicated (say R1). However, V2 might have to be reloaded
- // later, and it might indicate that it needs to live in R0. When
- // this occurs, we need to have information available that
- // indicates it is safe to use R1 for the reload instead of R0.
- //
- // To further complicate matters, we might conflict with an alias,
- // or R0 and R1 might not be compatible with each other. In this
- // case, we actually insert a reload for V1 in R1, ensuring that
- // we can get at R0 or its alias.
- ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
- VRM->getPhys(VirtReg), VirtReg);
- if (isTied)
- // Only mark it clobbered if this is a use&def operand.
- ReusedOperands.markClobbered(PhysReg);
- ++NumReused;
-
- if (MI.getOperand(i).isKill() &&
- ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
-
- // The store of this spilled value is potentially dead, but we
- // won't know for certain until we've confirmed that the re-use
- // above is valid, which means waiting until the other operands
- // are processed. For now we just track the spill slot, we'll
- // remove it after the other operands are processed if valid.
-
- PotentialDeadStoreSlots.push_back(ReuseSlot);
- }
-
- // Mark it isKill if there are no other uses of the same virtual
- // register and it's not a two-address operand. IsKill will be
- // unset if the reg is reused.
- if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
- MI.getOperand(i).setIsKill();
- KilledMIRegs.insert(VirtReg);
- }
- continue;
- } // CanReuse
-
- // Otherwise we have a situation where we have a two-address instruction
- // whose mod/ref operand needs to be reloaded. This reload is already
- // available in some register "PhysReg", but if we used PhysReg as the
- // operand to our 2-addr instruction, the instruction would modify
- // PhysReg. This isn't cool if something later uses PhysReg and expects
- // to get its initial value.
- //
- // To avoid this problem, and to avoid doing a load right after a store,
- // we emit a copy from PhysReg into the designated register for this
- // operand.
- //
- // This case also applies to an earlyclobber'd PhysReg.
- unsigned DesignatedReg = VRM->getPhys(VirtReg);
- assert(DesignatedReg && "Must map virtreg to physreg!");
-
- // Note that, if we reused a register for a previous operand, the
- // register we want to reload into might not actually be
- // available. If this occurs, use the register indicated by the
- // reuser.
- if (ReusedOperands.hasReuses())
- DesignatedReg = ReusedOperands.
- GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
- MaybeDeadStores, RegKills, KillOps, *VRM);
-
- // If the mapped designated register is actually the physreg we have
- // incoming, we don't need to insert a dead copy.
- if (DesignatedReg == PhysReg) {
- // If this stack slot value is already available, reuse it!
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Reusing RM#"
- << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
- DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
- << " for " << PrintReg(VirtReg)
- << " instead of reloading into same physreg.\n");
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- ReusedOperands.markClobbered(RReg);
- ++NumReused;
- continue;
- }
-
- MRI->setPhysRegUsed(DesignatedReg);
- ReusedOperands.markClobbered(DesignatedReg);
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
- SSorRMId, TII, *MBB->getParent());
- MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
- TII->get(TargetOpcode::COPY),
- DesignatedReg).addReg(PhysReg);
- CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- UpdateKills(*CopyMI, TRI, RegKills, KillOps);
-
- // This invalidates DesignatedReg.
- Spills.ClobberPhysReg(DesignatedReg);
-
- Spills.addAvailable(ReuseSlot, DesignatedReg);
- unsigned RReg =
- SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- DEBUG(dbgs() << '\t' << *prior(InsertLoc));
- ++NumReused;
- continue;
- } // if (PhysReg)
-
- // Otherwise, reload it and remember that we have it.
- PhysReg = VRM->getPhys(VirtReg);
- assert(PhysReg && "Must map virtreg to physreg!");
-
- // Note that, if we reused a register for a previous operand, the
- // register we want to reload into might not actually be
- // available. If this occurs, use the register indicated by the
- // reuser.
- if (ReusedOperands.hasReuses())
- PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
- Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
-
- MRI->setPhysRegUsed(PhysReg);
- ReusedOperands.markClobbered(PhysReg);
- if (AvoidReload)
- ++NumAvoided;
- else {
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat,
- SSorRMId, TII, *MBB->getParent());
-
- if (DoReMat) {
- ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
- } else {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM->addSpillSlotUse(SSorRMId, LoadMI);
- ++NumLoads;
- DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
- }
- // This invalidates PhysReg.
- Spills.ClobberPhysReg(PhysReg);
-
- // Any stores to this stack slot are not dead anymore.
- if (!DoReMat)
- MaybeDeadStores[SSorRMId] = NULL;
- Spills.addAvailable(SSorRMId, PhysReg);
- // Assumes this is the last use. IsKill will be unset if reg is reused
- // unless it's a two-address operand.
- if (!MI.isRegTiedToDefOperand(i) &&
- KilledMIRegs.count(VirtReg) == 0) {
- MI.getOperand(i).setIsKill();
- KilledMIRegs.insert(VirtReg);
- }
-
- UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(dbgs() << '\t' << *prior(InsertLoc));
- }
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- }
-
- // Ok - now we can remove stores that have been confirmed dead.
- for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
- // This was the last use and the spilled value is still available
- // for reuse. That means the spill was unnecessary!
- int PDSSlot = PotentialDeadStoreSlots[j];
- MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
- if (DeadStore) {
- DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
- InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
- EraseInstr(DeadStore);
- MaybeDeadStores[PDSSlot] = NULL;
- ++NumDSE;
- }
- }
-}
-
-/// RewriteMBB - Keep track of which spills are available even after the
-/// register allocator is done with them. If possible, avoid reloading vregs.
-void
-LocalRewriter::RewriteMBB(LiveIntervals *LIs,
- AvailableSpills &Spills, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
-
- DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '"
- << MBB->getName() << "':\n");
-
- MachineFunction &MF = *MBB->getParent();
-
- // MaybeDeadStores - When we need to write a value back into a stack slot,
- // keep track of the inserted store. If the stack slot value is never read
- // (because the value was used from some available register, for example) and
- // is subsequently stored to, the original store is dead. This map keeps track
- // of inserted stores that are not used. If we see a subsequent store to the
- // same stack slot, the original store is deleted.
- std::vector<MachineInstr*> MaybeDeadStores;
- MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
-
- // ReMatDefs - These are rematerializable def MIs which are not deleted.
- SmallSet<MachineInstr*, 4> ReMatDefs;
-
- // Keep track of the registers we have already spilled in case there are
- // multiple defs of the same register in MI.
- SmallSet<unsigned, 8> SpilledMIRegs;
-
- RegKills.reset();
- KillOps.clear();
- KillOps.resize(TRI->getNumRegs(), NULL);
-
- DistanceMap.clear();
- for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
- MII != E; ) {
- MachineBasicBlock::iterator NextMII = llvm::next(MII);
-
- if (OptimizeByUnfold(MII, MaybeDeadStores, Spills, RegKills, KillOps))
- NextMII = llvm::next(MII);
-
- if (InsertEmergencySpills(MII))
- NextMII = llvm::next(MII);
-
- InsertRestores(MII, Spills, RegKills, KillOps);
-
- if (InsertSpills(MII))
- NextMII = llvm::next(MII);
-
- bool Erased = false;
- bool BackTracked = false;
- MachineInstr &MI = *MII;
-
- // Remember DbgValue's which reference stack slots.
- if (MI.isDebugValue() && MI.getOperand(0).isFI())
- Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI);
-
- /// ReusedOperands - Keep track of operand reuse in case we need to undo
- /// reuse.
- ReuseInfo ReusedOperands(MI, TRI);
-
- ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps);
-
- DEBUG(dbgs() << '\t' << MI);
-
-
- // If we have folded references to memory operands, make sure we clear all
- // physical registers that may contain the value of the spilled virtual
- // register.
-
- // Copy the folded virts to a small vector, we may change MI2VirtMap.
- SmallVector<std::pair<unsigned, VirtRegMap::ModRef>, 4> FoldedVirts;
- // C++0x FTW!
- for (std::pair<VirtRegMap::MI2VirtMapTy::const_iterator,
- VirtRegMap::MI2VirtMapTy::const_iterator> FVRange =
- VRM->getFoldedVirts(&MI);
- FVRange.first != FVRange.second; ++FVRange.first)
- FoldedVirts.push_back(FVRange.first->second);
-
- SmallSet<int, 2> FoldedSS;
- for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) {
- unsigned VirtReg = FoldedVirts[FVI].first;
- VirtRegMap::ModRef MR = FoldedVirts[FVI].second;
- DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << " MR: " << MR);
-
- int SS = VRM->getStackSlot(VirtReg);
- if (SS == VirtRegMap::NO_STACK_SLOT)
- continue;
- FoldedSS.insert(SS);
- DEBUG(dbgs() << " - StackSlot: " << SS << "\n");
-
- // If this folded instruction is just a use, check to see if it's a
- // straight load from the virt reg slot.
- if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
- int FrameIdx;
- unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
- if (DestReg && FrameIdx == SS) {
- // If this spill slot is available, turn it into a copy (or nothing)
- // instead of leaving it as a load!
- if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
- DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
- if (DestReg != InReg) {
- MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
- MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(),
- TII->get(TargetOpcode::COPY))
- .addReg(DestReg, RegState::Define, DefMO->getSubReg())
- .addReg(InReg, RegState::Kill);
- // Revisit the copy so we make sure to notice the effects of the
- // operation on the destreg (either needing to RA it if it's
- // virtual or needing to clobber any values if it's physical).
- NextMII = CopyMI;
- NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- BackTracked = true;
- } else {
- DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- // InvalidateKills resurrects any prior kill of the copy's source
- // allowing the source reg to be reused in place of the copy.
- Spills.disallowClobberPhysReg(InReg);
- }
-
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- goto ProcessNextInst;
- }
- } else {
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
- SmallVector<MachineInstr*, 4> NewMIs;
- if (PhysReg &&
- TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){
- MBB->insert(MII, NewMIs[0]);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- --NextMII; // backtrack to the unfolded instruction.
- BackTracked = true;
- goto ProcessNextInst;
- }
- }
- }
-
- // If this reference is not a use, any previous store is now dead.
- // Otherwise, the store to this stack slot is not dead anymore.
- MachineInstr* DeadStore = MaybeDeadStores[SS];
- if (DeadStore) {
- bool isDead = !(MR & VirtRegMap::isRef);
- MachineInstr *NewStore = NULL;
- if (MR & VirtRegMap::isModRef) {
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
- SmallVector<MachineInstr*, 4> NewMIs;
- // We can reuse this physreg as long as we are allowed to clobber
- // the value and there isn't an earlier def that has already clobbered
- // the physreg.
- if (PhysReg &&
- !ReusedOperands.isClobbered(PhysReg) &&
- Spills.canClobberPhysReg(PhysReg) &&
- !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
- MachineOperand *KillOpnd =
- DeadStore->findRegisterUseOperand(PhysReg, true);
- // Note, if the store is storing a sub-register, it's possible the
- // super-register is needed below.
- if (KillOpnd && !KillOpnd->getSubReg() &&
- TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){
- MBB->insert(MII, NewMIs[0]);
- NewStore = NewMIs[1];
- MBB->insert(MII, NewStore);
- VRM->addSpillSlotUse(SS, NewStore);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- --NextMII;
- --NextMII; // backtrack to the unfolded instruction.
- BackTracked = true;
- isDead = true;
- ++NumSUnfold;
- }
- }
- }
-
- if (isDead) { // Previous store is dead.
- // If we get here, the store is dead, nuke it now.
- DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
- InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
- EraseInstr(DeadStore);
- if (!NewStore)
- ++NumDSE;
- }
-
- MaybeDeadStores[SS] = NULL;
- if (NewStore) {
- // Treat this store as a spill merged into a copy. That makes the
- // stack slot value available.
- VRM->virtFolded(VirtReg, NewStore, VirtRegMap::isMod);
- goto ProcessNextInst;
- }
- }
-
- // If the spill slot value is available, and this is a new definition of
- // the value, the value is not available anymore.
- if (MR & VirtRegMap::isMod) {
- // Notice that the value in this stack slot has been modified.
- Spills.ModifyStackSlotOrReMat(SS);
-
- // If this is *just* a mod of the value, check to see if this is just a
- // store to the spill slot (i.e. the spill got merged into the copy). If
- // so, realize that the vreg is available now, and add the store to the
- // MaybeDeadStore info.
- int StackSlot;
- if (!(MR & VirtRegMap::isRef)) {
- if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
- assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- "Src hasn't been allocated yet?");
-
- if (CommuteToFoldReload(MII, VirtReg, SrcReg, StackSlot,
- Spills, RegKills, KillOps, TRI)) {
- NextMII = llvm::next(MII);
- BackTracked = true;
- goto ProcessNextInst;
- }
-
- // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark
- // this as a potentially dead store in case there is a subsequent
- // store into the stack slot without a read from it.
- MaybeDeadStores[StackSlot] = &MI;
-
- // If the stack slot value was previously available in some other
- // register, change it now. Otherwise, make the value
- // available in SrcReg.
- Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg));
- }
- }
- }
- }
-
- // Process all of the spilled defs.
- SpilledMIRegs.clear();
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!(MO.isReg() && MO.getReg() && MO.isDef()))
- continue;
-
- unsigned VirtReg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) {
- // Check to see if this is a noop copy. If so, eliminate the
- // instruction before considering the dest reg to be changed.
- // Also check if it's copying from an "undef"; if so, we can't
- // eliminate this or else the undef marker is lost and it will
- // confuse the scavenger. This is extremely rare.
- if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() &&
- MI.getNumOperands() == 2) {
- ++NumDCE;
- DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- SmallVector<unsigned, 2> KillRegs;
- InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
- if (MO.isDead() && !KillRegs.empty()) {
- // Source register or an implicit super/sub-register use is killed.
- assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg()));
- // Last def is now dead.
- TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps);
- }
- EraseInstr(&MI);
- Erased = true;
- Spills.disallowClobberPhysReg(VirtReg);
- goto ProcessNextInst;
- }
-
- // If it's not a no-op copy, it clobbers the value in the destreg.
- Spills.ClobberPhysReg(VirtReg);
- ReusedOperands.markClobbered(VirtReg);
-
- // Check to see if this instruction is a load from a stack slot into
- // a register. If so, this provides the stack slot value in the reg.
- int FrameIdx;
- if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
- assert(DestReg == VirtReg && "Unknown load situation!");
-
- // If it is a folded reference, then it's not safe to clobber.
- bool Folded = FoldedSS.count(FrameIdx);
- // Otherwise, if it wasn't available, remember that it is now!
- Spills.addAvailable(FrameIdx, DestReg, !Folded);
- goto ProcessNextInst;
- }
-
- continue;
- }
-
- unsigned SubIdx = MO.getSubReg();
- bool DoReMat = VRM->isReMaterialized(VirtReg);
- if (DoReMat)
- ReMatDefs.insert(&MI);
-
- // The only vregs left are stack slot definitions.
- int StackSlot = VRM->getStackSlot(VirtReg);
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
-
- // If this def is part of a two-address operand, make sure to execute
- // the store from the correct physical register.
- unsigned PhysReg;
- unsigned TiedOp;
- if (MI.isRegTiedToUseOperand(i, &TiedOp)) {
- PhysReg = MI.getOperand(TiedOp).getReg();
- if (SubIdx) {
- unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI);
- assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg &&
- "Can't find corresponding super-register!");
- PhysReg = SuperReg;
- }
- } else {
- PhysReg = VRM->getPhys(VirtReg);
- if (ReusedOperands.isClobbered(PhysReg)) {
- // Another def has taken the assigned physreg. It must have been a
- // use&def which got it due to reuse. Undo the reuse!
- PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
- Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
- }
- }
-
- // If StackSlot is available in a register that also holds other stack
- // slots, clobber those stack slots now.
- Spills.ClobberSharingStackSlots(StackSlot);
-
- assert(PhysReg && "VR not assigned a physical register?");
- MRI->setPhysRegUsed(PhysReg);
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- ReusedOperands.markClobbered(RReg);
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
-
- if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) {
- MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
- SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true,
- LastStore, Spills, ReMatDefs, RegKills, KillOps);
- NextMII = llvm::next(MII);
-
- // Check to see if this is a noop copy. If so, eliminate the
- // instruction before considering the dest reg to be changed.
- if (MI.isIdentityCopy()) {
- ++NumDCE;
- DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- UpdateKills(*LastStore, TRI, RegKills, KillOps);
- goto ProcessNextInst;
- }
- }
- }
- ProcessNextInst:
- // Delete dead instructions without side effects.
- if (!Erased && !BackTracked && isSafeToDelete(MI)) {
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- }
- if (!Erased)
- DistanceMap.insert(std::make_pair(&MI, DistanceMap.size()));
- if (!Erased && !BackTracked) {
- for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
- UpdateKills(*II, TRI, RegKills, KillOps);
- }
- MII = NextMII;
- }
-
-}
-
-llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
- switch (RewriterOpt) {
- default: llvm_unreachable("Unreachable!");
- case local:
- return new LocalRewriter();
- case trivial:
- return new TrivialRewriter();
- }
-}
diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h
deleted file mode 100644
index 93474e0d7ff7..000000000000
--- a/lib/CodeGen/VirtRegRewriter.h
+++ /dev/null
@@ -1,32 +0,0 @@
-//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
-#define LLVM_CODEGEN_VIRTREGREWRITER_H
-
-namespace llvm {
- class LiveIntervals;
- class MachineFunction;
- class VirtRegMap;
-
- /// VirtRegRewriter interface: Implementations of this interface assign
- /// spilled virtual registers to stack slots, rewriting the code.
- struct VirtRegRewriter {
- virtual ~VirtRegRewriter();
- virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
- LiveIntervals* LIs) = 0;
- };
-
- /// createVirtRegRewriter - Create and return a rewriter object, as specified
- /// on the command line.
- VirtRegRewriter* createVirtRegRewriter();
-
-}
-
-#endif