author     Roman Divacky <rdivacky@FreeBSD.org>    2010-07-13 17:19:57 +0000
committer  Roman Divacky <rdivacky@FreeBSD.org>    2010-07-13 17:19:57 +0000
commit     66e41e3c6e8b8fbc48d5d3b4d2bd9ce0be4ecb75 (patch)
tree       9de1c5f67a98cd0e73c60838396486c984f63ac2 /lib/CodeGen
parent     abdf259d487163e72081a8cf4991b1617206b41e (diff)
download   src-66e41e3c6e8b8fbc48d5d3b4d2bd9ce0be4ecb75.tar.gz
           src-66e41e3c6e8b8fbc48d5d3b4d2bd9ce0be4ecb75.zip
Update LLVM to r108243. (vendor/llvm/llvm-r108243)
Notes:
    svn path=/vendor/llvm/dist/; revision=210006
    svn path=/vendor/llvm/llvm-r108243/; revision=210077; tag=vendor/llvm/llvm-r108243
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 83
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.h | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 94
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 246
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 18
-rw-r--r--  lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 6
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 68
-rw-r--r--  lib/CodeGen/BranchFolding.h | 5
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 7
-rw-r--r--  lib/CodeGen/CalcSpillWeights.cpp | 2
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp (renamed from lib/CodeGen/SelectionDAG/CallingConvLower.cpp) | 14
-rw-r--r--  lib/CodeGen/CodePlacementOpt.cpp | 4
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 158
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.h | 5
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 110
-rw-r--r--  lib/CodeGen/ELFCodeEmitter.cpp | 2
-rw-r--r--  lib/CodeGen/ExactHazardRecognizer.h | 86
-rw-r--r--  lib/CodeGen/GCStrategy.cpp | 6
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 400
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 408
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 51
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 15
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp | 2
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 66
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 236
-rw-r--r--  lib/CodeGen/LiveStackAnalysis.cpp | 4
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 9
-rw-r--r--  lib/CodeGen/LowerSubregs.cpp | 217
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 129
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 46
-rw-r--r--  lib/CodeGen/MachineDominators.cpp | 1
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 16
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 113
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 118
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 130
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 102
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 3
-rw-r--r--  lib/CodeGen/OptimizeExts.cpp | 24
-rw-r--r--  lib/CodeGen/OptimizePHIs.cpp | 5
-rw-r--r--  lib/CodeGen/PBQP/HeuristicSolver.h | 2
-rw-r--r--  lib/CodeGen/PBQP/Heuristics/Briggs.h | 5
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 63
-rw-r--r--  lib/CodeGen/Passes.cpp | 26
-rw-r--r--  lib/CodeGen/PostRAHazardRecognizer.cpp (renamed from lib/CodeGen/ExactHazardRecognizer.cpp) | 26
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 51
-rw-r--r--  lib/CodeGen/PreAllocSplitting.cpp | 89
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 33
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 65
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 226
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 33
-rw-r--r--  lib/CodeGen/RegAllocLocal.cpp | 1254
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 25
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 156
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 33
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 37
-rw-r--r--  lib/CodeGen/ScheduleDAGEmit.cpp | 14
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.h | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/CMakeLists.txt | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 267
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 354
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 63
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h | 144
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 133
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 335
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 40
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 150
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 72
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 8
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 140
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 17
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 241
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 158
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1248
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 12
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 421
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 218
-rw-r--r--  lib/CodeGen/ShadowStackGC.cpp | 14
-rw-r--r--  lib/CodeGen/SimpleHazardRecognizer.h | 89
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp | 1790
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.h | 84
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 114
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 4
-rw-r--r--  lib/CodeGen/Spiller.cpp | 209
-rw-r--r--  lib/CodeGen/Spiller.h | 18
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 16
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 14
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 22
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 18
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp | 182
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 112
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 303
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 110
99 files changed, 5841 insertions, 6454 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 4008a6a63cf8..a7189acc3fec 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -114,6 +115,7 @@ AggressiveAntiDepBreaker(MachineFunction& MFi,
TargetSubtarget::RegClassVector& CriticalPathRCs) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
+ TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
AllocatableSet(TRI->getAllocatableSet(MF)),
State(NULL) {
@@ -163,25 +165,27 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
DefIndices[AliasReg] = ~0u;
}
}
- } else {
- // In a non-return block, examine the live-in regs of all successors.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ }
+
+ // In a non-return block, examine the live-in regs of all successors.
+ // Note a return block can have successors if the return instruction is
+ // predicated.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- State->UnionGroups(Reg, 0);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- State->UnionGroups(AliasReg, 0);
- KillIndices[AliasReg] = BB->size();
- DefIndices[AliasReg] = ~0u;
- }
+ unsigned Reg = *I;
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ State->UnionGroups(AliasReg, 0);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
}
- }
+ }
// Mark live-out callee-saved registers. In a return block this is
// all callee-saved registers. In non-return this is any
@@ -390,7 +394,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq()) {
+ if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ TII->isPredicated(MI)) {
DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
State->UnionGroups(Reg, 0);
}
@@ -443,6 +448,26 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
+ // If MI's uses have special allocation requirement, don't allow
+ // any use registers to be changed. Also assume all registers
+ // used in a call must not be changed (ABI).
+ // FIXME: The issue with predicated instruction is more complex. We are being
+ // conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+ // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+ // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special = MI->getDesc().isCall() ||
+ MI->getDesc().hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI);
+
// Scan the register uses for this instruction and update
// live-ranges, groups and RegRefs.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -459,10 +484,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// for the register.
HandleLastUse(Reg, Count, "(last-use)");
- // If MI's uses have special allocation requirement, don't allow
- // any use registers to be changed. Also assume all registers
- // used in a call must not be changed (ABI).
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraSrcRegAllocReq()) {
+ if (Special) {
DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
State->UnionGroups(Reg, 0);
}
@@ -604,8 +626,12 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// order. If that register is available, and the corresponding
// registers are available for the other group subregisters, then we
// can use those registers to rename.
+
+ // FIXME: Using getMinimalPhysRegClass is very conservative. We should
+ // check every use of the register and find the largest register class
+ // that can be used in all of them.
const TargetRegisterClass *SuperRC =
- TRI->getPhysicalRegisterRegClass(SuperReg, MVT::Other);
+ TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF);
const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF);
@@ -905,6 +931,19 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
AggressiveAntiDepState::RegisterReference>::iterator
Q = Range.first, QE = Range.second; Q != QE; ++Q) {
Q->second.Operand->setReg(NewReg);
+ // If the SU for the instruction being updated has debug
+ // information related to the anti-dependency register, make
+ // sure to update that as well.
+ const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()];
+ if (!SU) continue;
+ for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) {
+ MachineInstr *DI = SU->DbgInstrList[i];
+ assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() &&
+ DI->getOperand(0).getReg()
+ && "Non register dbg_value attached to SUnit!");
+ if (DI->getOperand(0).getReg() == AntiDepReg)
+ DI->getOperand(0).setReg(NewReg);
+ }
}
// We just went back in time and modified history; the
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 506d43e7f3fc..91ebb850d19d 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -115,6 +115,7 @@ namespace llvm {
class AggressiveAntiDepBreaker : public AntiDepBreaker {
MachineFunction& MF;
MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
/// AllocatableSet - The set of allocatable registers.
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 5a0c27b300ab..d9387a8e72c5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -199,7 +199,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
case GlobalValue::LinkOnceODRLinkage:
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
- case GlobalValue::LinkerPrivateLinkage:
+ case GlobalValue::LinkerPrivateWeakLinkage:
if (MAI->getWeakDefDirective() != 0) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
@@ -225,6 +225,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
break;
case GlobalValue::PrivateLinkage:
case GlobalValue::InternalLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
break;
default:
llvm_unreachable("Unknown linkage type!");
@@ -330,7 +331,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
else if (GVKind.isThreadData()) {
OutStreamer.SwitchSection(TheSection);
- EmitLinkage(GV->getLinkage(), MangSym);
EmitAlignment(AlignLog, GV);
OutStreamer.EmitLabel(MangSym);
@@ -353,7 +353,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// - spare pointer, used when mapped by the runtime
// - pointer to mangled symbol above with initializer
unsigned PtrSize = TD->getPointerSizeInBits()/8;
- OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("__tlv_bootstrap"),
+ OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
PtrSize, 0);
OutStreamer.EmitIntValue(0, PtrSize, 0);
OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0);
@@ -428,20 +428,12 @@ void AsmPrinter::EmitFunctionHeader() {
// Emit pre-function debug and/or EH information.
if (DE) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(EHTimerName, DWARFGroupName);
- DE->BeginFunction(MF);
- } else {
- DE->BeginFunction(MF);
- }
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->BeginFunction(MF);
}
if (DD) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName);
- DD->beginFunction(MF);
- } else {
- DD->beginFunction(MF);
- }
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->beginFunction(MF);
}
}
@@ -458,14 +450,11 @@ void AsmPrinter::EmitFunctionEntryLabel() {
}
-/// EmitComments - Pretty-print comments for instructions.
-static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
- const MachineFunction *MF = MI.getParent()->getParent();
- const TargetMachine &TM = MF->getTarget();
-
- DebugLoc DL = MI.getDebugLoc();
+static void EmitDebugLoc(DebugLoc DL, const MachineFunction *MF,
+ raw_ostream &CommentOS) {
+ const LLVMContext &Ctx = MF->getFunction()->getContext();
if (!DL.isUnknown()) { // Print source line info.
- DIScope Scope(DL.getScope(MF->getFunction()->getContext()));
+ DIScope Scope(DL.getScope(Ctx));
// Omit the directory, because it's likely to be long and uninteresting.
if (Scope.Verify())
CommentOS << Scope.getFilename();
@@ -474,6 +463,23 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
CommentOS << ':' << DL.getLine();
if (DL.getCol() != 0)
CommentOS << ':' << DL.getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ CommentOS << "[ ";
+ EmitDebugLoc(InlinedAtDL, MF, CommentOS);
+ CommentOS << " ]";
+ }
+ }
+}
+
+/// EmitComments - Pretty-print comments for instructions.
+static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetMachine &TM = MF->getTarget();
+
+ DebugLoc DL = MI.getDebugLoc();
+ if (!DL.isUnknown()) { // Print source line info.
+ EmitDebugLoc(DL, MF, CommentOS);
CommentOS << '\n';
}
@@ -611,12 +617,8 @@ void AsmPrinter::EmitFunctionBody() {
}
if (ShouldPrintDebugScopes) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName);
- DD->beginScope(II);
- } else {
- DD->beginScope(II);
- }
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->beginScope(II);
}
if (isVerbose())
@@ -649,12 +651,8 @@ void AsmPrinter::EmitFunctionBody() {
}
if (ShouldPrintDebugScopes) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName);
- DD->endScope(II);
- } else {
- DD->endScope(II);
- }
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endScope(II);
}
}
}
@@ -692,20 +690,12 @@ void AsmPrinter::EmitFunctionBody() {
// Emit post-function debug information.
if (DD) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName);
- DD->endFunction(MF);
- } else {
- DD->endFunction(MF);
- }
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endFunction(MF);
}
if (DE) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(EHTimerName, DWARFGroupName);
- DE->EndFunction();
- } else {
- DE->EndFunction();
- }
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->EndFunction();
}
MMI->EndFunction();
@@ -730,19 +720,15 @@ bool AsmPrinter::doFinalization(Module &M) {
// Finalize debug and EH information.
if (DE) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(EHTimerName, DWARFGroupName);
- DE->EndModule();
- } else {
+ {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
DE->EndModule();
}
delete DE; DE = 0;
}
if (DD) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName);
- DD->endModule();
- } else {
+ {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
DD->endModule();
}
delete DD; DD = 0;
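
The AsmPrinter.cpp hunks above repeatedly collapse the duplicated "if (TimePassesIsEnabled) { NamedRegionTimer T(Name, Group); work(); } else { work(); }" construct into a single scoped timer whose third constructor argument is the enable flag. A minimal sketch of that pattern in isolation; doWork() and the literal region names below are placeholders standing in for calls such as DE->BeginFunction(MF) and the DbgTimerName/DWARFGroupName constants used in the real file:

    #include "llvm/Support/Timer.h"

    using llvm::NamedRegionTimer;

    void doWork();   // hypothetical stand-in for the call being timed

    void emitWithOptionalTiming(bool TimePassesIsEnabled) {
      // The timer is started only when the flag is true, so the timed and
      // untimed paths no longer have to duplicate the call.
      NamedRegionTimer T("dwarf debug", "DWARF Emission", TimePassesIsEnabled);
      doWork();
    } // T's destructor stops the timer (if it was started) when the scope ends.
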
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index ba6fed2a78ba..f6f3bae42a80 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -83,7 +83,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
// Tell SrcMgr about this buffer, it takes ownership of the buffer.
SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
- AsmParser Parser(SrcMgr, OutContext, OutStreamer, *MAI);
+ AsmParser Parser(TM.getTarget(), SrcMgr, OutContext, OutStreamer, *MAI);
OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(Parser));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
@@ -279,7 +279,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
// Okay, we finally have a value number. Ask the target to print this
// operand!
if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
- unsigned OpNo = 1;
+ unsigned OpNo = 2;
bool Error = false;
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index b2c70d51f5a5..21396ca37f06 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -201,6 +201,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
case dwarf::DW_FORM_data8: Size = 8; break;
case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
+ case dwarf::DW_FORM_addr: Size = Asm->getTargetData().getPointerSize(); break;
default: llvm_unreachable("DIE Value form not supported yet");
}
Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
@@ -221,6 +222,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
case dwarf::DW_FORM_data8: return sizeof(int64_t);
case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
+ case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize();
default: llvm_unreachable("DIE Value form not supported yet"); break;
}
return 0;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 890507cf3148..65c1d190216f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -44,7 +44,8 @@ using namespace llvm;
static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
cl::desc("Print DbgScope information for each machine instruction"));
-static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
+static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
+ cl::Hidden,
cl::desc("Disable debug info printing"));
static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
@@ -79,15 +80,13 @@ class CompileUnit {
/// IndexTyDie - An anonymous type for index type. Owned by CUDie.
DIE *IndexTyDie;
- /// GVToDieMap - Tracks the mapping of unit level debug informaton
+ /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton
/// variables to debug information entries.
- /// FIXME : Rename GVToDieMap -> NodeToDieMap
- DenseMap<const MDNode *, DIE *> GVToDieMap;
+ DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
- /// GVToDIEEntryMap - Tracks the mapping of unit level debug informaton
+ /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton
/// descriptors to debug information entries using a DIEEntry proxy.
- /// FIXME : Rename
- DenseMap<const MDNode *, DIEEntry *> GVToDIEEntryMap;
+ DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
/// Globals - A map of globally visible named entities for this unit.
///
@@ -123,25 +122,25 @@ public:
/// getDIE - Returns the debug information entry map slot for the
/// specified debug variable.
- DIE *getDIE(const MDNode *N) { return GVToDieMap.lookup(N); }
+ DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
/// insertDIE - Insert DIE into the map.
void insertDIE(const MDNode *N, DIE *D) {
- GVToDieMap.insert(std::make_pair(N, D));
+ MDNodeToDieMap.insert(std::make_pair(N, D));
}
/// getDIEEntry - Returns the debug information entry for the specified
/// debug variable.
DIEEntry *getDIEEntry(const MDNode *N) {
- DenseMap<const MDNode *, DIEEntry *>::iterator I = GVToDIEEntryMap.find(N);
- if (I == GVToDIEEntryMap.end())
+ DenseMap<const MDNode *, DIEEntry *>::iterator I = MDNodeToDIEEntryMap.find(N);
+ if (I == MDNodeToDIEEntryMap.end())
return NULL;
return I->second;
}
/// insertDIEEntry - Insert debug information entry into the map.
void insertDIEEntry(const MDNode *N, DIEEntry *E) {
- GVToDIEEntryMap.insert(std::make_pair(N, E));
+ MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
}
/// addDie - Adds or interns the DIE to the compile unit.
@@ -321,12 +320,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
DwarfStrSectionSym = TextSectionSym = 0;
DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
+ DwarfDebugLineSectionSym = CurrentLineSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName);
- beginModule(M);
- } else {
- beginModule(M);
+ DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
+ {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ beginModule(M);
}
}
DwarfDebug::~DwarfDebug() {
@@ -378,7 +377,8 @@ DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) {
void DwarfDebug::addUInt(DIE *Die, unsigned Attribute,
unsigned Form, uint64_t Integer) {
if (!Form) Form = DIEInteger::BestForm(false, Integer);
- DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
+ DIEValue *Value = Integer == 1 ?
+ DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
Die->addValue(Attribute, Form, Value);
}
@@ -866,6 +866,10 @@ void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
} else if (Context.isNameSpace()) {
DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
ContextDIE->addChild(Die);
+ } else if (Context.isSubprogram()) {
+ DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context),
+ /*MakeDecl=*/false);
+ ContextDIE->addChild(Die);
} else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context))
ContextDIE->addChild(Die);
else
@@ -1055,6 +1059,10 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (DIDescriptor(ContainingType).isCompositeType())
addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
getOrCreateTypeDIE(DIType(ContainingType)));
+ else {
+ DIDescriptor Context = CTy.getContext();
+ addToContextOwner(&Buffer, Context);
+ }
break;
}
default:
@@ -1065,8 +1073,9 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (!Name.empty())
addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
- if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type ||
- Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
+ if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
+ || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
+ {
// Add size if non-zero (derived types might be zero-sized.)
if (Size)
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
@@ -1329,6 +1338,9 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
// DW_TAG_inlined_subroutine may refer to this DIE.
SPCU->insertDIE(SP, SPDie);
+ // Add to context owner.
+ addToContextOwner(SPDie, SP.getContext());
+
return SPDie;
}
@@ -1379,6 +1391,7 @@ static bool isSubprogramContext(const MDNode *Context) {
DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
CompileUnit *SPCU = getCompileUnit(SPNode);
DIE *SPDie = SPCU->getDIE(SPNode);
+
assert(SPDie && "Unable to find subprogram DIE!");
DISubprogram SP(SPNode);
@@ -1412,6 +1425,14 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
SPCU->addDie(SPDie);
}
+ // Pick up abstract subprogram DIE.
+ if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsSPDIE);
+ SPCU->addDie(SPDie);
+ }
+
addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()));
addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
@@ -1483,7 +1504,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
- if (StartLabel == FunctionBeginSym || EndLabel == 0) {
+ if (StartLabel == 0 || EndLabel == 0) {
assert (0 && "Unexpected Start and End labels for a inlined scope!");
return 0;
}
@@ -1605,11 +1626,13 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
// FIXME : Handle getNumOperands != 3
if (DVInsn->getNumOperands() == 3) {
if (DVInsn->getOperand(0).isReg())
- updated = addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0));
+ updated =
+ addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0));
else if (DVInsn->getOperand(0).isImm())
updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0));
else if (DVInsn->getOperand(0).isFPImm())
- updated = addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0));
+ updated =
+ addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0));
} else {
MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
if (Location.getReg()) {
@@ -1682,8 +1705,13 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
if (Scope->getInlinedAt())
ScopeDIE = constructInlinedScopeDIE(Scope);
else if (DS.isSubprogram()) {
- if (Scope->isAbstractScope())
+ ProcessedSPNodes.insert(DS);
+ if (Scope->isAbstractScope()) {
ScopeDIE = getCompileUnit(DS)->getDIE(DS);
+ // Note down abstract DIE.
+ if (ScopeDIE)
+ AbstractSPDies.insert(std::make_pair(DS, ScopeDIE));
+ }
else
ScopeDIE = updateSubprogramScopeDIE(DS);
}
@@ -1782,11 +1810,11 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) {
addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
// Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This
// simplifies debug range entries.
- addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_data4, 0);
+ addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
// DW_AT_stmt_list is a offset of line number information for this
- // compile unit in debug_line section. It is always zero when only one
- // compile unit is emitted in one object file.
- addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+ // compile unit in debug_line section. This offset is calculated
+ // during endModule().
+ addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
if (!Dir.empty())
addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
@@ -1996,6 +2024,40 @@ void DwarfDebug::beginModule(Module *M) {
///
void DwarfDebug::endModule() {
if (!FirstCU) return;
+ const Module *M = MMI->getModule();
+ if (NamedMDNode *AllSPs = M->getNamedMetadata("llvm.dbg.sp")) {
+ for (unsigned SI = 0, SE = AllSPs->getNumOperands(); SI != SE; ++SI) {
+ if (ProcessedSPNodes.count(AllSPs->getOperand(SI)) != 0) continue;
+ DISubprogram SP(AllSPs->getOperand(SI));
+ if (!SP.Verify()) continue;
+
+ // Collect info for variables that were optimized out.
+ StringRef FName = SP.getLinkageName();
+ if (FName.empty())
+ FName = SP.getName();
+ NamedMDNode *NMD =
+ M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName)));
+ if (!NMD) continue;
+ unsigned E = NMD->getNumOperands();
+ if (!E) continue;
+ DbgScope *Scope = new DbgScope(NULL, DIDescriptor(SP), NULL);
+ for (unsigned I = 0; I != E; ++I) {
+ DIVariable DV(NMD->getOperand(I));
+ if (!DV.Verify()) continue;
+ Scope->addVariable(new DbgVariable(DV));
+ }
+
+ // Construct subprogram DIE and add variables DIEs.
+ constructSubprogramDIE(SP);
+ DIE *ScopeDIE = getCompileUnit(SP)->getDIE(SP);
+ const SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+ DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
+ if (VariableDIE)
+ ScopeDIE->addChild(VariableDIE);
+ }
+ }
+ }
// Attach DW_AT_inline attribute with inlined subprogram DIEs.
for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
@@ -2037,15 +2099,15 @@ void DwarfDebug::endModule() {
// Compute DIE offsets and sizes.
computeSizeAndOffsets();
+ // Emit source line correspondence into a debug line section.
+ emitDebugLines();
+
// Emit all the DIEs into a debug info section
emitDebugInfo();
// Corresponding abbreviations into a abbrev section.
emitAbbreviations();
- // Emit source line correspondence into a debug line section.
- emitDebugLines();
-
// Emit info into a debug pubnames section.
emitDebugPubNames();
@@ -2150,8 +2212,9 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
}
/// collectVariableInfo - Populate DbgScope entries with variables' info.
-void DwarfDebug::collectVariableInfo(const MachineFunction *MF) {
- SmallPtrSet<const MDNode *, 16> Processed;
+void
+DwarfDebug::collectVariableInfo(const MachineFunction *MF,
+ SmallPtrSet<const MDNode *, 16> &Processed) {
/// collection info from MMI table.
collectVariableInfoFromMMITable(MF, Processed);
@@ -2180,16 +2243,23 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) {
if (Processed.count(DV) != 0)
continue;
+ const MachineInstr *PrevMI = MInsn;
for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1,
ME = DbgValues.end(); MI != ME; ++MI) {
const MDNode *Var =
(*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata();
- if (Var == DV && isDbgValueInDefinedReg(*MI))
+ if (Var == DV && isDbgValueInDefinedReg(*MI) &&
+ !PrevMI->isIdenticalTo(*MI))
MultipleValues.push_back(*MI);
+ PrevMI = *MI;
}
DbgScope *Scope = findDbgScope(MInsn);
- if (!Scope && DV.getTag() == dwarf::DW_TAG_arg_variable)
+ bool CurFnArg = false;
+ if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
+ DISubprogram(DV.getContext()).describes(MF->getFunction()))
+ CurFnArg = true;
+ if (!Scope && CurFnArg)
Scope = CurrentFnDbgScope;
// If variable scope is not found then skip this variable.
if (!Scope)
@@ -2198,7 +2268,7 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) {
Processed.insert(DV);
DbgVariable *RegVar = new DbgVariable(DV);
Scope->addVariable(RegVar);
- if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+ if (!CurFnArg)
DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn);
if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
DbgVariableToDbgInstMap[AbsVar] = MInsn;
@@ -2217,7 +2287,8 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) {
const MachineInstr *Begin = NULL;
const MachineInstr *End = NULL;
for (SmallVector<const MachineInstr *, 4>::iterator
- MVI = MultipleValues.begin(), MVE = MultipleValues.end(); MVI != MVE; ++MVI) {
+ MVI = MultipleValues.begin(), MVE = MultipleValues.end();
+ MVI != MVE; ++MVI) {
if (!Begin) {
Begin = *MVI;
continue;
@@ -2241,8 +2312,11 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) {
}
// Collect info for variables that were optimized out.
+ const Function *F = MF->getFunction();
+ const Module *M = F->getParent();
if (NamedMDNode *NMD =
- MF->getFunction()->getParent()->getNamedMetadata("llvm.dbg.lv")) {
+ M->getNamedMetadata(Twine("llvm.dbg.lv.",
+ getRealLinkageName(F->getName())))) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i)));
if (!DV || !Processed.insert(DV))
@@ -2319,7 +2393,8 @@ void DwarfDebug::endScope(const MachineInstr *MI) {
}
/// getOrCreateDbgScope - Create DbgScope for the scope.
-DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt) {
+DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
+ const MDNode *InlinedAt) {
if (!InlinedAt) {
DbgScope *WScope = DbgScopeMap.lookup(Scope);
if (WScope)
@@ -2335,13 +2410,20 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl
if (!WScope->getParent()) {
StringRef SPName = DISubprogram(Scope).getLinkageName();
- if (SPName == Asm->MF->getFunction()->getName())
+ // We used to check only for a linkage name, but that fails
+ // since we began omitting the linkage name for private
+ // functions. The new way is to check for the name in metadata,
+ // but that's not supported in old .ll test cases. Ergo, we
+ // check both.
+ if (SPName == Asm->MF->getFunction()->getName() ||
+ DISubprogram(Scope).getFunction() == Asm->MF->getFunction())
CurrentFnDbgScope = WScope;
}
return WScope;
}
+ getOrCreateAbstractScope(Scope);
DbgScope *WScope = DbgScopeMap.lookup(InlinedAt);
if (WScope)
return WScope;
@@ -2355,7 +2437,6 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl
Parent->addScope(WScope);
ConcreteScopes[InlinedAt] = WScope;
- getOrCreateAbstractScope(Scope);
return WScope;
}
@@ -2365,8 +2446,6 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl
static bool hasValidLocation(LLVMContext &Ctx,
const MachineInstr *MInsn,
const MDNode *&Scope, const MDNode *&InlinedAt) {
- if (MInsn->isDebugValue())
- return false;
DebugLoc DL = MInsn->getDebugLoc();
if (DL.isUnknown()) return false;
@@ -2488,7 +2567,8 @@ bool DwarfDebug::extractScopeInformation() {
// current instruction scope does not match scope of first instruction
// in this range then create a new instruction range.
DbgRange R(RangeBeginMI, PrevMI);
- MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope,
+ PrevInlinedAt);
MIRanges.push_back(R);
}
@@ -2565,7 +2645,6 @@ void DwarfDebug::identifyScopeMarkers() {
RE = Ranges.end(); RI != RE; ++RI) {
assert(RI->first && "DbgRange does not have first instruction!");
assert(RI->second && "DbgRange does not have second instruction!");
- InsnsBeginScopeSet.insert(RI->first);
InsnsEndScopeSet.insert(RI->second);
}
}
@@ -2616,6 +2695,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
recordSourceLine(Line, Col, Scope);
+ /// ProcessedArgs - Collection of arguments already processed.
+ SmallPtrSet<const MDNode *, 8> ProcessedArgs;
+
DebugLoc PrevLoc;
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
I != E; ++I)
@@ -2624,14 +2706,19 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
const MachineInstr *MI = II;
DebugLoc DL = MI->getDebugLoc();
if (MI->isDebugValue()) {
- // DBG_VALUE needs a label if the variable is local variable or
- // an argument whose location is changing.
assert (MI->getNumOperands() > 1 && "Invalid machine instruction!");
DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata());
if (!DV.Verify()) continue;
- if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+ // If DBG_VALUE is for a local variable then it needs a label.
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable
+ && isDbgValueInUndefinedReg(MI) == false)
InsnNeedsLabel.insert(MI);
- else if (!ProcessedArgs.insert(DV))
+ // DBG_VALUE for inlined functions argument needs a label.
+ else if (!DISubprogram(getDISubprogram(DV.getContext())).
+ describes(MF->getFunction()))
+ InsnNeedsLabel.insert(MI);
+ // DBG_VALUE indicating argument location change needs a label.
+ else if (isDbgValueInUndefinedReg(MI) == false && !ProcessedArgs.insert(DV))
InsnNeedsLabel.insert(MI);
} else {
// If location is unknown then instruction needs a location only if
@@ -2664,7 +2751,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// Assumes in correct section after the entry point.
Asm->OutStreamer.EmitLabel(FunctionEndSym);
- collectVariableInfo(MF);
+ SmallPtrSet<const MDNode *, 16> ProcessedVars;
+ collectVariableInfo(MF, ProcessedVars);
// Get function line info.
if (!Lines.empty()) {
@@ -2679,9 +2767,31 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// Construct abstract scopes.
for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
- AE = AbstractScopesList.end(); AI != AE; ++AI)
- constructScopeDIE(*AI);
-
+ AE = AbstractScopesList.end(); AI != AE; ++AI) {
+ DISubprogram SP((*AI)->getScopeNode());
+ if (SP.Verify()) {
+ // Collect info for variables that were optimized out.
+ StringRef FName = SP.getLinkageName();
+ if (FName.empty())
+ FName = SP.getName();
+ const Module *M = MF->getFunction()->getParent();
+ if (NamedMDNode *NMD =
+ M->getNamedMetadata(Twine("llvm.dbg.lv.",
+ getRealLinkageName(FName)))) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i)));
+ if (!DV || !ProcessedVars.insert(DV))
+ continue;
+ DbgScope *Scope = AbstractScopes.lookup(DV.getContext());
+ if (Scope)
+ Scope->addVariable(new DbgVariable(DV));
+ }
+ }
+ }
+ if (ProcessedSPNodes.count((*AI)->getScopeNode()) == 0)
+ constructScopeDIE(*AI);
+ }
+
DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope);
if (!DisableFramePointerElim(*MF))
@@ -2696,13 +2806,11 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// Clear debug info
CurrentFnDbgScope = NULL;
InsnNeedsLabel.clear();
- ProcessedArgs.clear();
DbgVariableToFrameIndexMap.clear();
VarToAbstractVarMap.clear();
DbgVariableToDbgInstMap.clear();
DbgVariableLabelsMap.clear();
DeleteContainerSeconds(DbgScopeMap);
- InsnsBeginScopeSet.clear();
InsnsEndScopeSet.clear();
ConcreteScopes.clear();
DeleteContainerSeconds(AbstractScopes);
@@ -2764,7 +2872,8 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
/// recordSourceLine - Register a source line with debug info. Returns the
/// unique label that was emitted and which provides correspondence to
/// the source line list.
-MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S) {
+MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
+ const MDNode *S) {
StringRef Dir;
StringRef Fn;
@@ -2790,6 +2899,16 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode
Src = GetOrCreateSourceID(Dir, Fn);
}
+#if 0
+ if (!Lines.empty()) {
+ SrcLineInfo lastSrcLineInfo = Lines.back();
+ // Emitting sequential line records with the same line number (but
+ // different addresses) seems to confuse GDB. Avoid this.
+ if (lastSrcLineInfo.getLine() == Line)
+ return NULL;
+ }
+#endif
+
MCSymbol *Label = MMI->getContext().CreateTempSymbol();
Lines.push_back(SrcLineInfo(Line, Col, Src, Label));
@@ -2898,7 +3017,8 @@ void DwarfDebug::EmitSectionLabels() {
if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
EmitSectionSym(Asm, MacroInfo);
- EmitSectionSym(Asm, TLOF.getDwarfLineSection());
+ DwarfDebugLineSectionSym =
+ EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
EmitSectionSym(Asm, TLOF.getDwarfLocSection());
EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
@@ -2961,6 +3081,11 @@ void DwarfDebug::emitDIE(DIE *Die) {
4);
break;
}
+ case dwarf::DW_AT_stmt_list: {
+ Asm->EmitLabelDifference(CurrentLineSectionSym,
+ DwarfDebugLineSectionSym, 4);
+ break;
+ }
case dwarf::DW_AT_location: {
if (UseDotDebugLocEntry.count(Die) != 0) {
DIELabel *L = cast<DIELabel>(Values[i]);
@@ -3106,6 +3231,8 @@ void DwarfDebug::emitDebugLines() {
Asm->getObjFileLowering().getDwarfLineSection());
// Construct the section header.
+ CurrentLineSectionSym = Asm->GetTempSymbol("section_line_begin");
+ Asm->OutStreamer.EmitLabel(CurrentLineSectionSym);
Asm->OutStreamer.AddComment("Length of Source Line Info");
Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"),
Asm->GetTempSymbol("line_begin"), 4);
@@ -3491,8 +3618,9 @@ void DwarfDebug::emitDebugLoc() {
unsigned char Size = Asm->getTargetData().getPointerSize();
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
unsigned index = 1;
- for (SmallVector<DotDebugLocEntry, 4>::iterator I = DotDebugLocEntries.begin(),
- E = DotDebugLocEntries.end(); I != E; ++I, ++index) {
+ for (SmallVector<DotDebugLocEntry, 4>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ I != E; ++I, ++index) {
DotDebugLocEntry Entry = *I;
if (Entry.isEmpty()) {
Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 0d6116fc9861..5a281c851748 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -156,6 +156,9 @@ class DwarfDebug {
/// not included DbgScopeMap. AbstractScopes owns its DbgScope*s.
DenseMap<const MDNode *, DbgScope *> AbstractScopes;
+ /// AbstractSPDies - Collection of abstract subprogram DIEs.
+ DenseMap<const MDNode *, DIE *> AbstractSPDies;
+
/// AbstractScopesList - Tracks abstract scopes constructed while processing
/// a function. This list is cleared during endFunction().
SmallVector<DbgScope *, 4>AbstractScopesList;
@@ -210,7 +213,7 @@ class DwarfDebug {
DenseMap<DIE *, const MDNode *> ContainingTypeMap;
typedef SmallVector<DbgScope *, 2> ScopeVector;
- SmallPtrSet<const MachineInstr *, 8> InsnsBeginScopeSet;
+
SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet;
/// InlineInfo - Keep track of inlined functions and their location. This
@@ -219,6 +222,10 @@ class DwarfDebug {
DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
SmallVector<const MDNode *, 4> InlinedSPNodes;
+ // ProcessedSPNodes - This is a collection of subprogram MDNodes that
+ // are processed to create DIEs.
+ SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
+
/// LabelsBeforeInsn - Maps instruction with label emitted before
/// instruction.
DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
@@ -231,9 +238,6 @@ class DwarfDebug {
/// a debuggging information entity.
SmallPtrSet<const MachineInstr *, 8> InsnNeedsLabel;
- /// ProcessedArgs - Collection of arguments already processed.
- SmallPtrSet<const MDNode *, 8> ProcessedArgs;
-
SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
/// Previous instruction's location information. This is used to determine
@@ -257,7 +261,10 @@ class DwarfDebug {
MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
MCSymbol *DwarfDebugLocSectionSym;
+ MCSymbol *DwarfDebugLineSectionSym, *CurrentLineSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
+
+ DIEInteger *DIEIntegerOne;
private:
/// getSourceDirectoryAndFileIds - Return the directory and file ids that
@@ -593,7 +600,8 @@ private:
bool extractScopeInformation();
/// collectVariableInfo - Populate DbgScope entries with variables' info.
- void collectVariableInfo(const MachineFunction *);
+ void collectVariableInfo(const MachineFunction *,
+ SmallPtrSet<const MDNode *, 16> &ProcessedVars);
/// collectVariableInfoFromMMITable - Collect variable information from
/// side table maintained by MMI.
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index f92127f22748..c8a63cf2393b 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -52,13 +52,13 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
SymName += "__";
SymName += Id;
-
+
// Capitalize the first letter of the module name.
SymName[Letter] = toupper(SymName[Letter]);
-
+
SmallString<128> TmpStr;
AP.Mang->getNameWithPrefix(TmpStr, SymName);
-
+
MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr);
AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 9dec22ec78a3..7f98df0d22ea 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -358,23 +358,10 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
}
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
-/// after it, replacing it with an unconditional branch to NewDest. This
-/// returns true if OldInst's block is modified, false if NewDest is modified.
+/// after it, replacing it with an unconditional branch to NewDest.
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
- MachineBasicBlock *OldBB = OldInst->getParent();
-
- // Remove all the old successors of OldBB from the CFG.
- while (!OldBB->succ_empty())
- OldBB->removeSuccessor(OldBB->succ_begin());
-
- // Remove all the dead instructions from the end of OldBB.
- OldBB->erase(OldInst, OldBB->end());
-
- // If OldBB isn't immediately before OldBB, insert a branch to it.
- if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest))
- TII->InsertBranch(*OldBB, NewDest, 0, SmallVector<MachineOperand, 0>());
- OldBB->addSuccessor(NewDest);
+ TII->ReplaceTailWithBranchTo(OldInst, NewDest);
++NumTailMerge;
}
@@ -383,6 +370,9 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
/// iterator. This returns the new MBB.
MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
MachineBasicBlock::iterator BBI1) {
+ if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1))
+ return 0;
+
MachineFunction &MF = *CurMBB.getParent();
// Create the fall-through block.
@@ -443,18 +433,20 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB));
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
+ DebugLoc dl; // FIXME: this is nowhere
if (I != MF->end() &&
!TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
MachineBasicBlock *NextBB = I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
if (!TII->ReverseBranchCondition(Cond)) {
TII->RemoveBranch(*CurMBB);
- TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond);
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl);
return;
}
}
}
- TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector<MachineOperand, 0>());
+ TII->InsertBranch(*CurMBB, SuccBB, NULL,
+ SmallVector<MachineOperand, 0>(), dl);
}
bool
@@ -625,9 +617,10 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
/// only of the common tail. Create a block that does by splitting one.
-unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
- unsigned maxCommonTailLength) {
- unsigned commonTailIndex = 0;
+bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex) {
+ commonTailIndex = 0;
unsigned TimeEstimate = ~0U;
for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
// Use PredBB if possible; that doesn't require a new branch.
@@ -655,6 +648,11 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
<< maxCommonTailLength);
MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
+ if (!newMBB) {
+ DEBUG(dbgs() << "... failed!");
+ return false;
+ }
+
SameTails[commonTailIndex].setBlock(newMBB);
SameTails[commonTailIndex].setTailStartPos(newMBB->begin());
@@ -662,7 +660,7 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
if (PredBB == MBB)
PredBB = newMBB;
- return commonTailIndex;
+ return true;
}
// See if any of the blocks in MergePotentials (which all have a common single
@@ -757,7 +755,11 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
!SameTails[commonTailIndex].tailIsWholeBlock())) {
// None of the blocks consist entirely of the common tail.
// Split a block so that one does.
- commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength);
+ if (!CreateCommonTailOnlyBlock(PredBB,
+ maxCommonTailLength, commonTailIndex)) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
}
MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
@@ -874,10 +876,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
}
// Remove the unconditional branch at the end, if any.
if (TBB && (Cond.empty() || FBB)) {
+ DebugLoc dl; // FIXME: this is nowhere
TII->RemoveBranch(*PBB);
if (!Cond.empty())
// reinsert conditional branch only, for now
- TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond);
+ TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl);
}
MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
}
@@ -976,6 +979,7 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool MadeChange = false;
MachineFunction &MF = *MBB->getParent();
+ DebugLoc dl; // FIXME: this is nowhere
ReoptimizeBlock:
MachineFunction::iterator FallThrough = MBB;
@@ -1027,7 +1031,7 @@ ReoptimizeBlock:
TII->RemoveBranch(PrevBB);
PriorCond.clear();
if (PriorTBB != MBB)
- TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1066,7 +1070,7 @@ ReoptimizeBlock:
// the condition is false, remove the uncond second branch.
if (PriorFBB == MBB) {
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1079,7 +1083,7 @@ ReoptimizeBlock:
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
if (!TII->ReverseBranchCondition(NewPriorCond)) {
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond);
+ TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1116,7 +1120,7 @@ ReoptimizeBlock:
<< "To make fallthrough to: " << *PriorTBB << "\n");
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond);
+ TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl);
// Move this block to the end of the function.
MBB->moveAfter(--MF.end());
@@ -1145,7 +1149,7 @@ ReoptimizeBlock:
SmallVector<MachineOperand, 4> NewCond(CurCond);
if (!TII->ReverseBranchCondition(NewCond)) {
TII->RemoveBranch(*MBB);
- TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond);
+ TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1200,7 +1204,7 @@ ReoptimizeBlock:
PriorFBB = MBB;
}
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl);
}
// Iterate through all the predecessors, revectoring each in-turn.
@@ -1226,7 +1230,7 @@ ReoptimizeBlock:
if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
TII->RemoveBranch(*PMBB);
NewCurCond.clear();
- TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond);
+ TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl);
MadeChange = true;
++NumBranchOpts;
PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
@@ -1246,7 +1250,7 @@ ReoptimizeBlock:
}
// Add the branch back if the block is more than just an uncond branch.
- TII->InsertBranch(*MBB, CurTBB, 0, CurCond);
+ TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl);
}
}
@@ -1286,7 +1290,7 @@ ReoptimizeBlock:
if (CurFallsThru) {
MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
CurCond.clear();
- TII->InsertBranch(*MBB, NextBB, 0, CurCond);
+ TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl);
}
MBB->moveAfter(PredBB);
MadeChange = true;
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index b08739564060..15dfa7f6bee5 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -102,8 +102,9 @@ namespace llvm {
MachineBasicBlock *PredBB);
void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB);
- unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
- unsigned maxCommonTailLength);
+ bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex);
bool OptimizeBranches(MachineFunction &MF);
bool OptimizeBlock(MachineBasicBlock *MBB);
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 3e38872a36d6..ffeff1ee27a6 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -1,19 +1,20 @@
add_llvm_library(LLVMCodeGen
- Analysis.cpp
AggressiveAntiDepBreaker.cpp
+ Analysis.cpp
BranchFolding.cpp
CalcSpillWeights.cpp
+ CallingConvLower.cpp
CodePlacementOpt.cpp
CriticalAntiDepBreaker.cpp
DeadMachineInstructionElim.cpp
DwarfEHPrepare.cpp
ELFCodeEmitter.cpp
ELFWriter.cpp
- ExactHazardRecognizer.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
GCStrategy.cpp
IfConversion.cpp
+ InlineSpiller.cpp
IntrinsicLowering.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
@@ -45,6 +46,7 @@ add_llvm_library(LLVMCodeGen
OptimizePHIs.cpp
PHIElimination.cpp
Passes.cpp
+ PostRAHazardRecognizer.cpp
PostRASchedulerList.cpp
PreAllocSplitting.cpp
ProcessImplicitDefs.cpp
@@ -52,7 +54,6 @@ add_llvm_library(LLVMCodeGen
PseudoSourceValue.cpp
RegAllocFast.cpp
RegAllocLinearScan.cpp
- RegAllocLocal.cpp
RegAllocPBQP.cpp
RegisterCoalescer.cpp
RegisterScavenging.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index a328d0e556e9..240a7b94fccf 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -116,7 +116,7 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
SmallVector<LiveInterval*, 4> spillIs;
if (lis->isReMaterializable(li, spillIs, isLoad)) {
// If all of the definitions of the interval are re-materializable,
- // it is a preferred candidate for spilling. If non of the defs are
+ // it is a preferred candidate for spilling. If none of the defs are
// loads, then it's potentially very cheap to re-materialize.
// FIXME: this gets much more complicated once we support non-trivial
// re-materialization.
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 4e6c1fcc9604..62ad8171a9d4 100644
--- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -80,13 +80,12 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
/// CheckReturn - Analyze the return values of a function, returning true if
/// the return can be performed without sret-demotion, and false otherwise.
-bool CCState::CheckReturn(const SmallVectorImpl<EVT> &OutTys,
- const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
// Determine which register each value should be copied into.
- for (unsigned i = 0, e = OutTys.size(); i != e; ++i) {
- EVT VT = OutTys[i];
- ISD::ArgFlagsTy ArgFlags = ArgsFlags[i];
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
return false;
}
@@ -99,7 +98,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
// Determine which register each value should be copied into.
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- EVT VT = Outs[i].Val.getValueType();
+ EVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
@@ -111,14 +110,13 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
}
}
-
/// AnalyzeCallOperands - Analyze the outgoing arguments to a call,
/// incorporating info about the passed values into this state.
void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
unsigned NumOps = Outs.size();
for (unsigned i = 0; i != NumOps; ++i) {
- EVT ArgVT = Outs[i].Val.getValueType();
+ EVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
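
Since each ISD::OutputArg now carries its own VT and Flags, the callee-side checks above no longer need parallel type/flags vectors. A rough sketch of how a backend could drive the reworked CheckReturn, assuming the usual CCState setup and a hypothetical RetCC_MyTarget assignment function:

    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CallConv, isVarArg, TM, RVLocs, Context);
    // True when every return value can be assigned without sret-demotion.
    bool FitsInRegs = CCInfo.CheckReturn(Outs, RetCC_MyTarget);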
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 3ff2a046d233..e0e315c6c677 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -178,6 +178,8 @@ bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
continue;
// Move the block.
+ DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber()
+ << " to top of loop.\n");
Changed = true;
// Move it and all the blocks that can reach it via fallthrough edges
@@ -297,6 +299,8 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
continue;
// Move the block.
+ DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber()
+ << " to be contiguous with loop.\n");
Changed = true;
// Process this block and all loop blocks contiguous with it, to keep
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index fd957b12fc44..e3746a985644 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -29,6 +30,7 @@ CriticalAntiDepBreaker::
CriticalAntiDepBreaker(MachineFunction& MFi) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
+ TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
AllocatableSet(TRI->getAllocatableSet(MF))
{
@@ -71,25 +73,27 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
DefIndices[AliasReg] = ~0u;
}
}
- } else {
- // In a non-return block, examine the live-in regs of all successors.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ }
+
+ // In a non-return block, examine the live-in regs of all successors.
+ // Note a return block can have successors if the return instruction is
+ // predicated.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
- DefIndices[AliasReg] = ~0u;
- }
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
}
- }
+ }
// Mark live-out callee-saved registers. In a return block this is
// all callee-saved registers. In non-return this is any
@@ -164,6 +168,26 @@ static const SDep *CriticalPathStep(const SUnit *SU) {
}
void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
+ // It's not safe to change register allocation for source operands of
+ // instructions that have special allocation requirements. Also assume all
+ // registers used in a call must not be changed (ABI).
+ // FIXME: The issue with predicated instructions is more complex. We are
+ // being conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+ // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+ // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special = MI->getDesc().isCall() ||
+ MI->getDesc().hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI);
+
// Scan the register operands for this instruction and update
// Classes and RegRefs.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -199,9 +223,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
RegRefs.insert(std::make_pair(Reg, &MO));
- // It's not safe to change register allocation for source operands of
- // that have special allocation requirements.
- if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) {
+ if (MO.isUse() && Special) {
if (KeepRegs.insert(Reg)) {
for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
@@ -216,38 +238,43 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// Update liveness.
// Proceeding upwards, registers that are defed but not used in this
// instruction are now dead.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
- if (!MO.isDef()) continue;
- // Ignore two-addr defs.
- if (MI->isRegTiedToUseOperand(i)) continue;
-
- DefIndices[Reg] = Count;
- KillIndices[Reg] = ~0u;
- assert(((KillIndices[Reg] == ~0u) !=
- (DefIndices[Reg] == ~0u)) &&
- "Kill and Def maps aren't consistent for Reg!");
- KeepRegs.erase(Reg);
- Classes[Reg] = 0;
- RegRefs.erase(Reg);
- // Repeat, for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- unsigned SubregReg = *Subreg;
- DefIndices[SubregReg] = Count;
- KillIndices[SubregReg] = ~0u;
- KeepRegs.erase(SubregReg);
- Classes[SubregReg] = 0;
- RegRefs.erase(SubregReg);
- }
- // Conservatively mark super-registers as unusable.
- for (const unsigned *Super = TRI->getSuperRegisters(Reg);
- *Super; ++Super) {
- unsigned SuperReg = *Super;
- Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ if (!TII->isPredicated(MI)) {
+ // Predicated defs are modeled as read + write, i.e. similar to two
+ // address updates.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ DefIndices[Reg] = Count;
+ KillIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ KeepRegs.erase(Reg);
+ Classes[Reg] = 0;
+ RegRefs.erase(Reg);
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ unsigned SubregReg = *Subreg;
+ DefIndices[SubregReg] = Count;
+ KillIndices[SubregReg] = ~0u;
+ KeepRegs.erase(SubregReg);
+ Classes[SubregReg] = 0;
+ RegRefs.erase(SubregReg);
+ }
+ // Conservatively mark super-registers as unusable.
+ for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+ *Super; ++Super) {
+ unsigned SuperReg = *Super;
+ Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
}
}
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -334,10 +361,15 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// so just duck out immediately if the block is empty.
if (SUnits.empty()) return 0;
+ // Keep a map of the MachineInstr*'s back to the SUnit representing them.
+ // This is used for updating debug information.
+ DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
+
// Find the node at the bottom of the critical path.
const SUnit *Max = 0;
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
const SUnit *SU = &SUnits[i];
+ MISUnitMap[SU->getInstr()] = SU;
if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
Max = SU;
}
@@ -473,7 +505,11 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
PrescanInstruction(MI);
- if (MI->getDesc().hasExtraDefRegAllocReq())
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI).
+ if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ TII->isPredicated(MI))
// If this instruction's defs have special allocation requirement, don't
// break this anti-dependency.
AntiDepReg = 0;
@@ -485,7 +521,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- if (MO.isUse() && AntiDepReg == Reg) {
+ if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
AntiDepReg = 0;
break;
}
@@ -519,8 +555,22 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
std::multimap<unsigned, MachineOperand *>::iterator>
Range = RegRefs.equal_range(AntiDepReg);
for (std::multimap<unsigned, MachineOperand *>::iterator
- Q = Range.first, QE = Range.second; Q != QE; ++Q)
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
Q->second->setReg(NewReg);
+ // If the SU for the instruction being updated has debug information
+ // related to the anti-dependency register, make sure to update that
+ // as well.
+ const SUnit *SU = MISUnitMap[Q->second->getParent()];
+ if (!SU) continue;
+ for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) {
+ MachineInstr *DI = SU->DbgInstrList[i];
+ assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() &&
+ DI->getOperand(0).getReg()
+ && "Non-register dbg_value attached to SUnit!");
+ if (DI->getOperand(0).getReg() == AntiDepReg)
+ DI->getOperand(0).setReg(NewReg);
+ }
+ }
// We just went back in time and modified history; the
// liveness information for the anti-dependence reg is now
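
The new MISUnitMap/DbgInstrList handling above keeps debug info honest when an anti-dependence register is renamed. Condensed into a free-standing helper (the name is illustrative), the fix-up amounts to:

    static void retargetDbgValues(const SUnit *SU, unsigned OldReg, unsigned NewReg) {
      for (unsigned i = 0, e = SU->DbgInstrList.size(); i != e; ++i) {
        MachineInstr *DV = SU->DbgInstrList[i];
        // Operand 0 of a dbg_value holds the register being described.
        if (DV->getOperand(0).isReg() && DV->getOperand(0).getReg() == OldReg)
          DV->getOperand(0).setReg(NewReg);
      }
    }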
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index cc42dd2b8e32..540630083bcc 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -22,15 +22,18 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include <map>
namespace llvm {
+class TargetInstrInfo;
+class TargetRegisterInfo;
+
class CriticalAntiDepBreaker : public AntiDepBreaker {
MachineFunction& MF;
MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
/// AllocatableSet - The set of allocatable registers.
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index f6739f434044..01b31b420931 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -87,10 +88,13 @@ namespace {
/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
/// use the ".llvm.eh.catch.all.value" call need to convert to using its
/// initializer instead.
- bool CleanupSelectors();
+ bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
+
+ bool HasCatchAllInSelector(IntrinsicInst *);
/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
- void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
+ void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
+ SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels);
/// FindAllURoRInvokes - Find all URoR invokes in the function.
void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
@@ -150,7 +154,7 @@ namespace {
Changed = true;
}
- return false;
+ return Changed;
}
public:
@@ -186,25 +190,32 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm, bool fast) {
return new DwarfEHPrepare(tm, fast);
}
+/// HasCatchAllInSelector - Return true if the intrinsic instruction has a
+/// catch-all.
+bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) {
+ if (!EHCatchAllValue) return false;
+
+ unsigned ArgIdx = II->getNumArgOperands() - 1;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(II->getArgOperand(ArgIdx));
+ return GV == EHCatchAllValue;
+}
+
/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
void DwarfEHPrepare::
-FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
+FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
+ SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels) {
for (Value::use_iterator
I = SelectorIntrinsic->use_begin(),
E = SelectorIntrinsic->use_end(); I != E; ++I) {
- IntrinsicInst *SI = cast<IntrinsicInst>(I);
- if (!SI || SI->getParent()->getParent() != F) continue;
-
- unsigned NumOps = SI->getNumOperands();
- if (NumOps > 4) continue;
- bool IsCleanUp = (NumOps == 3);
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
- if (!IsCleanUp)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getOperand(3)))
- IsCleanUp = (CI->getZExtValue() == 0);
+ if (II->getParent()->getParent() != F)
+ continue;
- if (IsCleanUp)
- Sels.insert(SI);
+ if (!HasCatchAllInSelector(II))
+ Sels.insert(II);
+ else
+ CatchAllSels.insert(II);
}
}
@@ -222,7 +233,7 @@ FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use
/// the ".llvm.eh.catch.all.value" call need to convert to using its
/// initializer instead.
-bool DwarfEHPrepare::CleanupSelectors() {
+bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
if (!EHCatchAllValue) return false;
if (!SelectorIntrinsic) {
@@ -232,17 +243,15 @@ bool DwarfEHPrepare::CleanupSelectors() {
}
bool Changed = false;
- for (Value::use_iterator
- I = SelectorIntrinsic->use_begin(),
- E = SelectorIntrinsic->use_end(); I != E; ++I) {
- IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(I);
- if (!Sel || Sel->getParent()->getParent() != F) continue;
+ for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+ I = Sels.begin(), E = Sels.end(); I != E; ++I) {
+ IntrinsicInst *Sel = *I;
// Index of the ".llvm.eh.catch.all.value" variable.
- unsigned OpIdx = Sel->getNumOperands() - 1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getOperand(OpIdx));
+ unsigned OpIdx = Sel->getNumArgOperands() - 1;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx));
if (GV != EHCatchAllValue) continue;
- Sel->setOperand(OpIdx, EHCatchAllValue->getInitializer());
+ Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer());
Changed = true;
}
@@ -293,8 +302,6 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
/// function. This is a candidate to merge the selector associated with the URoR
/// invoke with the one from the URoR's landing pad.
bool DwarfEHPrepare::HandleURoRInvokes() {
- if (!DT) return CleanupSelectors(); // We require DominatorTree information.
-
if (!EHCatchAllValue) {
EHCatchAllValue =
F->getParent()->getNamedGlobal(".llvm.eh.catch.all.value");
@@ -307,14 +314,20 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
if (!SelectorIntrinsic) return false;
}
+ SmallPtrSet<IntrinsicInst*, 32> Sels;
+ SmallPtrSet<IntrinsicInst*, 32> CatchAllSels;
+ FindAllCleanupSelectors(Sels, CatchAllSels);
+
+ if (!DT)
+ // We require DominatorTree information.
+ return CleanupSelectors(CatchAllSels);
+
if (!URoR) {
URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
- if (!URoR) return CleanupSelectors();
+ if (!URoR) return CleanupSelectors(CatchAllSels);
}
- SmallPtrSet<IntrinsicInst*, 32> Sels;
SmallPtrSet<InvokeInst*, 32> URoRInvokes;
- FindAllCleanupSelectors(Sels);
FindAllURoRInvokes(URoRInvokes);
SmallPtrSet<IntrinsicInst*, 32> SelsToConvert;
@@ -340,7 +353,8 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
if (!ExceptionValueIntrinsic) {
ExceptionValueIntrinsic =
Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception);
- if (!ExceptionValueIntrinsic) return CleanupSelectors();
+ if (!ExceptionValueIntrinsic)
+ return CleanupSelectors(CatchAllSels);
}
for (Value::use_iterator
@@ -360,21 +374,9 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
// an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we
// need to convert it to a 'catch-all'.
for (SmallPtrSet<IntrinsicInst*, 8>::iterator
- SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) {
- IntrinsicInst *II = *SI;
- unsigned NumOps = II->getNumOperands();
-
- if (NumOps <= 4) {
- bool IsCleanUp = (NumOps == 3);
-
- if (!IsCleanUp)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(3)))
- IsCleanUp = (CI->getZExtValue() == 0);
-
- if (IsCleanUp)
- SelsToConvert.insert(II);
- }
- }
+ SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI)
+ if (!HasCatchAllInSelector(*SI))
+ SelsToConvert.insert(*SI);
}
}
}
@@ -388,12 +390,22 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
SI = SelsToConvert.begin(), SE = SelsToConvert.end();
SI != SE; ++SI) {
IntrinsicInst *II = *SI;
- SmallVector<Value*, 8> Args;
// Use the exception object pointer and the personality function
// from the original selector.
- Args.push_back(II->getOperand(1)); // Exception object pointer.
- Args.push_back(II->getOperand(2)); // Personality function.
+ CallSite CS(II);
+ IntrinsicInst::op_iterator I = CS.arg_begin();
+ IntrinsicInst::op_iterator E = CS.arg_end();
+ IntrinsicInst::op_iterator B = prior(E);
+
+ // Exclude last argument if it is an integer.
+ if (isa<ConstantInt>(B)) E = B;
+
+ // Add exception object pointer (front).
+ // Add personality function (next).
+ // Add in any filter IDs (rest).
+ SmallVector<Value*, 8> Args(I, E);
+
Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
CallInst *NewSelector =
@@ -409,7 +421,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
}
}
- Changed |= CleanupSelectors();
+ Changed |= CleanupSelectors(CatchAllSels);
return Changed;
}
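
The selector rewrites above all hinge on whether an eh.selector call ends with the ".llvm.eh.catch.all.value" marker. Reduced to a stand-alone sketch (assuming the catch-all global has already been looked up, as in HasCatchAllInSelector):

    static bool isCatchAllSelector(IntrinsicInst *Sel, GlobalVariable *CatchAll) {
      // The catch-all marker, when present, is always the last argument.
      unsigned ArgIdx = Sel->getNumArgOperands() - 1;
      return dyn_cast<GlobalVariable>(Sel->getArgOperand(ArgIdx)) == CatchAll;
    }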
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index 8416d3bda930..36b0e6514b3a 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -90,7 +90,7 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(),
MRE = JTRelocations.end(); MRI != MRE; ++MRI) {
MachineRelocation &MR = *MRI;
- unsigned MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
+ uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
MR.setResultPointer((void*)MBBOffset);
MR.setConstantVal(ES->SectionIdx);
JTSection.addRelocation(MR);
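
Widening MBBOffset to uintptr_t matters on 64-bit hosts, where a block address above the 4 GiB mark could be silently truncated by the old unsigned. A minimal illustration of the intent:

    uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
    MR.setResultPointer((void*)MBBOffset);   // no longer loses the high bits on LP64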
diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h
deleted file mode 100644
index 91c81a970fa5..000000000000
--- a/lib/CodeGen/ExactHazardRecognizer.h
+++ /dev/null
@@ -1,86 +0,0 @@
-//=- llvm/CodeGen/ExactHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the ExactHazardRecognizer class, which
-// implements hazard-avoidance heuristics for scheduling, based on the
-// scheduling itineraries specified for the target.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H
-#define LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H
-
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetInstrItineraries.h"
-
-namespace llvm {
- class ExactHazardRecognizer : public ScheduleHazardRecognizer {
- // ScoreBoard to track function unit usage. ScoreBoard[0] is a
- // mask of the FUs in use in the cycle currently being
- // schedule. ScoreBoard[1] is a mask for the next cycle. The
- // ScoreBoard is used as a circular buffer with the current cycle
- // indicated by Head.
- class ScoreBoard {
- unsigned *Data;
-
- // The maximum number of cycles monitored by the Scoreboard. This
- // value is determined based on the target itineraries to ensure
- // that all hazards can be tracked.
- size_t Depth;
- // Indices into the Scoreboard that represent the current cycle.
- size_t Head;
- public:
- ScoreBoard():Data(NULL), Depth(0), Head(0) { }
- ~ScoreBoard() {
- delete[] Data;
- }
-
- size_t getDepth() const { return Depth; }
- unsigned& operator[](size_t idx) const {
- assert(Depth && "ScoreBoard was not initialized properly!");
-
- return Data[(Head + idx) % Depth];
- }
-
- void reset(size_t d = 1) {
- if (Data == NULL) {
- Depth = d;
- Data = new unsigned[Depth];
- }
-
- memset(Data, 0, Depth * sizeof(Data[0]));
- Head = 0;
- }
-
- void advance() {
- Head = (Head + 1) % Depth;
- }
-
- // Print the scoreboard.
- void dump() const;
- };
-
- // Itinerary data for the target.
- const InstrItineraryData &ItinData;
-
- ScoreBoard ReservedScoreboard;
- ScoreBoard RequiredScoreboard;
-
- public:
- ExactHazardRecognizer(const InstrItineraryData &ItinData);
-
- virtual HazardType getHazardType(SUnit *SU);
- virtual void Reset();
- virtual void EmitInstruction(SUnit *SU);
- virtual void AdvanceCycle();
- };
-}
-
-#endif
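
The deleted header's ScoreBoard is a circular buffer of per-cycle function-unit masks indexed from a moving Head; judging by the CMake change earlier in this diff, the same machinery presumably continues under PostRAHazardRecognizer. The indexing scheme, stripped to its essentials:

    // Slot Idx counts cycles ahead of the current cycle (Head).
    unsigned &slotFor(unsigned *Data, size_t Depth, size_t Head, size_t Idx) {
      return Data[(Head + Idx) % Depth];
    }
    // Advancing one cycle just moves the window forward.
    void advanceCycle(size_t &Head, size_t Depth) {
      Head = (Head + 1) % Depth;
    }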
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 790cb2164897..71506cc6abb9 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -271,7 +271,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
case Intrinsic::gcwrite:
if (LowerWr) {
// Replace a write barrier with a simple store.
- Value *St = new StoreInst(CI->getOperand(1), CI->getOperand(3), CI);
+ Value *St = new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI);
CI->replaceAllUsesWith(St);
CI->eraseFromParent();
}
@@ -279,7 +279,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
case Intrinsic::gcread:
if (LowerRd) {
// Replace a read barrier with a simple load.
- Value *Ld = new LoadInst(CI->getOperand(2), "", CI);
+ Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
Ld->takeName(CI);
CI->replaceAllUsesWith(Ld);
CI->eraseFromParent();
@@ -290,7 +290,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
// Initialize the GC root, but do not delete the intrinsic. The
// backend needs the intrinsic to flag the stack slot.
Roots.push_back(cast<AllocaInst>(
- CI->getOperand(1)->stripPointerCasts()));
+ CI->getArgOperand(0)->stripPointerCasts()));
}
break;
default:
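
All three lowerings above switch from raw operand indices to getArgOperand(i), which keeps the code independent of where the callee pointer lives in the operand list. For reference, the mapping used in this file is simply:

    // Old raw index        New argument index
    // CI->getOperand(1) -> CI->getArgOperand(0)   (first intrinsic argument)
    // CI->getOperand(2) -> CI->getArgOperand(1)
    // CI->getOperand(3) -> CI->getArgOperand(2)
    Value *Written = CI->getArgOperand(0);   // llvm.gcwrite: value being stored
    Value *Slot    = CI->getArgOperand(2);   // llvm.gcwrite: destination slot
    Value *St      = new StoreInst(Written, Slot, CI);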
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index c61fd17e7911..6b445e0b8e0f 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -20,6 +20,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -33,20 +34,22 @@ using namespace llvm;
static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
-static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
cl::init(false), cl::Hidden);
+static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
+ cl::init(true), cl::Hidden);
STATISTIC(NumSimple, "Number of simple if-conversions performed");
STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
@@ -115,7 +118,7 @@ namespace {
BB(0), TrueBB(0), FalseBB(0) {}
};
- /// IfcvtToken - Record information about pending if-conversions to attemp:
+ /// IfcvtToken - Record information about pending if-conversions to attempt:
/// BBI - Corresponding BBInfo.
/// Kind - Type of block. See IfcvtKind.
/// NeedSubsumption - True if the to-be-predicated BB has already been
@@ -146,6 +149,7 @@ namespace {
const TargetLowering *TLI;
const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
bool MadeChange;
int FnNum;
public:
@@ -167,8 +171,7 @@ namespace {
std::vector<IfcvtToken*> &Tokens);
bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
bool isTriangle = false, bool RevBranch = false);
- bool AnalyzeBlocks(MachineFunction &MF,
- std::vector<IfcvtToken*> &Tokens);
+ void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens);
void InvalidatePreds(MachineBasicBlock *BB);
void RemoveExtraEdges(BBInfo &BBI);
bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
@@ -177,14 +180,22 @@ namespace {
unsigned NumDups1, unsigned NumDups2);
void PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
- SmallVectorImpl<MachineOperand> &Cond);
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs);
void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
bool IgnoreBr = false);
- void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI);
+ void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
- bool MeetIfcvtSizeLimit(unsigned Size) const {
- return Size > 0 && Size <= TLI->getIfCvtBlockSizeLimit();
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size) const {
+ return Size > 0 && TII->isProfitableToIfCvt(BB, Size);
+ }
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize,
+ MachineBasicBlock &FBB, unsigned FSize) const {
+ return TSize > 0 && FSize > 0 &&
+ TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize);
}
// blockAlwaysFallThrough - Block ends without a terminator.
@@ -227,8 +238,15 @@ FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TLI = MF.getTarget().getTargetLowering();
TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
if (!TII) return false;
+ // Tail merging tends to expose more if-conversion opportunities.
+ BranchFolder BF(true);
+ bool BFChange = BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+
DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
<< MF.getFunction()->getName() << "\'");
@@ -253,7 +271,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
// Do an initial analysis for each basic block and find all the potential
// candidates to perform if-conversion.
- bool Change = AnalyzeBlocks(MF, Tokens);
+ bool Change = false;
+ AnalyzeBlocks(MF, Tokens);
while (!Tokens.empty()) {
IfcvtToken *Token = Tokens.back();
Tokens.pop_back();
@@ -281,7 +300,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
case ICSimpleFalse: {
bool isFalse = Kind == ICSimpleFalse;
if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
- DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
+ DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ?
+ " false" : "")
<< "): BB#" << BBI.BB->getNumber() << " ("
<< ((Kind == ICSimpleFalse)
? BBI.FalseBB->getNumber()
@@ -289,8 +309,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
RetVal = IfConvertSimple(BBI, Kind);
DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) {
- if (isFalse) NumSimpleFalse++;
- else NumSimple++;
+ if (isFalse) ++NumSimpleFalse;
+ else ++NumSimple;
}
break;
}
@@ -316,11 +336,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) {
if (isFalse) {
- if (isRev) NumTriangleFRev++;
- else NumTriangleFalse++;
+ if (isRev) ++NumTriangleFRev;
+ else ++NumTriangleFalse;
} else {
- if (isRev) NumTriangleRev++;
- else NumTriangle++;
+ if (isRev) ++NumTriangleRev;
+ else ++NumTriangle;
}
}
break;
@@ -332,7 +352,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
<< BBI.FalseBB->getNumber() << ") ");
RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
- if (RetVal) NumDiamonds++;
+ if (RetVal) ++NumDiamonds;
break;
}
}
@@ -361,13 +381,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
Roots.clear();
BBAnalysis.clear();
- if (MadeChange) {
+ if (MadeChange && IfCvtBranchFold) {
BranchFolder BF(false);
BF.OptimizeFunction(MF, TII,
MF.getTarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}
+ MadeChange |= BFChange;
return MadeChange;
}
@@ -387,9 +408,10 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
/// ReverseBranchCondition - Reverse the condition of the end of the block
/// branch. Swap block's 'true' and 'false' successors.
bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+ DebugLoc dl; // FIXME: no meaningful DebugLoc is available here
if (!TII->ReverseBranchCondition(BBI.BrCond)) {
TII->RemoveBranch(*BBI.BB);
- TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond);
+ TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
std::swap(BBI.TrueBB, BBI.FalseBB);
return true;
}
@@ -420,7 +442,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
if (TrueBBI.BB->pred_size() > 1) {
if (TrueBBI.CannotBeCopied ||
- TrueBBI.NonPredSize > TLI->getIfCvtDupBlockSizeLimit())
+ !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize))
return false;
Dups = TrueBBI.NonPredSize;
}
@@ -431,7 +453,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
/// with their common predecessor) forms a valid triangle shape for ifcvt.
/// If 'FalseBranch' is true, it checks if 'true' block's false branch
-/// branches to the false branch rather than the other way around. It also
+/// branches to the 'false' block rather than the other way around. It also
/// returns the number of instructions that the ifcvt would need to duplicate
/// if performed in 'Dups'.
bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
@@ -457,7 +479,7 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
++Size;
}
}
- if (Size > TLI->getIfCvtDupBlockSizeLimit())
+ if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size))
return false;
Dups = Size;
}
@@ -514,7 +536,27 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
MachineBasicBlock::iterator TI = TrueBBI.BB->begin();
MachineBasicBlock::iterator FI = FalseBBI.BB->begin();
- while (TI != TrueBBI.BB->end() && FI != FalseBBI.BB->end()) {
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+ // Skip dbg_value instructions
+ while (TI != TIE && TI->isDebugValue())
+ ++TI;
+ while (FI != FIE && FI->isDebugValue())
+ ++FI;
+ while (TI != TIE && FI != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TI->isDebugValue()) {
+ while (TI != TIE && TI->isDebugValue())
+ ++TI;
+ if (TI == TIE)
+ break;
+ }
+ if (FI->isDebugValue()) {
+ while (FI != FIE && FI->isDebugValue())
+ ++FI;
+ if (FI == FIE)
+ break;
+ }
if (!TI->isIdenticalTo(FI))
break;
++Dups1;
@@ -524,7 +566,27 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
TI = firstNonBranchInst(TrueBBI.BB, TII);
FI = firstNonBranchInst(FalseBBI.BB, TII);
- while (TI != TrueBBI.BB->begin() && FI != FalseBBI.BB->begin()) {
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ // Skip dbg_value instructions at the end of the blocks.
+ while (TI != TIB && TI->isDebugValue())
+ --TI;
+ while (FI != FIB && FI->isDebugValue())
+ --FI;
+ while (TI != TIB && FI != FIB) {
+ // Skip dbg_value instructions. These do not count.
+ if (TI->isDebugValue()) {
+ while (TI != TIB && TI->isDebugValue())
+ --TI;
+ if (TI == TIB)
+ break;
+ }
+ if (FI->isDebugValue()) {
+ while (FI != FIB && FI->isDebugValue())
+ --FI;
+ if (FI == FIB)
+ break;
+ }
if (!TI->isIdenticalTo(FI))
break;
++Dups2;
@@ -556,7 +618,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
// No false branch. This BB must end with a conditional branch and a
// fallthrough.
if (!BBI.FalseBB)
- BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+ BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
if (!BBI.FalseBB) {
// Malformed bcc? True and false blocks are the same?
BBI.IsUnpredicable = true;
@@ -569,6 +631,9 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
BBI.ClobbersPred = false;
for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
const TargetInstrDesc &TID = I->getDesc();
if (TID.isNotDuplicable())
BBI.CannotBeCopied = true;
@@ -702,8 +767,8 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
bool FNeedSub = FalseBBI.Predicate.size() > 0;
bool Enqueued = false;
if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
- MeetIfcvtSizeLimit(TrueBBI.NonPredSize - (Dups + Dups2)) &&
- MeetIfcvtSizeLimit(FalseBBI.NonPredSize - (Dups + Dups2)) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize - (Dups + Dups2),
+ *FalseBBI.BB, FalseBBI.NonPredSize - (Dups + Dups2)) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
// Diamond:
@@ -720,7 +785,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
}
if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) &&
- MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
// Triangle:
// EBB
@@ -732,23 +797,23 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
Enqueued = true;
}
-
+
if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) &&
- MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
Enqueued = true;
}
if (ValidSimple(TrueBBI, Dups) &&
- MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
// Simple (split, no rejoin):
// EBB
// | \_
// | |
// | TBB---> exit
- // |
+ // |
// FBB
Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
Enqueued = true;
@@ -757,21 +822,21 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
if (CanRevCond) {
// Try the other path...
if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) &&
- MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
FeasibilityAnalysis(FalseBBI, RevCond, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
Enqueued = true;
}
if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) &&
- MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
Enqueued = true;
}
if (ValidSimple(FalseBBI, Dups) &&
- MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
Enqueued = true;
@@ -785,11 +850,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
}
/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
-/// candidates. It returns true if any CFG restructuring is done to expose more
-/// if-conversion opportunities.
-bool IfConverter::AnalyzeBlocks(MachineFunction &MF,
+/// candidates.
+void IfConverter::AnalyzeBlocks(MachineFunction &MF,
std::vector<IfcvtToken*> &Tokens) {
- bool Change = false;
std::set<MachineBasicBlock*> Visited;
for (unsigned i = 0, e = Roots.size(); i != e; ++i) {
for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited),
@@ -801,20 +864,23 @@ bool IfConverter::AnalyzeBlocks(MachineFunction &MF,
// Sort to favor more complex ifcvt scheme.
std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
-
- return Change;
}
/// canFallThroughTo - Returns true either if ToBB is the next block after BB or
/// that all the intervening blocks are empty (given BB can fall through to its
/// next block).
static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
- MachineFunction::iterator I = BB;
+ MachineFunction::iterator PI = BB;
+ MachineFunction::iterator I = llvm::next(PI);
MachineFunction::iterator TI = ToBB;
MachineFunction::iterator E = BB->getParent()->end();
- while (++I != TI)
- if (I == E || !I->empty())
+ while (I != TI) {
+ // Check isSuccessor to avoid the case where the next block is empty but
+ // is not a successor.
+ if (I == E || !I->empty() || !PI->isSuccessor(I))
return false;
+ PI = I++;
+ }
return true;
}
@@ -836,8 +902,9 @@ void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
///
static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
const TargetInstrInfo *TII) {
+ DebugLoc dl; // FIXME: no meaningful DebugLoc is available here
SmallVector<MachineOperand, 0> NoCond;
- TII->InsertBranch(*BB, ToBB, NULL, NoCond);
+ TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl);
}
/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
@@ -849,6 +916,66 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
}
+/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are
+/// modeled as read + write (sort of like two-address instructions). These
+/// routines track register liveness and add implicit uses to if-converted
+/// instructions to conform to the model.
+static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+ E = BB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Redefs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Redefs.insert(*Subreg);
+ }
+}
+
+static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI,
+ bool AddImpUse = false) {
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (MO.isKill()) {
+ Redefs.erase(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Redefs.erase(*SR);
+ }
+ }
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+ if (Redefs.count(Reg)) {
+ if (AddImpUse)
+ // Treat predicated update as read + write.
+ MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+ true/*IsImp*/,false/*IsKill*/));
+ } else {
+ Redefs.insert(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Redefs.insert(*SR);
+ }
+ }
+}
+
+static void UpdatePredRedefs(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ while (I != E) {
+ UpdatePredRedefs(I, Redefs, TRI);
+ ++I;
+ }
+}
+
/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
///
bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
@@ -873,13 +1000,19 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
if (TII->ReverseBranchCondition(Cond))
assert(false && "Unable to reverse branch condition!");
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
if (CvtBBI->BB->pred_size() > 1) {
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
- CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
} else {
- PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
// Merge converted block into entry block.
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
@@ -922,6 +1055,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
BBInfo *CvtBBI = &TrueBBI;
BBInfo *NextBBI = &FalseBBI;
+ DebugLoc dl; // FIXME: no meaningful DebugLoc is available here
SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
@@ -957,21 +1091,26 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
}
}
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
bool HasEarlyExit = CvtBBI->FalseBB != NULL;
- bool DupBB = CvtBBI->BB->pred_size() > 1;
- if (DupBB) {
+ if (CvtBBI->BB->pred_size() > 1) {
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
- CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true);
} else {
// Predicate the 'true' block after removing its branch.
CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
- PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
// Now merge the entry of the triangle with the true block.
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
- MergeBlocks(BBI, *CvtBBI);
+ MergeBlocks(BBI, *CvtBBI, false);
}
// If 'true' block has a 'false' successor, add an exit branch to it.
@@ -980,7 +1119,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
CvtBBI->BrCond.end());
if (TII->ReverseBranchCondition(RevCond))
assert(false && "Unable to reverse branch condition!");
- TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond);
+ TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl);
BBI.BB->addSuccessor(CvtBBI->FalseBB);
}
@@ -1009,7 +1148,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
RemoveExtraEdges(BBI);
// Update block info. BB can be iteratively if-converted.
- if (!IterIfcvt)
+ if (!IterIfcvt)
BBI.IsDone = true;
InvalidatePreds(BBI.BB);
CvtBBI->IsDone = true;
@@ -1044,9 +1183,9 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
return false;
}
- // Merge the 'true' and 'false' blocks by copying the instructions
- // from the 'false' block to the 'true' block. That is, unless the true
- // block would clobber the predicate, in that case, do the opposite.
+ // Put the predicated instructions from the 'true' block before the
+ // instructions from the 'false' block, unless the true block would clobber
+ // the predicate, in which case, do the opposite.
BBInfo *BBI1 = &TrueBBI;
BBInfo *BBI2 = &FalseBBI;
SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
@@ -1071,39 +1210,72 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// Remove the conditional branch from entry to the blocks.
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(BBI1->BB, Redefs, TRI);
+
// Remove the duplicated instructions at the beginnings of both paths.
MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+ MachineBasicBlock::iterator DIE1 = BBI1->BB->end();
+ MachineBasicBlock::iterator DIE2 = BBI2->BB->end();
+ // Skip dbg_value instructions
+ while (DI1 != DIE1 && DI1->isDebugValue())
+ ++DI1;
+ while (DI2 != DIE2 && DI2->isDebugValue())
+ ++DI2;
BBI1->NonPredSize -= NumDups1;
BBI2->NonPredSize -= NumDups1;
+
+ // Skip past the dups on each side separately since there may be
+ // differing dbg_value entries.
+ for (unsigned i = 0; i < NumDups1; ++DI1) {
+ if (!DI1->isDebugValue())
+ ++i;
+ }
while (NumDups1 != 0) {
- ++DI1;
++DI2;
- --NumDups1;
+ if (!DI2->isDebugValue())
+ --NumDups1;
}
+
+ UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI);
BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
BBI2->BB->erase(BBI2->BB->begin(), DI2);
// Predicate the 'true' block after removing its branch.
BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
DI1 = BBI1->BB->end();
- for (unsigned i = 0; i != NumDups2; ++i)
+ for (unsigned i = 0; i != NumDups2; ) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert (DI1 != BBI1->BB->begin());
--DI1;
+ // skip dbg_value instructions
+ if (!DI1->isDebugValue())
+ ++i;
+ }
BBI1->BB->erase(DI1, BBI1->BB->end());
- PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1);
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs);
// Predicate the 'false' block.
BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
DI2 = BBI2->BB->end();
while (NumDups2 != 0) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert (DI2 != BBI2->BB->begin());
--DI2;
- --NumDups2;
+ // skip dbg_value instructions
+ if (!DI2->isDebugValue())
+ --NumDups2;
}
- PredicateBlock(*BBI2, DI2, *Cond2);
+ PredicateBlock(*BBI2, DI2, *Cond2, Redefs);
// Merge the true block into the entry of the diamond.
- MergeBlocks(BBI, *BBI1);
- MergeBlocks(BBI, *BBI2);
+ MergeBlocks(BBI, *BBI1, TailBB == 0);
+ MergeBlocks(BBI, *BBI2, TailBB == 0);
// If the if-converted block falls through or unconditionally branches into
// the tail block, and the tail block does not have other predecessors, then
@@ -1111,16 +1283,32 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// tail, add an unconditional branch to it.
if (TailBB) {
BBInfo TailBBI = BBAnalysis[TailBB->getNumber()];
- if (TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) {
- BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ bool CanMergeTail = !TailBBI.HasFallThrough;
+ // There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
+ // check if there are any other predecessors besides those.
+ unsigned NumPreds = TailBB->pred_size();
+ if (NumPreds > 1)
+ CanMergeTail = false;
+ else if (NumPreds == 1 && CanMergeTail) {
+ MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
+ if (*PI != BBI1->BB && *PI != BBI2->BB)
+ CanMergeTail = false;
+ }
+ if (CanMergeTail) {
MergeBlocks(BBI, TailBBI);
TailBBI.IsDone = true;
} else {
+ BBI.BB->addSuccessor(TailBB);
InsertUncondBranch(BBI.BB, TailBB, TII);
BBI.HasFallThrough = false;
}
}
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch,
+ // which can happen here if TailBB is unanalyzable and is merged, so
+ // explicitly remove BBI1 and BBI2 as successors.
+ BBI.BB->removeSuccessor(BBI1->BB);
+ BBI.BB->removeSuccessor(BBI2->BB);
RemoveExtraEdges(BBI);
// Update block info.
@@ -1135,9 +1323,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
/// specified end with the specified condition.
void IfConverter::PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
- SmallVectorImpl<MachineOperand> &Cond) {
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs) {
for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
- if (TII->isPredicated(I))
+ if (I->isDebugValue() || TII->isPredicated(I))
continue;
if (!TII->PredicateInstruction(I, Cond)) {
#ifndef NDEBUG
@@ -1145,6 +1334,10 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
#endif
llvm_unreachable(0);
}
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit use so the prior value stays live.
+ UpdatePredRedefs(I, Redefs, TRI, true);
}
std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
@@ -1152,48 +1345,55 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
BBI.IsAnalyzed = false;
BBI.NonPredSize = 0;
- NumIfConvBBs++;
+ ++NumIfConvBBs;
}
/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
/// the destination block. Skip end of block branches if IgnoreBr is true.
void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
bool IgnoreBr) {
MachineFunction &MF = *ToBBI.BB->getParent();
for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
E = FromBBI.BB->end(); I != E; ++I) {
const TargetInstrDesc &TID = I->getDesc();
- bool isPredicated = TII->isPredicated(I);
// Do not copy the end of the block branches.
- if (IgnoreBr && !isPredicated && TID.isBranch())
+ if (IgnoreBr && TID.isBranch())
break;
MachineInstr *MI = MF.CloneMachineInstr(I);
ToBBI.BB->insert(ToBBI.BB->end(), MI);
ToBBI.NonPredSize++;
- if (!isPredicated)
+ if (!TII->isPredicated(I) && !MI->isDebugValue()) {
if (!TII->PredicateInstruction(MI, Cond)) {
#ifndef NDEBUG
dbgs() << "Unable to predicate " << *I << "!\n";
#endif
llvm_unreachable(0);
}
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit use so the prior value stays live.
+ UpdatePredRedefs(MI, Redefs, TRI, true);
}
- std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
- FromBBI.BB->succ_end());
- MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
- MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+ if (!IgnoreBr) {
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
- for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
- MachineBasicBlock *Succ = Succs[i];
- // Fallthrough edge can't be transferred.
- if (Succ == FallThrough)
- continue;
- ToBBI.BB->addSuccessor(Succ);
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ ToBBI.BB->addSuccessor(Succ);
+ }
}
std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
@@ -1203,25 +1403,18 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
ToBBI.IsAnalyzed = false;
- NumDupBBs++;
+ ++NumDupBBs;
}
/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
-///
-void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) {
+/// This will leave FromBB as an empty block, so remove all of its
+/// successor edges except for the fall-through edge. If AddEdges is true,
+/// i.e., when FromBBI's branch is being moved, add those successor edges to
+/// ToBBI.
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
ToBBI.BB->splice(ToBBI.BB->end(),
FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
- // Redirect all branches to FromBB to ToBB.
- std::vector<MachineBasicBlock *> Preds(FromBBI.BB->pred_begin(),
- FromBBI.BB->pred_end());
- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- MachineBasicBlock *Pred = Preds[i];
- if (Pred == ToBBI.BB)
- continue;
- Pred->ReplaceUsesOfBlockWith(FromBBI.BB, ToBBI.BB);
- }
-
std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
FromBBI.BB->succ_end());
MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
@@ -1233,7 +1426,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) {
if (Succ == FallThrough)
continue;
FromBBI.BB->removeSuccessor(Succ);
- ToBBI.BB->addSuccessor(Succ);
+ if (AddEdges)
+ ToBBI.BB->addSuccessor(Succ);
}
// Now FromBBI always falls through to the next block!
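
The InitPredRedefs/UpdatePredRedefs helpers introduced above carry the read-plus-write model through predication. A rough usage sketch, assuming a block CvtBB is about to be predicated under condition Cond and error handling from PredicateBlock is omitted:

    SmallSet<unsigned, 4> Redefs;
    InitPredRedefs(CvtBB, Redefs, TRI);          // seed with the block's live-ins
    for (MachineBasicBlock::iterator I = CvtBB->begin(), E = CvtBB->end();
         I != E; ++I) {
      if (I->isDebugValue() || TII->isPredicated(I))
        continue;
      TII->PredicateInstruction(I, Cond);
      // Redefinitions of a still-live value get an implicit use added.
      UpdatePredRedefs(I, Redefs, TRI, /*AddImpUse=*/true);
    }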
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
new file mode 100644
index 000000000000..12adcaa3a22e
--- /dev/null
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -0,0 +1,408 @@
+//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The inline spiller modifies the machine function directly instead of
+// inserting spills and restores in VirtRegMap.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spiller"
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class InlineSpiller : public Spiller {
+ MachineFunction &mf_;
+ LiveIntervals &lis_;
+ VirtRegMap &vrm_;
+ MachineFrameInfo &mfi_;
+ MachineRegisterInfo &mri_;
+ const TargetInstrInfo &tii_;
+ const TargetRegisterInfo &tri_;
+ const BitVector reserved_;
+
+ // Variables that are valid during spill(), but used by multiple methods.
+ LiveInterval *li_;
+ std::vector<LiveInterval*> *newIntervals_;
+ const TargetRegisterClass *rc_;
+ int stackSlot_;
+ const SmallVectorImpl<LiveInterval*> *spillIs_;
+
+ // Values of the current interval that can potentially remat.
+ SmallPtrSet<VNInfo*, 8> reMattable_;
+
+ // Values in reMattable_ that failed to remat at some point.
+ SmallPtrSet<VNInfo*, 8> usedValues_;
+
+ ~InlineSpiller() {}
+
+public:
+ InlineSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
+ : mf_(*mf), lis_(*lis), vrm_(*vrm),
+ mfi_(*mf->getFrameInfo()),
+ mri_(mf->getRegInfo()),
+ tii_(*mf->getTarget().getInstrInfo()),
+ tri_(*mf->getTarget().getRegisterInfo()),
+ reserved_(tri_.getReservedRegs(mf_)) {}
+
+ void spill(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex *earliestIndex);
+
+private:
+ bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+ SlotIndex UseIdx);
+ bool reMaterializeFor(MachineBasicBlock::iterator MI);
+ void reMaterializeAll();
+
+ bool foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops);
+ void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
+ void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
+};
+}
+
+namespace llvm {
+Spiller *createInlineSpiller(MachineFunction *mf,
+ LiveIntervals *lis,
+ const MachineLoopInfo *mli,
+ VirtRegMap *vrm) {
+ return new InlineSpiller(mf, lis, vrm);
+}
+}
+
+/// allUsesAvailableAt - Return true if all registers used by OrigMI at
+/// OrigIdx are also available with the same value at UseIdx.
+bool InlineSpiller::allUsesAvailableAt(const MachineInstr *OrigMI,
+ SlotIndex OrigIdx,
+ SlotIndex UseIdx) {
+ OrigIdx = OrigIdx.getUseIndex();
+ UseIdx = UseIdx.getUseIndex();
+ for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OrigMI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || MO.getReg() == li_->reg)
+ continue;
+ // Reserved registers are OK.
+ if (MO.isUndef() || !lis_.hasInterval(MO.getReg()))
+ continue;
+ // We don't want to move any defs.
+ if (MO.isDef())
+ return false;
+ // We cannot depend on virtual registers in spillIs_. They will be spilled.
+ for (unsigned si = 0, se = spillIs_->size(); si != se; ++si)
+ if ((*spillIs_)[si]->reg == MO.getReg())
+ return false;
+
+ LiveInterval &LI = lis_.getInterval(MO.getReg());
+ const VNInfo *OVNI = LI.getVNInfoAt(OrigIdx);
+ if (!OVNI)
+ continue;
+ if (OVNI != LI.getVNInfoAt(UseIdx))
+ return false;
+ }
+ return true;
+}
+
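
The legality test above boils down to: every input register must carry the same value number at the original definition point and at the new use point (the real code additionally skips operands that have no value at OrigIdx). A toy model of that check, with a map from definition points to value ids standing in for LiveInterval::getVNInfoAt (illustrative only, not the LLVM types):

  #include <map>

  typedef std::map<int, int> ToyInterval;    // def point -> value id

  static int valueAt(const ToyInterval &LI, int Idx) {
    ToyInterval::const_iterator I = LI.upper_bound(Idx);
    if (I == LI.begin())
      return -1;                              // not yet defined here
    return (--I)->second;                     // value of the latest def <= Idx
  }

  static bool sameValueAt(const ToyInterval &LI, int OrigIdx, int UseIdx) {
    int V = valueAt(LI, OrigIdx);
    return V != -1 && V == valueAt(LI, UseIdx);
  }
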
+/// reMaterializeFor - Attempt to rematerialize li_->reg before MI instead of
+/// reloading it.
+bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
+ SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex();
+ VNInfo *OrigVNI = li_->getVNInfoAt(UseIdx);
+ if (!OrigVNI) {
+ DEBUG(dbgs() << "\tadding <undef> flags: ");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg)
+ MO.setIsUndef();
+ }
+ DEBUG(dbgs() << UseIdx << '\t' << *MI);
+ return true;
+ }
+ if (!reMattable_.count(OrigVNI)) {
+ DEBUG(dbgs() << "\tusing non-remat valno " << OrigVNI->id << ": "
+ << UseIdx << '\t' << *MI);
+ return false;
+ }
+ MachineInstr *OrigMI = lis_.getInstructionFromIndex(OrigVNI->def);
+ if (!allUsesAvailableAt(OrigMI, OrigVNI->def, UseIdx)) {
+ usedValues_.insert(OrigVNI);
+ DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
+ return false;
+ }
+
+ // If the instruction also writes li_->reg, it had better not require the same
+ // register for uses and defs.
+ bool Reads, Writes;
+ SmallVector<unsigned, 8> Ops;
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(li_->reg, &Ops);
+ if (Writes) {
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i]);
+ if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) {
+ usedValues_.insert(OrigVNI);
+ DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+ return false;
+ }
+ }
+ }
+
+ // Allocate a new register for the remat.
+ unsigned NewVReg = mri_.createVirtualRegister(rc_);
+ vrm_.grow();
+ LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg);
+ NewLI.markNotSpillable();
+ newIntervals_->push_back(&NewLI);
+
+ // Finally we can rematerialize OrigMI before MI.
+ MachineBasicBlock &MBB = *MI->getParent();
+ tii_.reMaterialize(MBB, MI, NewLI.reg, 0, OrigMI, tri_);
+ MachineBasicBlock::iterator RematMI = MI;
+ SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(--RematMI).getDefIndex();
+ DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *RematMI);
+
+ // Replace operands
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i]);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) {
+ MO.setReg(NewVReg);
+ MO.setIsKill();
+ }
+ }
+ DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
+
+ VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, true,
+ lis_.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI));
+ DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ return true;
+}
+
+/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible,
+/// and trim the live ranges after.
+void InlineSpiller::reMaterializeAll() {
+ // Do a quick scan of the interval values to find if any are remattable.
+ reMattable_.clear();
+ usedValues_.clear();
+ for (LiveInterval::const_vni_iterator I = li_->vni_begin(),
+ E = li_->vni_end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused() || !VNI->isDefAccurate())
+ continue;
+ MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
+ if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI))
+ continue;
+ reMattable_.insert(VNI);
+ }
+
+ // Often, no defs are remattable.
+ if (reMattable_.empty())
+ return;
+
+ // Try to remat before all uses of li_->reg.
+ bool anyRemat = false;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ RI = mri_.use_nodbg_begin(li_->reg);
+ MachineInstr *MI = RI.skipInstruction();)
+ anyRemat |= reMaterializeFor(MI);
+
+ if (!anyRemat)
+ return;
+
+ // Remove any values that were completely rematted.
+ bool anyRemoved = false;
+ for (SmallPtrSet<VNInfo*, 8>::iterator I = reMattable_.begin(),
+ E = reMattable_.end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->hasPHIKill() || usedValues_.count(VNI))
+ continue;
+ MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
+ DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI);
+ lis_.RemoveMachineInstrFromMaps(DefMI);
+ vrm_.RemoveMachineInstrFromMaps(DefMI);
+ DefMI->eraseFromParent();
+ li_->removeValNo(VNI);
+ anyRemoved = true;
+ }
+
+ if (!anyRemoved)
+ return;
+
+ // Removing values may cause debug uses where li_ is not live.
+ for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+ if (!MI->isDebugValue())
+ continue;
+ // Try to preserve the debug value if li_ is live immediately after it.
+ MachineBasicBlock::iterator NextMI = MI;
+ ++NextMI;
+ if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) {
+ SlotIndex NearIdx = lis_.getInstructionIndex(NextMI);
+ if (li_->liveAt(NearIdx))
+ continue;
+ }
+ DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI);
+ MI->eraseFromParent();
+ }
+}
+
+/// foldMemoryOperand - Try folding stack slot references in Ops into MI.
+/// Return true on success, and MI will be erased.
+bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops) {
+ // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
+ // operands.
+ SmallVector<unsigned, 8> FoldOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ unsigned Idx = Ops[i];
+ MachineOperand &MO = MI->getOperand(Idx);
+ if (MO.isImplicit())
+ continue;
+ // FIXME: Teach targets to deal with subregs.
+ if (MO.getSubReg())
+ return false;
+ // Tied use operands should not be passed to foldMemoryOperand.
+ if (!MI->isRegTiedToDefOperand(Idx))
+ FoldOps.push_back(Idx);
+ }
+
+ MachineInstr *FoldMI = tii_.foldMemoryOperand(MI, FoldOps, stackSlot_);
+ if (!FoldMI)
+ return false;
+ lis_.ReplaceMachineInstrInMaps(MI, FoldMI);
+ vrm_.addSpillSlotUse(stackSlot_, FoldMI);
+ MI->eraseFromParent();
+ DEBUG(dbgs() << "\tfolded: " << *FoldMI);
+ return true;
+}
+
+/// insertReload - Insert a reload of NewLI.reg before MI.
+void InlineSpiller::insertReload(LiveInterval &NewLI,
+ MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex();
+ tii_.loadRegFromStackSlot(MBB, MI, NewLI.reg, stackSlot_, rc_, &tri_);
+ --MI; // Point to load instruction.
+ SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+ vrm_.addSpillSlotUse(stackSlot_, MI);
+ DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
+ VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, true,
+ lis_.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
+}
+
+/// insertSpill - Insert a spill of NewLI.reg after MI.
+void InlineSpiller::insertSpill(LiveInterval &NewLI,
+ MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex();
+ tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_);
+ --MI; // Point to store instruction.
+ SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+ vrm_.addSpillSlotUse(stackSlot_, MI);
+ DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
+ VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, true,
+ lis_.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
+}
+
+void InlineSpiller::spill(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex *earliestIndex) {
+ DEBUG(dbgs() << "Inline spilling " << *li << "\n");
+ assert(li->isSpillable() && "Attempting to spill already spilled value.");
+ assert(!li->isStackSlot() && "Trying to spill a stack slot.");
+
+ li_ = li;
+ newIntervals_ = &newIntervals;
+ rc_ = mri_.getRegClass(li->reg);
+ spillIs_ = &spillIs;
+
+ reMaterializeAll();
+
+ // Remat may handle everything.
+ if (li_->empty())
+ return;
+
+ stackSlot_ = vrm_.assignVirt2StackSlot(li->reg);
+
+ // Iterate over instructions using register.
+ for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+
+ // Debug values are not allowed to affect codegen.
+ if (MI->isDebugValue()) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ uint64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr = MI->getOperand(2).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV = tii_.emitFrameIndexDebugValue(mf_, stackSlot_,
+ Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ } else {
+ DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+ MI->eraseFromParent();
+ }
+ continue;
+ }
+
+ // Analyze instruction.
+ bool Reads, Writes;
+ SmallVector<unsigned, 8> Ops;
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(li->reg, &Ops);
+
+ // Attempt to fold memory ops.
+ if (foldMemoryOperand(MI, Ops))
+ continue;
+
+ // Allocate interval around instruction.
+ // FIXME: Infer regclass from instruction alone.
+ unsigned NewVReg = mri_.createVirtualRegister(rc_);
+ vrm_.grow();
+ LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg);
+ NewLI.markNotSpillable();
+
+ if (Reads)
+ insertReload(NewLI, MI);
+
+ // Rewrite instruction operands.
+ bool hasLiveDef = false;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i]);
+ MO.setReg(NewVReg);
+ if (MO.isUse()) {
+ if (!MI->isRegTiedToDefOperand(Ops[i]))
+ MO.setIsKill();
+ } else {
+ if (!MO.isDead())
+ hasLiveDef = true;
+ }
+ }
+
+ // FIXME: Use a second vreg if instruction has no tied ops.
+ if (Writes && hasLiveDef)
+ insertSpill(NewLI, MI);
+
+ DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ newIntervals.push_back(&NewLI);
+ }
+}
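
In outline, the main loop above wraps every remaining use of the spilled register in a tiny scratch interval: reload before the instruction if it reads the register, rewrite its operands to a fresh virtual register, and spill after it if it writes a live value. A runnable toy sketch of that shape (printf stands in for the real insertReload/insertSpill; not the MachineInstr API):

  #include <cstdio>

  struct ToyUse { bool Reads, Writes; };

  static void rewriteAroundUse(const ToyUse &U, int ScratchReg, int StackSlot) {
    if (U.Reads)
      std::printf("  reload v%d from fi#%d\n", ScratchReg, StackSlot);
    std::printf("  rewrite operands to v%d\n", ScratchReg);
    if (U.Writes)
      std::printf("  spill v%d to fi#%d\n", ScratchReg, StackSlot);
  }

  int main() {
    ToyUse ReadModifyWrite = { true, true };
    rewriteAroundUse(ReadModifyWrite, 42, 3);
    return 0;
  }
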
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 63bb5f21f8f1..03ae214ae7da 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -16,6 +16,7 @@
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
@@ -314,21 +315,22 @@ static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
const char *Dname,
const char *LDname) {
- switch (CI->getOperand(1)->getType()->getTypeID()) {
+ CallSite CS(CI);
+ switch (CI->getArgOperand(0)->getType()->getTypeID()) {
default: llvm_unreachable("Invalid type in intrinsic");
case Type::FloatTyID:
- ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(),
+ ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(),
Type::getFloatTy(CI->getContext()));
break;
case Type::DoubleTyID:
- ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(),
+ ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(),
Type::getDoubleTy(CI->getContext()));
break;
case Type::X86_FP80TyID:
case Type::FP128TyID:
case Type::PPC_FP128TyID:
- ReplaceCallWith(LDname, CI, CI->op_begin() + 1, CI->op_end(),
- CI->getOperand(1)->getType());
+ ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(),
+ CI->getArgOperand(0)->getType());
break;
}
}
@@ -340,6 +342,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
const Function *Callee = CI->getCalledFunction();
assert(Callee && "Cannot lower an indirect call!");
+ CallSite CS(CI);
switch (Callee->getIntrinsicID()) {
case Intrinsic::not_intrinsic:
report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
@@ -353,7 +356,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
// by the lowerinvoke pass. In both cases, the right thing to do is to
// convert the call to an explicit setjmp or longjmp call.
case Intrinsic::setjmp: {
- Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(),
+ Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(),
Type::getInt32Ty(Context));
if (!CI->getType()->isVoidTy())
CI->replaceAllUsesWith(V);
@@ -365,32 +368,32 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
case Intrinsic::longjmp: {
- ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(),
+ ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(),
Type::getVoidTy(Context));
break;
}
case Intrinsic::siglongjmp: {
// Insert the call to abort
- ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(),
+ ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),
Type::getVoidTy(Context));
break;
}
case Intrinsic::ctpop:
- CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getOperand(1), CI));
+ CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI));
break;
case Intrinsic::bswap:
- CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getOperand(1), CI));
+ CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI));
break;
case Intrinsic::ctlz:
- CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getOperand(1), CI));
+ CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));
break;
case Intrinsic::cttz: {
// cttz(x) -> ctpop(~X & (X-1))
- Value *Src = CI->getOperand(1);
+ Value *Src = CI->getArgOperand(0);
Value *NotSrc = Builder.CreateNot(Src);
NotSrc->setName(Src->getName() + ".not");
Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
@@ -451,37 +454,37 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::memcpy: {
const IntegerType *IntPtr = TD.getIntPtrType(Context);
- Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
- Ops[0] = CI->getOperand(1);
- Ops[1] = CI->getOperand(2);
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
Ops[2] = Size;
- ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType());
+ ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
break;
}
case Intrinsic::memmove: {
const IntegerType *IntPtr = TD.getIntPtrType(Context);
- Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
- Ops[0] = CI->getOperand(1);
- Ops[1] = CI->getOperand(2);
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
Ops[2] = Size;
- ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType());
+ ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
break;
}
case Intrinsic::memset: {
const IntegerType *IntPtr = TD.getIntPtrType(Context);
- Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
- Ops[0] = CI->getOperand(1);
+ Ops[0] = CI->getArgOperand(0);
// Extend the amount to i32.
- Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::getInt32Ty(Context),
+ Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), Type::getInt32Ty(Context),
/* isSigned */ false);
Ops[2] = Size;
- ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType());
+ ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
break;
}
case Intrinsic::sqrt: {
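
The repeated change in this file is an indexing shift: call arguments are now reached through CallInst::getArgOperand(i) instead of getOperand(i+1), so the lowering code no longer depends on where the callee pointer sits in the operand list. A one-function sketch of the preferred accessor (assumes the llvm/Instructions.h header of this tree; illustrative only):

  #include "llvm/Instructions.h"
  using namespace llvm;

  // First call argument, independent of the callee operand's position.
  static Value *firstArg(CallInst *CI) {
    return CI->getArgOperand(0);
  }
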
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index b584704bff3d..bf3137e49536 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -329,12 +329,15 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (OptLevel != CodeGenOpt::None)
PM.add(createOptimizePHIsPass());
- // Delete dead machine instructions regardless of optimization level.
- PM.add(createDeadMachineInstructionElimPass());
- printAndVerify(PM, "After codegen DCE pass",
- /* allowDoubleDefs= */ true);
-
if (OptLevel != CodeGenOpt::None) {
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ PM.add(createDeadMachineInstructionElimPass());
+ printAndVerify(PM, "After codegen DCE pass",
+ /* allowDoubleDefs= */ true);
+
PM.add(createOptimizeExtsPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
@@ -358,7 +361,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
/* allowDoubleDefs= */ true);
// Perform register allocation.
- PM.add(createRegisterAllocator());
+ PM.add(createRegisterAllocator(OptLevel));
printAndVerify(PM, "After Register Allocation");
// Perform stack slot coloring and post-ra machine LICM.
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 03b4eab93dca..b9527fafbee8 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -118,7 +118,7 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
SUnit *LatencyPriorityQueue::pop() {
if (empty()) return NULL;
std::vector<SUnit *>::iterator Best = Queue.begin();
- for (std::vector<SUnit *>::iterator I = next(Queue.begin()),
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
E = Queue.end(); I != E; ++I)
if (Picker(*Best, *I))
Best = I;
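
The one change here spells the call as llvm::next. With C++0x standard libraries that also provide std::next, the unqualified name can become ambiguous through argument-dependent lookup on std::vector iterators; qualifying it keeps the code building either way. A small sketch (assumes llvm/ADT/STLExtras.h, where llvm::next is declared):

  #include "llvm/ADT/STLExtras.h"
  #include <vector>

  static int secondElement(const std::vector<int> &V) {
    // Explicit qualification avoids ambiguity with std::next in C++0x mode.
    return *llvm::next(V.begin());
  }
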
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 025ad0538f2c..21a9b7d4db6f 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -68,6 +68,37 @@ bool LiveInterval::liveBeforeAndAt(SlotIndex I) const {
return r->end == I;
}
+/// killedAt - Return true if a live range ends at index. Note that the kill
+/// point is not contained in the half-open live range. It is usually the
+/// getDefIndex() slot following its last use.
+bool LiveInterval::killedAt(SlotIndex I) const {
+ Ranges::const_iterator r = std::lower_bound(ranges.begin(), ranges.end(), I);
+
+ // Now r points to the first interval with start >= I, or ranges.end().
+ if (r == ranges.begin())
+ return false;
+
+ --r;
+ // Now r points to the last interval with end <= I.
+ // r->end is the kill point.
+ return r->end == I;
+}
+
+/// killedInRange - Return true if the interval has kills in [Start,End).
+bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const {
+ Ranges::const_iterator r =
+ std::lower_bound(ranges.begin(), ranges.end(), End);
+
+ // Now r points to the first interval with start >= End, or ranges.end().
+ if (r == ranges.begin())
+ return false;
+
+ --r;
+ // Now r points to the last interval with end <= End.
+ // r->end is the kill point.
+ return r->end >= Start && r->end < End;
+}
+
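
As a self-contained illustration of the lower_bound pattern used by killedAt and killedInRange above, here is the same query over plain half-open [start, end) pairs sorted by start (toy types, not LiveRange/SlotIndex):

  #include <algorithm>
  #include <utility>
  #include <vector>

  typedef std::pair<int, int> Range;   // half-open [first, second)

  static bool killedInRange(const std::vector<Range> &Ranges, int Start, int End) {
    // First range whose start is >= End; valid ranges have second > first,
    // so comparing against (End, End) cannot misorder ranges starting at End.
    std::vector<Range>::const_iterator r =
      std::lower_bound(Ranges.begin(), Ranges.end(), std::make_pair(End, End));
    if (r == Ranges.begin())
      return false;                    // every range starts at or after End
    --r;                               // last range starting before End
    // Its end slot is the kill point; report it if it lies in [Start, End).
    return r->second >= Start && r->second < End;
  }
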
// overlaps - Return true if the intersection of the two live intervals is
// not empty.
//
@@ -149,7 +180,6 @@ bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
assert(I != ranges.end() && "Not a valid interval!");
VNInfo *ValNo = I->valno;
- SlotIndex OldEnd = I->end;
// Search for the first interval that we can't merge with.
Ranges::iterator MergeTo = next(I);
@@ -163,9 +193,6 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
// Erase any dead ranges.
ranges.erase(next(I), MergeTo);
- // Update kill info.
- ValNo->removeKills(OldEnd, I->end.getPrevSlot());
-
// If the newly formed range now touches the range after it and if they have
// the same value number, merge the two ranges into one range.
Ranges::iterator Next = next(I);
@@ -245,9 +272,6 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
// endpoint as well.
if (End > it->end)
extendIntervalEndTo(it, End);
- else if (End < it->end)
- // Overlapping intervals, there might have been a kill here.
- it->valno->removeKill(End);
return it;
}
} else {
@@ -288,7 +312,6 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
VNInfo *ValNo = I->valno;
if (I->start == Start) {
if (I->end == End) {
- ValNo->removeKills(Start, End);
if (RemoveDeadValNo) {
// Check if val# is dead.
bool isDead = true;
@@ -296,7 +319,7 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
if (II != I && II->valno == ValNo) {
isDead = false;
break;
- }
+ }
if (isDead) {
// Now that ValNo is dead, remove it. If it is the largest value
// number, just nuke it (and any other deleted values neighboring it),
@@ -320,7 +343,6 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
// Otherwise if the span we are removing is at the end of the LiveRange,
// adjust the other way.
if (I->end == End) {
- ValNo->removeKills(Start, End);
I->end = Start;
return;
}
@@ -529,6 +551,7 @@ void LiveInterval::MergeValueInAsValue(
SmallVector<VNInfo*, 4> ReplacedValNos;
iterator IP = begin();
for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ assert(I->valno == RHS.getValNumInfo(I->valno->id) && "Bad VNInfo");
if (I->valno != RHSValNo)
continue;
SlotIndex Start = I->start, End = I->end;
@@ -823,10 +846,12 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
else {
OS << " = ";
for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
- E = ranges.end(); I != E; ++I)
- OS << *I;
+ E = ranges.end(); I != E; ++I) {
+ OS << *I;
+ assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo");
+ }
}
-
+
// Print value number info.
if (getNumValNums()) {
OS << " ";
@@ -843,21 +868,6 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
OS << "?";
else
OS << vni->def;
- unsigned ee = vni->kills.size();
- if (ee || vni->hasPHIKill()) {
- OS << "-(";
- for (unsigned j = 0; j != ee; ++j) {
- OS << vni->kills[j];
- if (j != ee-1)
- OS << " ";
- }
- if (vni->hasPHIKill()) {
- if (ee)
- OS << " ";
- OS << "phi";
- }
- OS << ")";
- }
}
}
}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index a6d38adeab04..194d03d8dbfb 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -50,9 +50,6 @@ using namespace llvm;
static cl::opt<bool> DisableReMat("disable-rematerialization",
cl::init(false), cl::Hidden);
-static cl::opt<bool> EnableFastSpilling("fast-spill",
- cl::init(false), cl::Hidden);
-
STATISTIC(numIntervals , "Number of original intervals");
STATISTIC(numFolds , "Number of loads/stores folded into instructions");
STATISTIC(numSplits , "Number of intervals split");
@@ -90,8 +87,8 @@ void LiveIntervals::releaseMemory() {
r2iMap_.clear();
- // Release VNInfo memroy regions after all VNInfo objects are dtor'd.
- VNInfoAllocator.DestroyAll();
+ // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
while (!CloneMIs.empty()) {
MachineInstr *MI = CloneMIs.back();
CloneMIs.pop_back();
@@ -195,6 +192,10 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
if (tii_->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
if (SrcReg == li.reg || DstReg == li.reg)
continue;
+ if (MI.isCopy())
+ if (MI.getOperand(0).getReg() == li.reg ||
+ MI.getOperand(1).getReg() == li.reg)
+ continue;
// Check for operands using reg
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
@@ -218,10 +219,7 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
return false;
}
-/// conflictsWithSubPhysRegRef - Similar to conflictsWithPhysRegRef except
-/// it checks for sub-register reference and it can check use as well.
-bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li,
- unsigned Reg, bool CheckUse,
+bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg,
SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
for (LiveInterval::Ranges::const_iterator
I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
@@ -239,12 +237,11 @@ bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li,
MachineOperand& MO = MI->getOperand(i);
if (!MO.isReg())
continue;
- if (MO.isUse() && !CheckUse)
- continue;
unsigned PhysReg = MO.getReg();
- if (PhysReg == 0 || TargetRegisterInfo::isVirtualRegister(PhysReg))
+ if (PhysReg == 0 || PhysReg == Reg ||
+ TargetRegisterInfo::isVirtualRegister(PhysReg))
continue;
- if (tri_->isSubRegister(Reg, PhysReg))
+ if (tri_->regsOverlap(Reg, PhysReg))
return true;
}
}
@@ -272,7 +269,7 @@ bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
if (MO.getReg() == Reg && MO.isDef()) {
assert(MI.getOperand(MOIdx).getSubReg() != MO.getSubReg() &&
MI.getOperand(MOIdx).getSubReg() &&
- MO.getSubReg());
+ (MO.getSubReg() || MO.isImplicit()));
return true;
}
}
@@ -328,9 +325,10 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
MachineInstr *CopyMI = NULL;
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg() ||
- tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (mi->isCopyLike() ||
+ tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) {
CopyMI = mi;
+ }
VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true,
VNInfoAllocator);
@@ -356,7 +354,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
LiveRange LR(defIndex, killIdx, ValNo);
interval.addRange(LR);
DEBUG(dbgs() << " +" << LR << "\n");
- ValNo->addKill(killIdx);
return;
}
}
@@ -376,7 +373,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// valno in the killing blocks.
assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
DEBUG(dbgs() << " phi-join");
- ValNo->addKill(indexes_->getTerminatorGap(mbb));
ValNo->setHasPHIKill(true);
} else {
// Iterate over all of the blocks that the variable is completely
@@ -407,7 +403,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
}
LiveRange LR(Start, killIdx, ValNo);
interval.addRange(LR);
- ValNo->addKill(killIdx);
DEBUG(dbgs() << " +" << LR);
}
@@ -434,11 +429,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// are actually two values in the live interval. Because of this we
// need to take the LiveRegion that defines this register and split it
// into two values.
- // Two-address vregs should always only be redefined once. This means
- // that at this point, there should be exactly one value number in it.
- assert((PartReDef || interval.containsOneValue()) &&
- "Unexpected 2-addr liveint!");
- SlotIndex DefIndex = interval.getValNumInfo(0)->def.getDefIndex();
SlotIndex RedefIndex = MIIdx.getDefIndex();
if (MO.isEarlyClobber())
RedefIndex = MIIdx.getUseIndex();
@@ -446,8 +436,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
const LiveRange *OldLR =
interval.getLiveRangeContaining(RedefIndex.getUseIndex());
VNInfo *OldValNo = OldLR->valno;
+ SlotIndex DefIndex = OldValNo->def.getDefIndex();
- // Delete the initial value, which should be short and continuous,
+ // Delete the previous value, which should be short and continuous,
// because the 2-addr copy must be in the same MBB as the redef.
interval.removeRange(DefIndex, RedefIndex);
@@ -464,15 +455,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (PartReDef &&
- tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (PartReDef && (mi->isCopyLike() ||
+ tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)))
OldValNo->setCopy(&*mi);
// Add the new live interval which replaces the range for the input copy.
LiveRange LR(DefIndex, RedefIndex, ValNo);
DEBUG(dbgs() << " replace range with " << LR);
interval.addRange(LR);
- ValNo->addKill(RedefIndex);
// If this redefinition is dead, we need to add a dummy unit live
// range covering the def slot.
@@ -496,7 +486,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
VNInfo *ValNo;
MachineInstr *CopyMI = NULL;
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg()||
+ if (mi->isCopyLike() ||
tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
CopyMI = mi;
ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
@@ -504,7 +494,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
SlotIndex killIndex = getMBBEndIdx(mbb);
LiveRange LR(defIndex, killIndex, ValNo);
interval.addRange(LR);
- ValNo->addKill(indexes_->getTerminatorGap(mbb));
ValNo->setHasPHIKill(true);
DEBUG(dbgs() << " phi-join +" << LR);
} else {
@@ -600,7 +589,6 @@ exit:
ValNo->setHasRedefByEC(true);
LiveRange LR(start, end, ValNo);
interval.addRange(LR);
- LR.valno->addKill(end);
DEBUG(dbgs() << " +" << LR << '\n');
}
@@ -615,7 +603,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
else if (allocatableRegs_[MO.getReg()]) {
MachineInstr *CopyMI = NULL;
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg() ||
+ if (MI->isCopyLike() ||
tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
CopyMI = MI;
handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
@@ -701,7 +689,6 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
LiveRange LR(start, end, vni);
interval.addRange(LR);
- LR.valno->addKill(end);
DEBUG(dbgs() << " +" << LR << '\n');
}
@@ -787,37 +774,6 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
return NewLI;
}
-/// getVNInfoSourceReg - Helper function that parses the specified VNInfo
-/// copy field and returns the source register that defines it.
-unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const {
- if (!VNI->getCopy())
- return 0;
-
- if (VNI->getCopy()->isExtractSubreg()) {
- // If it's extracting out of a physical register, return the sub-register.
- unsigned Reg = VNI->getCopy()->getOperand(1).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- unsigned SrcSubReg = VNI->getCopy()->getOperand(2).getImm();
- unsigned DstSubReg = VNI->getCopy()->getOperand(0).getSubReg();
- if (SrcSubReg == DstSubReg)
- // %reg1034:3<def> = EXTRACT_SUBREG %EDX, 3
- // reg1034 can still be coalesced to EDX.
- return Reg;
- assert(DstSubReg == 0);
- Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm());
- }
- return Reg;
- } else if (VNI->getCopy()->isInsertSubreg() ||
- VNI->getCopy()->isSubregToReg())
- return VNI->getCopy()->getOperand(2).getReg();
-
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(*VNI->getCopy(), SrcReg, DstReg, SrcSubReg, DstSubReg))
- return SrcReg;
- llvm_unreachable("Unrecognized copy instruction!");
- return 0;
-}
-
//===----------------------------------------------------------------------===//
// Register allocator hooks.
//
@@ -991,22 +947,22 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
if (DefMI && (MRInfo & VirtRegMap::isMod))
return false;
- MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(*mf_, MI, FoldOps, Slot)
- : tii_->foldMemoryOperand(*mf_, MI, FoldOps, DefMI);
+ MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot)
+ : tii_->foldMemoryOperand(MI, FoldOps, DefMI);
if (fmi) {
// Remember this instruction uses the spill slot.
if (isSS) vrm.addSpillSlotUse(Slot, fmi);
// Attempt to fold the memory reference into the instruction. If
// we can do this, we don't need to insert spill code.
- MachineBasicBlock &MBB = *MI->getParent();
if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
vrm.transferSpillPts(MI, fmi);
vrm.transferRestorePts(MI, fmi);
vrm.transferEmergencySpills(MI, fmi);
ReplaceMachineInstrInMaps(MI, fmi);
- MI = MBB.insert(MBB.erase(MI), fmi);
+ MI->eraseFromParent();
+ MI = fmi;
++numFolds;
return true;
}
@@ -1098,7 +1054,6 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
if (!mop.isReg())
continue;
unsigned Reg = mop.getReg();
- unsigned RegI = Reg;
if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
if (Reg != li.reg)
@@ -1140,26 +1095,8 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
//
// Keep track of whether we replace a use and/or def so that we can
// create the spill interval with the appropriate range.
-
- HasUse = mop.isUse();
- HasDef = mop.isDef();
SmallVector<unsigned, 2> Ops;
- Ops.push_back(i);
- for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
- const MachineOperand &MOj = MI->getOperand(j);
- if (!MOj.isReg())
- continue;
- unsigned RegJ = MOj.getReg();
- if (RegJ == 0 || TargetRegisterInfo::isPhysicalRegister(RegJ))
- continue;
- if (RegJ == RegI) {
- Ops.push_back(j);
- if (!MOj.isUndef()) {
- HasUse |= MOj.isUse();
- HasDef |= MOj.isDef();
- }
- }
- }
+ tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops);
// Create a new virtual register for the spill interval.
// Create the new register now so we can map the fold instruction
@@ -1294,16 +1231,7 @@ bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
const VNInfo *VNI,
MachineBasicBlock *MBB,
SlotIndex Idx) const {
- SlotIndex End = getMBBEndIdx(MBB);
- for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) {
- if (VNI->kills[j].isPHI())
- continue;
-
- SlotIndex KillIdx = VNI->kills[j];
- if (KillIdx > Idx && KillIdx <= End)
- return true;
- }
- return false;
+ return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB));
}
/// RewriteInfo - Keep track of machine instrs that will be rewritten
@@ -1312,10 +1240,7 @@ namespace {
struct RewriteInfo {
SlotIndex Index;
MachineInstr *MI;
- bool HasUse;
- bool HasDef;
- RewriteInfo(SlotIndex i, MachineInstr *mi, bool u, bool d)
- : Index(i), MI(mi), HasUse(u), HasDef(d) {}
+ RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {}
};
struct RewriteInfoCompare {
@@ -1394,7 +1319,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
// easily see a situation where both registers are reloaded before
// the INSERT_SUBREG and both target registers that would overlap.
continue;
- RewriteMIs.push_back(RewriteInfo(index, MI, O.isUse(), O.isDef()));
+ RewriteMIs.push_back(RewriteInfo(index, MI));
}
std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
@@ -1404,18 +1329,11 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
RewriteInfo &rwi = RewriteMIs[i];
++i;
SlotIndex index = rwi.Index;
- bool MIHasUse = rwi.HasUse;
- bool MIHasDef = rwi.HasDef;
MachineInstr *MI = rwi.MI;
// If MI def and/or use the same register multiple times, then there
// are multiple entries.
- unsigned NumUses = MIHasUse;
while (i != e && RewriteMIs[i].MI == MI) {
assert(RewriteMIs[i].Index == index);
- bool isUse = RewriteMIs[i].HasUse;
- if (isUse) ++NumUses;
- MIHasUse |= isUse;
- MIHasDef |= RewriteMIs[i].HasDef;
++i;
}
MachineBasicBlock *MBB = MI->getParent();
@@ -1440,7 +1358,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
// = use
// It's better to start a new interval to avoid artificially
// extending the new interval.
- if (MIHasDef && !MIHasUse) {
+ if (MI->readsWritesVirtualRegister(li.reg) ==
+ std::make_pair(false,true)) {
MBBVRegsMap.erase(MBB->getNumber());
ThisVReg = 0;
}
@@ -1652,103 +1571,9 @@ LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
}
std::vector<LiveInterval*> LiveIntervals::
-addIntervalsForSpillsFast(const LiveInterval &li,
- const MachineLoopInfo *loopInfo,
- VirtRegMap &vrm) {
- unsigned slot = vrm.assignVirt2StackSlot(li.reg);
-
- std::vector<LiveInterval*> added;
-
- assert(li.isSpillable() && "attempt to spill already spilled interval!");
-
- DEBUG({
- dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
- li.dump();
- dbgs() << '\n';
- });
-
- const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
-
- MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg);
- while (RI != mri_->reg_end()) {
- MachineInstr* MI = &*RI;
-
- SmallVector<unsigned, 2> Indices;
- bool HasUse = false;
- bool HasDef = false;
-
- for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
- MachineOperand& mop = MI->getOperand(i);
- if (!mop.isReg() || mop.getReg() != li.reg) continue;
-
- HasUse |= MI->getOperand(i).isUse();
- HasDef |= MI->getOperand(i).isDef();
-
- Indices.push_back(i);
- }
-
- if (!tryFoldMemoryOperand(MI, vrm, NULL, getInstructionIndex(MI),
- Indices, true, slot, li.reg)) {
- unsigned NewVReg = mri_->createVirtualRegister(rc);
- vrm.grow();
- vrm.assignVirt2StackSlot(NewVReg, slot);
-
- // create a new register for this spill
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- nI.markNotSpillable();
-
- // Rewrite register operands to use the new vreg.
- for (SmallVectorImpl<unsigned>::iterator I = Indices.begin(),
- E = Indices.end(); I != E; ++I) {
- MI->getOperand(*I).setReg(NewVReg);
-
- if (MI->getOperand(*I).isUse())
- MI->getOperand(*I).setIsKill(true);
- }
-
- // Fill in the new live interval.
- SlotIndex index = getInstructionIndex(MI);
- if (HasUse) {
- LiveRange LR(index.getLoadIndex(), index.getUseIndex(),
- nI.getNextValue(SlotIndex(), 0, false,
- getVNInfoAllocator()));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
- vrm.addRestorePoint(NewVReg, MI);
- }
- if (HasDef) {
- LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
- nI.getNextValue(SlotIndex(), 0, false,
- getVNInfoAllocator()));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
- vrm.addSpillPoint(NewVReg, true, MI);
- }
-
- added.push_back(&nI);
-
- DEBUG({
- dbgs() << "\t\t\t\tadded new interval: ";
- nI.dump();
- dbgs() << '\n';
- });
- }
-
-
- RI = mri_->reg_begin(li.reg);
- }
-
- return added;
-}
-
-std::vector<LiveInterval*> LiveIntervals::
addIntervalsForSpills(const LiveInterval &li,
SmallVectorImpl<LiveInterval*> &SpillIs,
const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
-
- if (EnableFastSpilling)
- return addIntervalsForSpillsFast(li, loopInfo, vrm);
-
assert(li.isSpillable() && "attempt to spill already spilled interval!");
DEBUG({
@@ -2184,7 +2009,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
SlotIndex(getInstructionIndex(startInst).getDefIndex()),
startInst, true, getVNInfoAllocator());
VN->setHasPHIKill(true);
- VN->kills.push_back(indexes_->getTerminatorGap(startInst->getParent()));
LiveRange LR(
SlotIndex(getInstructionIndex(startInst).getDefIndex()),
getMBBEndIdx(startInst->getParent()), VN);
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index 798b9b939cd3..709e2c6d5ca7 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -35,8 +35,8 @@ void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
}
void LiveStacks::releaseMemory() {
- // Release VNInfo memroy regions after all VNInfo objects are dtor'd.
- VNInfoAllocator.DestroyAll();
+ // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
S2IMap.clear();
S2RCMap.clear();
}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 079684eea079..41b891d30f23 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -286,7 +286,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
MachineInstr *LastDef = PhysRegDef[Reg];
MachineInstr *LastUse = PhysRegUse[Reg];
if (!LastDef && !LastUse)
- return false;
+ return 0;
MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
@@ -609,7 +609,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// Finally, if the last instruction in the block is a return, make sure to
// mark it as using all of the live-out values in the function.
- if (!MBB->empty() && MBB->back().getDesc().isReturn()) {
+ // Things marked both call and return are tail calls; do not do this for
+ // them. The tail callee need not take the same registers as input
+ // that it produces as output, and there are dependencies for its input
+ // registers elsewhere.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn()
+ && !MBB->back().getDesc().isCall()) {
MachineInstr *Ret = &MBB->back();
for (MachineRegisterInfo::liveout_iterator
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index b0348a5b753c..dfd4eaeca660 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -53,15 +53,15 @@ namespace {
bool runOnMachineFunction(MachineFunction&);
private:
- bool LowerExtract(MachineInstr *MI);
- bool LowerInsert(MachineInstr *MI);
bool LowerSubregToReg(MachineInstr *MI);
+ bool LowerCopy(MachineInstr *MI);
void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
const TargetRegisterInfo *TRI);
void TransferKillFlag(MachineInstr *MI, unsigned SrcReg,
const TargetRegisterInfo *TRI,
bool AddIfNotFound = false);
+ void TransferImplicitDefs(MachineInstr *MI);
};
char LowerSubregsInstructionPass::ID = 0;
@@ -83,7 +83,7 @@ LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
if (MII->addRegisterDead(DstReg, TRI))
break;
assert(MII != MI->getParent()->begin() &&
- "copyRegToReg output doesn't reference destination register!");
+ "copyPhysReg output doesn't reference destination register!");
}
}
@@ -100,64 +100,24 @@ LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI,
if (MII->addRegisterKilled(SrcReg, TRI, AddIfNotFound))
break;
assert(MII != MI->getParent()->begin() &&
- "copyRegToReg output doesn't reference source register!");
+ "copyPhysReg output doesn't reference source register!");
}
}
-bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
- MachineBasicBlock *MBB = MI->getParent();
-
- assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
- MI->getOperand(1).isReg() && MI->getOperand(1).isUse() &&
- MI->getOperand(2).isImm() && "Malformed extract_subreg");
-
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned SuperReg = MI->getOperand(1).getReg();
- unsigned SubIdx = MI->getOperand(2).getImm();
- unsigned SrcReg = TRI->getSubReg(SuperReg, SubIdx);
-
- assert(TargetRegisterInfo::isPhysicalRegister(SuperReg) &&
- "Extract supperg source must be a physical register");
- assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
- "Extract destination must be in a physical register");
- assert(SrcReg && "invalid subregister index for register");
-
- DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
-
- if (SrcReg == DstReg) {
- // No need to insert an identity copy instruction.
- if (MI->getOperand(1).isKill()) {
- // We must make sure the super-register gets killed. Replace the
- // instruction with KILL.
- MI->setDesc(TII->get(TargetOpcode::KILL));
- MI->RemoveOperand(2); // SubIdx
- DEBUG(dbgs() << "subreg: replace by: " << *MI);
- return true;
- }
-
- DEBUG(dbgs() << "subreg: eliminated!");
- } else {
- // Insert copy
- const TargetRegisterClass *TRCS = TRI->getPhysicalRegisterRegClass(DstReg);
- const TargetRegisterClass *TRCD = TRI->getPhysicalRegisterRegClass(SrcReg);
- bool Emitted = TII->copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS,
- MI->getDebugLoc());
- (void)Emitted;
- assert(Emitted && "Subreg and Dst must be of compatible register class");
- // Transfer the kill/dead flags, if needed.
- if (MI->getOperand(0).isDead())
- TransferDeadFlag(MI, DstReg, TRI);
- if (MI->getOperand(1).isKill())
- TransferKillFlag(MI, SuperReg, TRI, true);
- DEBUG({
- MachineBasicBlock::iterator dMI = MI;
- dbgs() << "subreg: " << *(--dMI);
- });
+/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
+/// replacement instructions immediately precede it. Copy any implicit-def
+/// operands from MI to the replacement instruction.
+void
+LowerSubregsInstructionPass::TransferImplicitDefs(MachineInstr *MI) {
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
+ continue;
+ CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
}
-
- DEBUG(dbgs() << '\n');
- MBB->erase(MI);
- return true;
}
bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
@@ -166,10 +126,10 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
MI->getOperand(1).isImm() &&
(MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
-
+
unsigned DstReg = MI->getOperand(0).getReg();
unsigned InsReg = MI->getOperand(2).getReg();
- unsigned InsSIdx = MI->getOperand(2).getSubReg();
+ assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
unsigned SubIdx = MI->getOperand(3).getImm();
assert(SubIdx != 0 && "Invalid index for insert_subreg");
@@ -182,27 +142,25 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
- if (DstSubReg == InsReg && InsSIdx == 0) {
+ if (DstSubReg == InsReg) {
// No need to insert an identity copy instruction.
// Watch out for case like this:
- // %RAX<def> = ...
- // %RAX<def> = SUBREG_TO_REG 0, %EAX:3<kill>, 3
- // The first def is defining RAX, not EAX so the top bits were not
- // zero extended.
+ // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
+ // We must leave %RAX live.
+ if (DstReg != InsReg) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->RemoveOperand(3); // SubIdx
+ MI->RemoveOperand(1); // Imm
+ DEBUG(dbgs() << "subreg: replace by: " << *MI);
+ return true;
+ }
DEBUG(dbgs() << "subreg: eliminated!");
} else {
- // Insert sub-register copy
- const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg);
- const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg);
- bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1,
- MI->getDebugLoc());
- (void)Emitted;
- assert(Emitted && "Subreg and Dst must be of compatible register class");
+ TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
+ MI->getOperand(2).isKill());
// Transfer the kill/dead flags, if needed.
if (MI->getOperand(0).isDead())
TransferDeadFlag(MI, DstSubReg, TRI);
- if (MI->getOperand(2).isKill())
- TransferKillFlag(MI, InsReg, TRI);
DEBUG({
MachineBasicBlock::iterator dMI = MI;
dbgs() << "subreg: " << *(--dMI);
@@ -214,87 +172,39 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
return true;
}
-bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
- MachineBasicBlock *MBB = MI->getParent();
- assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
- (MI->getOperand(1).isReg() && MI->getOperand(1).isUse()) &&
- (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
- MI->getOperand(3).isImm() && "Invalid insert_subreg");
-
- unsigned DstReg = MI->getOperand(0).getReg();
-#ifndef NDEBUG
- unsigned SrcReg = MI->getOperand(1).getReg();
-#endif
- unsigned InsReg = MI->getOperand(2).getReg();
- unsigned SubIdx = MI->getOperand(3).getImm();
+bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) {
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
- assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?");
- assert(SubIdx != 0 && "Invalid index for insert_subreg");
- unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
- assert(DstSubReg && "invalid subregister index for register");
- assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- "Insert superreg source must be in a physical register");
- assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
- "Inserted value must be in a physical register");
-
- DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
-
- if (DstSubReg == InsReg) {
- // No need to insert an identity copy instruction. If the SrcReg was
- // <undef>, we need to make sure it is alive by inserting a KILL
- if (MI->getOperand(1).isUndef() && !MI->getOperand(0).isDead()) {
- MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::KILL), DstReg);
- if (MI->getOperand(2).isUndef())
- MIB.addReg(InsReg, RegState::Undef);
- else
- MIB.addReg(InsReg, RegState::Kill);
- } else {
- DEBUG(dbgs() << "subreg: eliminated!\n");
- MBB->erase(MI);
+ if (SrcMO.getReg() == DstMO.getReg()) {
+ DEBUG(dbgs() << "identity copy: " << *MI);
+ // No need to insert an identity copy instruction, but replace with a KILL
+ // if liveness is changed.
+ if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) {
+ // We must make sure the super-register gets killed. Replace the
+ // instruction with KILL.
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "replaced by: " << *MI);
return true;
}
- } else {
- // Insert sub-register copy
- const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg);
- const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg);
- if (MI->getOperand(2).isUndef())
- // If the source register being inserted is undef, then this becomes a
- // KILL.
- BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::KILL), DstSubReg);
- else {
- bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1,
- MI->getDebugLoc());
- (void)Emitted;
- assert(Emitted && "Subreg and Dst must be of compatible register class");
- }
- MachineBasicBlock::iterator CopyMI = MI;
- --CopyMI;
-
- // INSERT_SUBREG is a two-address instruction so it implicitly kills SrcReg.
- if (!MI->getOperand(1).isUndef())
- CopyMI->addOperand(MachineOperand::CreateReg(DstReg, false, true, true));
-
- // Transfer the kill/dead flags, if needed.
- if (MI->getOperand(0).isDead()) {
- TransferDeadFlag(MI, DstSubReg, TRI);
- } else {
- // Make sure the full DstReg is live after this replacement.
- CopyMI->addOperand(MachineOperand::CreateReg(DstReg, true, true));
- }
-
- // Make sure the inserted register gets killed
- if (MI->getOperand(2).isKill() && !MI->getOperand(2).isUndef())
- TransferKillFlag(MI, InsReg, TRI);
+ // Vanilla identity copy.
+ MI->eraseFromParent();
+ return true;
}
- DEBUG({
- MachineBasicBlock::iterator dMI = MI;
- dbgs() << "subreg: " << *(--dMI) << "\n";
- });
+ DEBUG(dbgs() << "real copy: " << *MI);
+ TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
+ DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
- MBB->erase(MI);
+ if (DstMO.isDead())
+ TransferDeadFlag(MI, DstMO.getReg(), TRI);
+ if (MI->getNumOperands() > 2)
+ TransferImplicitDefs(MI);
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ dbgs() << "replaced by: " << *(--dMI);
+ });
+ MI->eraseFromParent();
return true;
}
@@ -317,12 +227,13 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
mi != me;) {
MachineBasicBlock::iterator nmi = llvm::next(mi);
MachineInstr *MI = mi;
- if (MI->isExtractSubreg()) {
- MadeChange |= LowerExtract(MI);
- } else if (MI->isInsertSubreg()) {
- MadeChange |= LowerInsert(MI);
- } else if (MI->isSubregToReg()) {
+ assert(!MI->isInsertSubreg() && "INSERT_SUBREG should no longer appear");
+ assert(MI->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ "EXTRACT_SUBREG should no longer appear");
+ if (MI->isSubregToReg()) {
MadeChange |= LowerSubregToReg(MI);
+ } else if (MI->isCopy()) {
+ MadeChange |= LowerCopy(MI);
}
mi = nmi;
}
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index eaaa1f85b563..a27ee479433b 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -13,7 +13,10 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -136,6 +139,13 @@ void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
Parent->getParent()->DeleteMachineInstr(MI);
}
+MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
+ iterator I = begin();
+ while (I != end() && I->isPHI())
+ ++I;
+ return I;
+}
+
MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
iterator I = end();
while (I != begin() && (--I)->getDesc().isTerminator())
@@ -245,6 +255,7 @@ void MachineBasicBlock::updateTerminator() {
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
+ DebugLoc dl; // FIXME: this is nowhere
bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
(void) B;
assert(!B && "UpdateTerminators requires analyzable predecessors!");
@@ -259,7 +270,7 @@ void MachineBasicBlock::updateTerminator() {
// its layout successor, insert a branch.
TBB = *succ_begin();
if (!isLayoutSuccessor(TBB))
- TII->InsertBranch(*this, TBB, 0, Cond);
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
}
} else {
if (FBB) {
@@ -270,10 +281,10 @@ void MachineBasicBlock::updateTerminator() {
if (TII->ReverseBranchCondition(Cond))
return;
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, FBB, 0, Cond);
+ TII->InsertBranch(*this, FBB, 0, Cond, dl);
} else if (isLayoutSuccessor(FBB)) {
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, 0, Cond);
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
}
} else {
// The block has a fallthrough conditional branch.
@@ -284,14 +295,14 @@ void MachineBasicBlock::updateTerminator() {
if (TII->ReverseBranchCondition(Cond)) {
// We can't reverse the condition, add an unconditional branch.
Cond.clear();
- TII->InsertBranch(*this, MBBA, 0, Cond);
+ TII->InsertBranch(*this, MBBA, 0, Cond, dl);
return;
}
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, MBBA, 0, Cond);
+ TII->InsertBranch(*this, MBBA, 0, Cond, dl);
} else if (!isLayoutSuccessor(MBBA)) {
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, MBBA, Cond);
+ TII->InsertBranch(*this, TBB, MBBA, Cond, dl);
}
}
}
@@ -331,12 +342,32 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
if (this == fromMBB)
return;
- for (MachineBasicBlock::succ_iterator I = fromMBB->succ_begin(),
- E = fromMBB->succ_end(); I != E; ++I)
- addSuccessor(*I);
+ while (!fromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *fromMBB->succ_begin();
+ addSuccessor(Succ);
+ fromMBB->removeSuccessor(Succ);
+ }
+}
+
+void
+MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
+ if (this == fromMBB)
+ return;
- while (!fromMBB->succ_empty())
- fromMBB->removeSuccessor(fromMBB->succ_begin());
+ while (!fromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *fromMBB->succ_begin();
+ addSuccessor(Succ);
+ fromMBB->removeSuccessor(Succ);
+
+ // Fix up any PHI nodes in the successor.
+ for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end();
+ MI != ME && MI->isPHI(); ++MI)
+ for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.getMBB() == fromMBB)
+ MO.setMBB(this);
+ }
+ }
}
bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
@@ -395,6 +426,82 @@ bool MachineBasicBlock::canFallThrough() {
return FBB == 0;
}
+MachineBasicBlock *
+MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
+ MachineFunction *MF = getParent();
+ DebugLoc dl; // FIXME: this is nowhere
+
+ // We may need to update this block's terminator, but we can't do that if
+ // AnalyzeBranch fails. If this block uses a jump table, we won't touch it.
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
+ return NULL;
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
+ DEBUG(dbgs() << "PHIElimination splitting critical edge:"
+ " BB#" << getNumber()
+ << " -- BB#" << NMBB->getNumber()
+ << " -- BB#" << Succ->getNumber() << '\n');
+
+ ReplaceUsesOfBlockWith(Succ, NMBB);
+ updateTerminator();
+
+ // Insert unconditional "jump Succ" instruction in NMBB if necessary.
+ NMBB->addSuccessor(Succ);
+ if (!NMBB->isLayoutSuccessor(Succ)) {
+ Cond.clear();
+ MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
+ }
+
+ // Fix PHI nodes in Succ so they refer to NMBB instead of this
+ for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
+ i != e && i->isPHI(); ++i)
+ for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+ if (i->getOperand(ni+1).getMBB() == this)
+ i->getOperand(ni+1).setMBB(NMBB);
+
+ if (LiveVariables *LV =
+ P->getAnalysisIfAvailable<LiveVariables>())
+ LV->addNewBlock(NMBB, this, Succ);
+
+ if (MachineDominatorTree *MDT =
+ P->getAnalysisIfAvailable<MachineDominatorTree>())
+ MDT->addNewBlock(NMBB, this);
+
+ if (MachineLoopInfo *MLI =
+ P->getAnalysisIfAvailable<MachineLoopInfo>())
+ if (MachineLoop *TIL = MLI->getLoopFor(this)) {
+ // If either of the blocks was not in a loop, the new block is not in one
+ // either, and thus LI doesn't need to be updated.
+ if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
+ if (TIL == DestLoop) {
+ // Both in the same loop, the NMBB joins loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == Succ &&
+ "Should not create irreducible loops!");
+ if (MachineLoop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NMBB, MLI->getBase());
+ }
+ }
+ }
+
+ return NMBB;
+}
+
/// removeFromParent - This method unlinks 'this' from the containing function,
/// and returns it, but does not delete it.
MachineBasicBlock *MachineBasicBlock::removeFromParent() {
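
A minimal usage sketch of the new MachineBasicBlock::SplitCriticalEdge helper added above. The function splitAllCriticalSuccEdges and its call pattern are hypothetical, not part of the patch; the Pass pointer is what lets the helper update LiveVariables, MachineDominatorTree and MachineLoopInfo when those analyses are available.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Pass.h"
using namespace llvm;

// Split every critical edge leaving MBB. Illustrative only.
static void splitAllCriticalSuccEdges(MachineBasicBlock *MBB, Pass *P) {
  // Copy the successor list first; SplitCriticalEdge rewrites it.
  SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end());
  for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
    MachineBasicBlock *Succ = Succs[i];
    // An edge is critical when the source has several successors and the
    // destination has several predecessors.
    if (MBB->succ_size() > 1 && Succ->pred_size() > 1)
      // May return NULL if the terminator can't be analyzed (e.g. jump tables).
      MBB->SplitCriticalEdge(Succ, P);
  }
}
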
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 6f4f7a883409..833cc00027db 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -30,9 +30,7 @@ using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
-
-static cl::opt<bool> CSEPhysDef("machine-cse-phys-defs",
- cl::init(false), cl::Hidden);
+STATISTIC(NumPhysCSEs, "Number of physreg-defining common subexprs eliminated");
namespace {
class MachineCSE : public MachineFunctionPass {
@@ -128,6 +126,28 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
++NumCoalesces;
Changed = true;
}
+
+ if (!DefMI->isCopy())
+ continue;
+ SrcReg = DefMI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ continue;
+ if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
+ continue;
+ const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC);
+ if (!NewRC)
+ continue;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ MO.setReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ if (NewRC != SRC)
+ MRI->setRegClass(SrcReg, NewRC);
+ DefMI->eraseFromParent();
+ ++NumCoalesces;
+ Changed = true;
}
return Changed;
@@ -172,7 +192,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
/// hasLivePhysRegDefUse - Return true if the specified instruction read / write
/// physical registers (except for dead defs of physical registers). It also
-/// returns the physical register def by reference if it's the only one.
+/// returns the physical register def by reference if it's the only one and the
+/// instruction does not use a physical register.
bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
const MachineBasicBlock *MBB,
unsigned &PhysDef) const {
@@ -186,9 +207,11 @@ bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- if (MO.isUse())
+ if (MO.isUse()) {
// Can't touch anything to read a physical register.
+ PhysDef = 0;
return true;
+ }
if (MO.isDead())
// If the def is dead, it's ok.
continue;
@@ -240,8 +263,8 @@ bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) {
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- return TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) ||
- MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg();
+ return MI->isCopyLike() ||
+ TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
}
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
@@ -356,6 +379,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
if (!isCSECandidate(MI))
continue;
+ bool DefPhys = false;
bool FoundCSE = VNT.count(MI);
if (!FoundCSE) {
// Look for trivial copy coalescing opportunities.
@@ -376,11 +400,13 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// ... Unless the CS is local and it also defines the physical register
// which is not clobbered in between.
- if (PhysDef && CSEPhysDef) {
+ if (PhysDef) {
unsigned CSVN = VNT.lookup(MI);
MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefReaches(CSMI, MI, PhysDef))
+ if (PhysRegDefReaches(CSMI, MI, PhysDef)) {
FoundCSE = true;
+ DefPhys = true;
+ }
}
}
@@ -426,6 +452,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
}
MI->eraseFromParent();
++NumCSEs;
+ if (DefPhys)
+ ++NumPhysCSEs;
} else {
DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
VNT.insert(MI, CurrVN++);
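
The new block added to PerformTrivialCoalescing above folds plain COPY instructions the same way the pass already folded target move instructions. A stripped-down sketch of that step follows; the function name and call site are hypothetical, and it assumes the usual MachineRegisterInfo/TargetRegisterInfo headers.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Rewrite a use of the destination of "%dst = COPY %src" to use %src
// directly and delete the copy, when the register classes are compatible.
static bool coalesceTrivialCopy(MachineOperand &UseMO, MachineInstr *CopyMI,
                                MachineRegisterInfo *MRI) {
  unsigned Src = CopyMI->getOperand(1).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(Src))
    return false;                       // leave physreg copies alone
  if (CopyMI->getOperand(0).getSubReg() || CopyMI->getOperand(1).getSubReg())
    return false;                       // sub-register copies need more care
  const TargetRegisterClass *NewRC =
    getCommonSubClass(MRI->getRegClass(UseMO.getReg()),
                      MRI->getRegClass(Src));
  if (!NewRC)
    return false;                       // no common register class
  UseMO.setReg(Src);
  MRI->clearKillFlags(Src);             // old kill flags are no longer valid
  if (NewRC != MRI->getRegClass(Src))
    MRI->setRegClass(Src, NewRC);
  CopyMI->eraseFromParent();
  return true;
}
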
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 408873903b0d..b5f8fbba99de 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -46,7 +46,6 @@ MachineDominatorTree::MachineDominatorTree()
}
MachineDominatorTree::~MachineDominatorTree() {
- DT->releaseMemory();
delete DT;
}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index a38c881982e7..666120f032c6 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -378,7 +378,7 @@ void MachineFunction::viewCFG() const
#ifndef NDEBUG
ViewGraph(this, "mf" + getFunction()->getNameStr());
#else
- errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ errs() << "MachineFunction::viewCFG is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
#endif // NDEBUG
}
@@ -388,7 +388,7 @@ void MachineFunction::viewCFGOnly() const
#ifndef NDEBUG
ViewGraph(this, "mf" + getFunction()->getNameStr(), true);
#else
- errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ errs() << "MachineFunction::viewCFGOnly is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
#endif // NDEBUG
}
@@ -438,10 +438,16 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
/// index with a negative value.
///
int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
- bool Immutable, bool isSS) {
+ bool Immutable) {
assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
- Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable,
- isSS));
+ // The alignment of the frame index can be determined from its offset from
+ // the incoming frame position. If the frame object is at offset 32 and
+ // the stack is guaranteed to be 16-byte aligned, then we know that the
+ // object is 16-byte aligned.
+ unsigned StackAlign = TFI.getStackAlignment();
+ unsigned Align = MinAlign(SPOffset, StackAlign);
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
+ /*isSS*/false));
return -++NumFixedObjects;
}
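
The new CreateFixedObject code above derives an object's alignment from its stack-pointer offset instead of hard-coding alignment 1. A small illustration of that arithmetic; the helper below mirrors what llvm::MinAlign computes and is only a sketch.

#include <stdint.h>

// Largest power of two that divides both values: the lowest set bit of A|B.
static uint64_t minAlign(uint64_t A, uint64_t B) {
  return (A | B) & (~(A | B) + 1);
}

// With a 16-byte aligned incoming stack, a fixed object at SP offset 32
// is known to be 16-byte aligned (minAlign(32, 16) == 16), while an
// object at offset 20 can only be assumed 4-byte aligned
// (minAlign(20, 16) == 4).
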
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index e54cd5cf9492..6b2e98549c71 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -111,6 +111,26 @@ void MachineOperand::setReg(unsigned Reg) {
Contents.Reg.RegNo = Reg;
}
+void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
+ const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ if (SubIdx && getSubReg())
+ SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
+ setReg(Reg);
+ if (SubIdx)
+ setSubReg(SubIdx);
+}
+
+void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (getSubReg()) {
+ Reg = TRI.getSubReg(Reg, getSubReg());
+ assert(Reg && "Invalid SubReg for physical register");
+ setSubReg(0);
+ }
+ setReg(Reg);
+}
+
/// ChangeToImmediate - Replace this operand with a new immediate operand of
/// the specified value. If an operand is known to be an immediate already,
/// the setImm method should be used.
@@ -861,14 +881,14 @@ int MachineInstr::findFirstPredOperandIdx() const {
bool MachineInstr::
isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
if (isInlineAsm()) {
- assert(DefOpIdx >= 2);
+ assert(DefOpIdx >= 3);
const MachineOperand &MO = getOperand(DefOpIdx);
if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
return false;
// Determine the actual operand index that corresponds to this index.
unsigned DefNo = 0;
unsigned DefPart = 0;
- for (unsigned i = 1, e = getNumOperands(); i < e; ) {
+ for (unsigned i = 2, e = getNumOperands(); i < e; ) {
const MachineOperand &FMO = getOperand(i);
// After the normal asm operands there may be additional imp-def regs.
if (!FMO.isImm())
@@ -883,7 +903,7 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
}
++DefNo;
}
- for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ for (unsigned i = 2, e = getNumOperands(); i != e; ++i) {
const MachineOperand &FMO = getOperand(i);
if (!FMO.isImm())
continue;
@@ -926,7 +946,7 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
// Find the flag operand corresponding to UseOpIdx
unsigned FlagIdx, NumOps=0;
- for (FlagIdx = 1; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
+ for (FlagIdx = 2; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
const MachineOperand &UFMO = getOperand(FlagIdx);
// After the normal asm operands there may be additional imp-def regs.
if (!UFMO.isImm())
@@ -944,9 +964,9 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
if (!DefOpIdx)
return true;
- unsigned DefIdx = 1;
- // Remember to adjust the index. First operand is asm string, then there
- // is a flag for each.
+ unsigned DefIdx = 2;
+ // Remember to adjust the index. First operand is asm string, second is
+ // the AlignStack bit, then there is a flag for each.
while (DefNo) {
const MachineOperand &FMO = getOperand(DefIdx);
assert(FMO.isImm());
@@ -1017,6 +1037,29 @@ void MachineInstr::copyPredicates(const MachineInstr *MI) {
}
}
+void MachineInstr::substituteRegister(unsigned FromReg,
+ unsigned ToReg,
+ unsigned SubIdx,
+ const TargetRegisterInfo &RegInfo) {
+ if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+ if (SubIdx)
+ ToReg = RegInfo.getSubReg(ToReg, SubIdx);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substPhysReg(ToReg, RegInfo);
+ }
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substVirtReg(ToReg, SubIdx, RegInfo);
+ }
+ }
+}
+
/// isSafeToMove - Return true if it is safe to move this instruction. If
/// SawStore is set to true, it means that there is a store (or call) between
/// the instruction's location and its intended destination.
@@ -1168,6 +1211,28 @@ void MachineInstr::dump() const {
dbgs() << " " << *this;
}
+static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
+ raw_ostream &CommentOS) {
+ const LLVMContext &Ctx = MF->getFunction()->getContext();
+ if (!DL.isUnknown()) { // Print source line info.
+ DIScope Scope(DL.getScope(Ctx));
+ // Omit the directory, because it's likely to be long and uninteresting.
+ if (Scope.Verify())
+ CommentOS << Scope.getFilename();
+ else
+ CommentOS << "<unknown>";
+ CommentOS << ':' << DL.getLine();
+ if (DL.getCol() != 0)
+ CommentOS << ':' << DL.getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ CommentOS << " @[ ";
+ printDebugLoc(InlinedAtDL, MF, CommentOS);
+ CommentOS << " ]";
+ }
+ }
+}
+
void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
// We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
const MachineFunction *MF = 0;
@@ -1240,6 +1305,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << "!\"" << MDS->getString() << '\"';
else
MO.print(OS, TM);
+ } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
+ OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm());
} else
MO.print(OS, TM);
}
@@ -1265,19 +1332,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
if (!debugLoc.isUnknown() && MF) {
if (!HaveSemi) OS << ";";
-
- // TODO: print InlinedAtLoc information
-
- DIScope Scope(debugLoc.getScope(MF->getFunction()->getContext()));
OS << " dbg:";
- // Omit the directory, since it's usually long and uninteresting.
- if (Scope.Verify())
- OS << Scope.getFilename();
- else
- OS << "<unknown>";
- OS << ':' << debugLoc.getLine();
- if (debugLoc.getCol() != 0)
- OS << ':' << debugLoc.getCol();
+ printDebugLoc(debugLoc, MF, OS);
}
OS << "\n";
@@ -1418,6 +1474,25 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg,
true /*IsImp*/));
}
+void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs,
+ const TargetRegisterInfo &TRI) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ bool Dead = true;
+ for (SmallVectorImpl<unsigned>::const_iterator I = UsedRegs.begin(),
+ E = UsedRegs.end(); I != E; ++I)
+ if (TRI.regsOverlap(*I, Reg)) {
+ Dead = false;
+ break;
+ }
+ // If there are no uses, including partial uses, the def is dead.
+ if (Dead) MO.setIsDead();
+ }
+}
+
unsigned
MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
unsigned Hash = MI->getOpcode() * 37;
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 61206173e645..956d21c0b34b 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -62,6 +62,7 @@ namespace {
// State that is updated as we process loops
bool Changed; // True if a loop is changed.
+ bool FirstInLoop; // True if it's the first LICM in the loop.
MachineLoop *CurLoop; // The current loop we are working on.
MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
@@ -82,7 +83,6 @@ namespace {
const char *getPassName() const { return "Machine Instruction LICM"; }
- // FIXME: Loop preheaders?
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<MachineLoopInfo>();
@@ -127,8 +127,8 @@ namespace {
void AddToLiveIns(unsigned Reg);
/// IsLICMCandidate - Returns true if the instruction may be a suitable
- /// candidate for LICM. e.g. If the instruction is a call, then it's obviously
- /// not safe to hoist it.
+ /// candidate for LICM. e.g. If the instruction is a call, then it's
+ /// obviously not safe to hoist it.
bool IsLICMCandidate(MachineInstr &I);
/// IsLoopInvariantInst - Returns true if the instruction is loop
@@ -181,6 +181,10 @@ namespace {
/// current loop preheader that may become duplicates of instructions that
/// are hoisted out of the loop.
void InitCSEMap(MachineBasicBlock *BB);
+
+ /// getCurPreheader - Get the preheader for the current loop, splitting
+ /// a critical edge if needed.
+ MachineBasicBlock *getCurPreheader();
};
} // end anonymous namespace
@@ -192,12 +196,17 @@ FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
return new MachineLICM(PreRegAlloc);
}
-/// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most
-/// loop that has a preheader.
-static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) {
+/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
+/// loop that has a unique predecessor.
+static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
+ // Check whether this loop even has a unique predecessor.
+ if (!CurLoop->getLoopPredecessor())
+ return false;
+ // Ok, now check to see if any of its outer loops do.
for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
- if (L->getLoopPreheader())
+ if (L->getLoopPredecessor())
return false;
+ // None of them did, so this is the outermost with a unique predecessor.
return true;
}
@@ -207,7 +216,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
else
DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n");
- Changed = false;
+ Changed = FirstInLoop = false;
TM = &MF.getTarget();
TII = TM->getInstrInfo();
TRI = TM->getRegisterInfo();
@@ -220,23 +229,17 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
DT = &getAnalysis<MachineDominatorTree>();
AA = &getAnalysis<AliasAnalysis>();
- for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I){
- CurLoop = *I;
+ SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+ while (!Worklist.empty()) {
+ CurLoop = Worklist.pop_back_val();
+ CurPreheader = 0;
// If this is done before regalloc, only visit outer-most preheader-sporting
// loops.
- if (PreRegAlloc && !LoopIsOuterMostWithPreheader(CurLoop))
- continue;
-
- // Determine the block to which to hoist instructions. If we can't find a
- // suitable loop preheader, we can't do any hoisting.
- //
- // FIXME: We are only hoisting if the basic block coming into this loop
- // has only one successor. This isn't the case in general because we haven't
- // broken critical edges or added preheaders.
- CurPreheader = CurLoop->getLoopPreheader();
- if (!CurPreheader)
+ if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
+ Worklist.append(CurLoop->begin(), CurLoop->end());
continue;
+ }
if (!PreRegAlloc)
HoistRegionPostRA();
@@ -244,6 +247,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
// CSEMap is initialized for loop header when the first instruction is
// being hoisted.
MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
+ FirstInLoop = true;
HoistRegion(N);
CSEMap.clear();
}
@@ -436,13 +440,16 @@ void MachineLICM::AddToLiveIns(unsigned Reg) {
/// operands that is safe to hoist, this instruction is called to do the
/// dirty work.
void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader) return;
+
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
DEBUG({
dbgs() << "Hoisting " << *MI;
- if (CurPreheader->getBasicBlock())
+ if (Preheader->getBasicBlock())
dbgs() << " to MachineBasicBlock "
- << CurPreheader->getName();
+ << Preheader->getName();
if (MI->getParent()->getBasicBlock())
dbgs() << " from MachineBasicBlock "
<< MI->getParent()->getName();
@@ -451,7 +458,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
// Splice the instruction to the preheader.
MachineBasicBlock *MBB = MI->getParent();
- CurPreheader->splice(CurPreheader->getFirstTerminator(), MBB, MI);
+ Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
// Add register to livein list to all the BBs in the current loop since a
// loop invariant must be kept live throughout the whole loop. This is
@@ -490,26 +497,16 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
/// candidate for LICM. e.g. If the instruction is a call, then it's obviously
/// not safe to hoist it.
bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
+ // It is not profitable to hoist implicitdefs. FIXME: Why not? What if they
+ // are arguments to some other otherwise-hoistable instruction?
if (I.isImplicitDef())
return false;
-
- const TargetInstrDesc &TID = I.getDesc();
- // Ignore stuff that we obviously can't hoist.
- if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
- TID.hasUnmodeledSideEffects())
+ // Check if it's safe to move the instruction.
+ bool DontMoveAcrossStore = true;
+ if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
return false;
-
- if (TID.mayLoad()) {
- // Okay, this instruction does a load. As a refinement, we allow the target
- // to decide whether the loaded value is actually a constant. If so, we can
- // actually use it as a load.
- if (!I.isInvariantLoad(AA))
- // FIXME: we should be able to hoist loads with no other side effects if
- // there are no other instructions which can change memory in this loop.
- // This is a trivial form of alias analysis.
- return false;
- }
+
return true;
}
@@ -754,6 +751,9 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
/// that are safe to hoist, this instruction is called to do the dirty work.
///
void MachineLICM::Hoist(MachineInstr *MI) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader) return;
+
// First check whether we should hoist this instruction.
if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
// If not, try unfolding a hoistable load.
@@ -765,9 +765,9 @@ void MachineLICM::Hoist(MachineInstr *MI) {
// terminator instructions.
DEBUG({
dbgs() << "Hoisting " << *MI;
- if (CurPreheader->getBasicBlock())
+ if (Preheader->getBasicBlock())
dbgs() << " to MachineBasicBlock "
- << CurPreheader->getName();
+ << Preheader->getName();
if (MI->getParent()->getBasicBlock())
dbgs() << " from MachineBasicBlock "
<< MI->getParent()->getName();
@@ -776,7 +776,10 @@ void MachineLICM::Hoist(MachineInstr *MI) {
// If this is the first instruction being hoisted to the preheader,
// initialize the CSE map with potential common expressions.
- InitCSEMap(CurPreheader);
+ if (FirstInLoop) {
+ InitCSEMap(Preheader);
+ FirstInLoop = false;
+ }
// Look for opportunity to CSE the hoisted instruction.
unsigned Opcode = MI->getOpcode();
@@ -784,7 +787,7 @@ void MachineLICM::Hoist(MachineInstr *MI) {
CI = CSEMap.find(Opcode);
if (!EliminateCSE(MI, CI)) {
// Otherwise, splice the instruction to the preheader.
- CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI);
+ Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
// Clear the kill flags of any register this instruction defines,
// since they may need to be live throughout the entire loop
@@ -808,3 +811,30 @@ void MachineLICM::Hoist(MachineInstr *MI) {
++NumHoisted;
Changed = true;
}
+
+MachineBasicBlock *MachineLICM::getCurPreheader() {
+ // Determine the block to which to hoist instructions. If we can't find a
+ // suitable loop predecessor, we can't do any hoisting.
+
+ // If we've tried to get a preheader and failed, don't try again.
+ if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
+ return 0;
+
+ if (!CurPreheader) {
+ CurPreheader = CurLoop->getLoopPreheader();
+ if (!CurPreheader) {
+ MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
+ if (!Pred) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return 0;
+ }
+
+ CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
+ if (!CurPreheader) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return 0;
+ }
+ }
+ }
+ return CurPreheader;
+}
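
getCurPreheader above caches its answer across calls and remembers failure with a sentinel pointer of -1, so a loop whose header edge cannot be split is only attempted once. A stripped-down sketch of that idiom; the LazyPreheader wrapper is hypothetical, not the pass's API.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/Pass.h"
using namespace llvm;

struct LazyPreheader {
  MachineBasicBlock *Cached;          // 0 = not tried yet, -1 = tried and failed
  LazyPreheader() : Cached(0) {}

  MachineBasicBlock *get(MachineLoop *L, Pass *P) {
    MachineBasicBlock *Failed = reinterpret_cast<MachineBasicBlock*>(-1);
    if (Cached == Failed)
      return 0;                       // don't retry a known failure
    if (!Cached) {
      Cached = L->getLoopPreheader();
      if (!Cached) {
        // No preheader; try to make one by splitting the edge from the
        // loop's unique predecessor to its header.
        MachineBasicBlock *Pred = L->getLoopPredecessor();
        Cached = Pred ? Pred->SplitCriticalEdge(L->getHeader(), P) : 0;
        if (!Cached) {
          Cached = Failed;
          return 0;
        }
      }
    }
    return Cached;
  }
};
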
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 70bf7e5da5e8..5d852f26beda 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
- RegClass2VRegMap.resize(TRI.getNumRegClasses()+1); // RC ID starts at 1.
+ RegClass2VRegMap = new std::vector<unsigned>[TRI.getNumRegClasses()];
UsedPhysRegs.resize(TRI.getNumRegs());
// Create the physreg use/def lists.
@@ -37,6 +37,7 @@ MachineRegisterInfo::~MachineRegisterInfo() {
"PhysRegUseDefLists has entries after all instructions are deleted");
#endif
delete [] PhysRegUseDefLists;
+ delete [] RegClass2VRegMap;
}
/// setRegClass - Set the register class of the specified virtual register.
@@ -52,7 +53,7 @@ MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
// Remove from old register class's vregs list. This may be slow but
// fortunately this operation is rarely needed.
std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()];
- std::vector<unsigned>::iterator I=std::find(VRegs.begin(), VRegs.end(), VR);
+ std::vector<unsigned>::iterator I = std::find(VRegs.begin(), VRegs.end(), VR);
VRegs.erase(I);
// Add to new register class's vregs list.
@@ -174,115 +175,36 @@ unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
return 0;
}
-static cl::opt<bool>
-SchedLiveInCopies("schedule-livein-copies", cl::Hidden,
- cl::desc("Schedule copies of livein registers"),
- cl::init(false));
-
-/// EmitLiveInCopy - Emit a copy for a live in physical register. If the
-/// physical register has only a single copy use, then coalesced the copy
-/// if possible.
-static void EmitLiveInCopy(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &InsertPos,
- unsigned VirtReg, unsigned PhysReg,
- const TargetRegisterClass *RC,
- DenseMap<MachineInstr*, unsigned> &CopyRegMap,
- const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- const TargetInstrInfo &TII) {
- unsigned NumUses = 0;
- MachineInstr *UseMI = NULL;
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg),
- UE = MRI.use_end(); UI != UE; ++UI) {
- UseMI = &*UI;
- if (++NumUses > 1)
- break;
- }
-
- // If the number of uses is not one, or the use is not a move instruction,
- // don't coalesce. Also, only coalesce away a virtual register to virtual
- // register copy.
- bool Coalesced = false;
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (NumUses == 1 &&
- TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- TargetRegisterInfo::isVirtualRegister(DstReg)) {
- VirtReg = DstReg;
- Coalesced = true;
- }
-
- // Now find an ideal location to insert the copy.
- MachineBasicBlock::iterator Pos = InsertPos;
- while (Pos != MBB->begin()) {
- MachineInstr *PrevMI = prior(Pos);
- DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI);
- // copyRegToReg might emit multiple instructions to do a copy.
- unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second;
- if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg))
- // This is what the BB looks like right now:
- // r1024 = mov r0
- // ...
- // r1 = mov r1024
- //
- // We want to insert "r1025 = mov r1". Inserting this copy below the
- // move to r1024 makes it impossible for that move to be coalesced.
- //
- // r1025 = mov r1
- // r1024 = mov r0
- // ...
- // r1 = mov 1024
- // r2 = mov 1025
- break; // Woot! Found a good location.
- --Pos;
- }
-
- bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC,
- DebugLoc());
- assert(Emitted && "Unable to issue a live-in copy instruction!\n");
- (void) Emitted;
-
- CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg));
- if (Coalesced) {
- if (&*InsertPos == UseMI) ++InsertPos;
- MBB->erase(UseMI);
- }
-}
-
/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
/// into the given entry block.
void
MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
const TargetRegisterInfo &TRI,
const TargetInstrInfo &TII) {
- if (SchedLiveInCopies) {
- // Emit the copies at a heuristically-determined location in the block.
- DenseMap<MachineInstr*, unsigned> CopyRegMap;
- MachineBasicBlock::iterator InsertPos = EntryMBB->begin();
- for (MachineRegisterInfo::livein_iterator LI = livein_begin(),
- E = livein_end(); LI != E; ++LI)
- if (LI->second) {
- const TargetRegisterClass *RC = getRegClass(LI->second);
- EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first,
- RC, CopyRegMap, *this, TRI, TII);
+ // Emit the copies into the top of the block.
+ for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
+ if (LiveIns[i].second) {
+ if (use_empty(LiveIns[i].second)) {
+ // The livein has no uses. Drop it.
+ //
+ // It would be preferable to have isel avoid creating live-in
+ // records for unused arguments in the first place, but it's
+ // complicated by the debug info code for arguments.
+ LiveIns.erase(LiveIns.begin() + i);
+ --i; --e;
+ } else {
+ // Emit a copy.
+ BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+ TII.get(TargetOpcode::COPY), LiveIns[i].second)
+ .addReg(LiveIns[i].first);
+
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
}
- } else {
- // Emit the copies into the top of the block.
- for (MachineRegisterInfo::livein_iterator LI = livein_begin(),
- E = livein_end(); LI != E; ++LI)
- if (LI->second) {
- const TargetRegisterClass *RC = getRegClass(LI->second);
- bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
- LI->second, LI->first, RC, RC,
- DebugLoc());
- assert(Emitted && "Unable to issue a live-in copy instruction!\n");
- (void) Emitted;
- }
- }
-
- // Add function live-ins to entry block live-in set.
- for (MachineRegisterInfo::livein_iterator I = livein_begin(),
- E = livein_end(); I != E; ++I)
- EntryMBB->addLiveIn(I->first);
+ } else {
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
+ }
}
void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) {
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 1610e6c9610c..61334fc1790a 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass moves instructions into successor blocks, when possible, so that
+// This pass moves instructions into successor blocks when possible, so that
// they aren't executed on paths where their results aren't needed.
//
// This pass is not intended to be a replacement or a complete alternative
@@ -45,9 +45,9 @@ namespace {
public:
static char ID; // Pass identification
MachineSinking() : MachineFunctionPass(&ID) {}
-
+
virtual bool runOnMachineFunction(MachineFunction &MF);
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -63,7 +63,7 @@ namespace {
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const;
};
} // end anonymous namespace
-
+
char MachineSinking::ID = 0;
static RegisterPass<MachineSinking>
X("machine-sink", "Machine code sinking");
@@ -72,7 +72,7 @@ FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
/// AllUsesDominatedByBlock - Return true if all uses of the specified register
/// occur in blocks dominated by the specified block.
-bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
MachineBasicBlock *MBB) const {
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Only makes sense for vregs");
@@ -80,27 +80,30 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
// This may leave a referencing dbg_value in the original block, before
// the definition of the vreg. Dwarf generator handles this although the
// user might not get the right info at runtime.
- for (MachineRegisterInfo::use_nodbg_iterator I =
- RegInfo->use_nodbg_begin(Reg),
- E = RegInfo->use_nodbg_end(); I != E; ++I) {
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = RegInfo->use_nodbg_begin(Reg), E = RegInfo->use_nodbg_end();
+ I != E; ++I) {
// Determine the block of the use.
MachineInstr *UseInst = &*I;
MachineBasicBlock *UseBlock = UseInst->getParent();
+
if (UseInst->isPHI()) {
// PHI nodes use the operand in the predecessor block, not the block with
// the PHI.
UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
}
+
// Check that it dominates.
if (!DT->dominates(MBB, UseBlock))
return false;
}
+
return true;
}
bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "******** Machine Sinking ********\n");
-
+
const TargetMachine &TM = MF.getTarget();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
@@ -111,19 +114,19 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
AllocatableSet = TRI->getAllocatableSet(MF);
bool EverMadeChange = false;
-
+
while (1) {
bool MadeChange = false;
// Process all basic blocks.
- for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
I != E; ++I)
MadeChange |= ProcessBlock(*I);
-
+
// If this iteration over the code changed anything, keep iterating.
if (!MadeChange) break;
EverMadeChange = true;
- }
+ }
return EverMadeChange;
}
@@ -132,8 +135,8 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
if (MBB.succ_size() <= 1 || MBB.empty()) return false;
// Don't bother sinking code out of unreachable blocks. In addition to being
- // unprofitable, it can also lead to infinite looping, because in an unreachable
- // loop there may be nowhere to stop.
+ // unprofitable, it can also lead to infinite looping, because in an
+ // unreachable loop there may be nowhere to stop.
if (!DT->isReachableFromEntry(&MBB)) return false;
bool MadeChange = false;
@@ -144,7 +147,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
bool ProcessedBegin, SawStore = false;
do {
MachineInstr *MI = I; // The instruction to sink.
-
+
// Predecrement I (if it's not begin) so that it isn't invalidated by
// sinking.
ProcessedBegin = I == MBB.begin();
@@ -156,10 +159,10 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
if (SinkInstruction(MI, SawStore))
++NumSunk, MadeChange = true;
-
+
// If we just processed the first instruction in the block, we're done.
} while (!ProcessedBegin);
-
+
return MadeChange;
}
@@ -169,7 +172,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// Check if it's safe to move the instruction.
if (!MI->isSafeToMove(TII, AA, SawStore))
return false;
-
+
// FIXME: This should include support for sinking instructions within the
// block they are currently in to shorten the live ranges. We often get
// instructions sunk into the top of a large block, but it would be better to
@@ -177,22 +180,22 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// be careful not to *increase* register pressure though, e.g. sinking
// "x = y + z" down if it kills y and z would increase the live ranges of y
// and z and only shrink the live range of x.
-
+
// Loop over all the operands of the specified instruction. If there is
// anything we can't handle, bail out.
MachineBasicBlock *ParentBlock = MI->getParent();
-
+
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
MachineBasicBlock *SuccToSinkTo = 0;
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue; // Ignore non-register operands.
-
+
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
-
+
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
if (MO.isUse()) {
// If the physreg has no defs anywhere, it's just an ambient register
@@ -200,13 +203,16 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// it could get allocated to something with a def during allocation.
if (!RegInfo->def_empty(Reg))
return false;
+
if (AllocatableSet.test(Reg))
return false;
+
// Check for a def among the register's aliases too.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
if (!RegInfo->def_empty(AliasReg))
return false;
+
if (AllocatableSet.test(AliasReg))
return false;
}
@@ -221,28 +227,31 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If it's not safe to move defs of the register class, then abort.
if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg)))
return false;
-
+
// FIXME: This picks a successor to sink into based on having one
// successor that dominates all the uses. However, there are cases where
// sinking can happen but where the sink point isn't a successor. For
// example:
+ //
// x = computation
// if () {} else {}
// use x
- // the instruction could be sunk over the whole diamond for the
+ //
+ // the instruction could be sunk over the whole diamond for the
// if/then/else (or loop, etc), allowing it to be sunk into other blocks
// after that.
-
+
// Virtual register defs can only be sunk if all their uses are in blocks
// dominated by one of the successors.
if (SuccToSinkTo) {
// If a previous operand picked a block to sink to, then this operand
// must be sinkable to the same block.
- if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo))
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo))
return false;
+
continue;
}
-
+
// Otherwise, we should look at all the successors and decide which one
// we should sink to.
for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
@@ -252,13 +261,13 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
break;
}
}
-
+
// If we couldn't find a block to sink to, ignore this instruction.
if (SuccToSinkTo == 0)
return false;
}
}
-
+
// If there are no outputs, it must have side-effects.
if (SuccToSinkTo == 0)
return false;
@@ -267,15 +276,26 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// landing pad is implicitly defined.
if (SuccToSinkTo->isLandingPad())
return false;
-
+
// It is not possible to sink an instruction into its own block. This can
// happen with loops.
if (MI->getParent() == SuccToSinkTo)
return false;
-
- DEBUG(dbgs() << "Sink instr " << *MI);
- DEBUG(dbgs() << "to block " << *SuccToSinkTo);
-
+
+ // If the instruction to move defines a dead physical register which is live
+ // when leaving the basic block, don't move it because it could turn into a
+ // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (SuccToSinkTo->isLiveIn(Reg))
+ return false;
+ }
+
+ DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);
+
// If the block has multiple predecessors, this would introduce computation on
// a path where it doesn't already exist. We could split the critical edge,
// but for now we just punt.
@@ -305,18 +325,18 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// Otherwise we are OK with sinking along a critical edge.
DEBUG(dbgs() << "Sinking along critical edge.\n");
}
-
- // Determine where to insert into. Skip phi nodes.
+
+ // Determine where to insert into. Skip phi nodes.
MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
++InsertPos;
-
+
// Move the instruction.
SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
++MachineBasicBlock::iterator(MI));
- // Conservatively, clear any kill flags, since it's possible that
- // they are no longer correct.
+ // Conservatively, clear any kill flags, since it's possible that they are no
+ // longer correct.
MI->clearKillInfo();
return true;
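
AllUsesDominatedByBlock above treats a PHI use as occurring in the incoming predecessor block rather than in the PHI's own block, because that is where the value must actually be available. A small illustrative helper making that rule explicit; it is hypothetical, not part of the pass.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Return the block in which a given (non-debug) use of a vreg really occurs.
static MachineBasicBlock *
blockOfUse(MachineRegisterInfo::use_nodbg_iterator I) {
  MachineInstr *UseInst = &*I;
  if (UseInst->isPHI())
    // PHI operands come in (value, predecessor-block) pairs; the block
    // operand immediately follows the register operand being visited.
    return UseInst->getOperand(I.getOperandNo() + 1).getMBB();
  return UseInst->getParent();
}
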
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 8baf01c90736..2297c908b1e0 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -390,7 +390,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
- if (!MBB->empty() && MBB->back().getDesc().isBarrier()) {
+ if (!MBB->empty() && MBB->back().getDesc().isBarrier() &&
+ !TII->isPredicated(&MBB->back())) {
report("MBB exits via unconditional fall-through but ends with a "
"barrier instruction!", MBB);
}
diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp
index 41fc20407441..dcdc243e5db3 100644
--- a/lib/CodeGen/OptimizeExts.cpp
+++ b/lib/CodeGen/OptimizeExts.cpp
@@ -118,6 +118,26 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
continue;
}
+ // It's an error to translate this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
+ //
+ // into this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1027 = COPY %reg1025:4
+ // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
+ //
+ // The problem here is that SUBREG_TO_REG is there to assert that an
+ // implicit zext occurs. It doesn't insert a zext instruction. If we allow
+ // the COPY here, it will give us the value after the <sext>,
+ // not the original value of %reg1024 before <sext>.
+ if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
+ continue;
+
MachineBasicBlock *UseMBB = UseMI->getParent();
if (UseMBB == MBB) {
// Local uses that come after the extension.
@@ -165,8 +185,8 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
continue;
unsigned NewVR = MRI->createVirtualRegister(RC);
BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
- TII->get(TargetOpcode::EXTRACT_SUBREG), NewVR)
- .addReg(DstReg).addImm(SubIdx);
+ TII->get(TargetOpcode::COPY), NewVR)
+ .addReg(DstReg, 0, SubIdx);
UseMO->setReg(NewVR);
++NumReuse;
Changed = true;
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 2717d4d5cefc..1613fe21e42d 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -107,6 +107,11 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
SrcSubIdx == 0 && DstSubIdx == 0 &&
TargetRegisterInfo::isVirtualRegister(MvSrcReg))
SrcMI = MRI->getVRegDef(MvSrcReg);
+ else if (SrcMI && SrcMI->isCopy() &&
+ !SrcMI->getOperand(0).getSubReg() &&
+ !SrcMI->getOperand(1).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg()))
+ SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
if (!SrcMI)
return false;
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
index bd18b5216f37..02938df00700 100644
--- a/lib/CodeGen/PBQP/HeuristicSolver.h
+++ b/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -406,7 +406,7 @@ namespace PBQP {
// Create node data objects.
for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
- nItr != nEnd; ++nItr) {
+ nItr != nEnd; ++nItr) {
nodeDataList.push_back(NodeData());
g.setNodeData(nItr, &nodeDataList.back());
}
diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h
index 30d34d9e3e92..4c1ce119ed05 100644
--- a/lib/CodeGen/PBQP/Heuristics/Briggs.h
+++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h
@@ -18,7 +18,6 @@
#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
-#include "llvm/Support/Compiler.h"
#include "../HeuristicSolver.h"
#include "../HeuristicBase.h"
@@ -267,8 +266,8 @@ namespace PBQP {
if (!nd.isHeuristic)
return;
- EdgeData &ed ATTRIBUTE_UNUSED = getHeuristicEdgeData(eItr);
-
+ EdgeData &ed = getHeuristicEdgeData(eItr);
+ (void)ed;
assert(ed.isUpToDate && "Edge data is not up to date.");
// Update node.
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index edbc13f3ff7f..ea6b094d7efe 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -34,7 +34,6 @@
using namespace llvm;
STATISTIC(NumAtomic, "Number of atomic phis lowered");
-STATISTIC(NumSplits, "Number of critical edges split on demand");
STATISTIC(NumReused, "Number of reused lowered phis");
char PHIElimination::ID = 0;
@@ -184,7 +183,6 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// Create a new register for the incoming PHI arguments.
MachineFunction &MF = *MBB.getParent();
- const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
unsigned IncomingReg = 0;
bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI?
@@ -208,10 +206,12 @@ void llvm::PHIElimination::LowerAtomicPHINode(
++NumReused;
DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi);
} else {
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
}
- TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC,
- MPhi->getDebugLoc());
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), DestReg)
+ .addReg(IncomingReg);
}
// Update live variable information if there is any.
@@ -293,8 +293,8 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// Insert the copy.
if (!reusedIncoming && IncomingReg)
- TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC,
- MPhi->getDebugLoc());
+ BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg);
// Now update live variable information if we have it. Otherwise we're done
if (!LV) continue;
@@ -391,57 +391,8 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
// (not considering PHI nodes). If the register is live in to this block
// anyway, we would gain nothing from splitting.
if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB))
- SplitCriticalEdge(PreMBB, &MBB);
+ PreMBB->SplitCriticalEdge(&MBB, this);
}
}
return true;
}
-
-MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
- MachineBasicBlock *B) {
- assert(A && B && "Missing MBB end point");
-
- MachineFunction *MF = A->getParent();
-
- // We may need to update A's terminator, but we can't do that if AnalyzeBranch
- // fails. If A uses a jump table, we won't touch it.
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- if (TII->AnalyzeBranch(*A, TBB, FBB, Cond))
- return NULL;
-
- ++NumSplits;
-
- MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
- MF->insert(llvm::next(MachineFunction::iterator(A)), NMBB);
- DEBUG(dbgs() << "PHIElimination splitting critical edge:"
- " BB#" << A->getNumber()
- << " -- BB#" << NMBB->getNumber()
- << " -- BB#" << B->getNumber() << '\n');
-
- A->ReplaceUsesOfBlockWith(B, NMBB);
- A->updateTerminator();
-
- // Insert unconditional "jump B" instruction in NMBB if necessary.
- NMBB->addSuccessor(B);
- if (!NMBB->isLayoutSuccessor(B)) {
- Cond.clear();
- MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond);
- }
-
- // Fix PHI nodes in B so they refer to NMBB instead of A
- for (MachineBasicBlock::iterator i = B->begin(), e = B->end();
- i != e && i->isPHI(); ++i)
- for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
- if (i->getOperand(ni+1).getMBB() == A)
- i->getOperand(ni+1).setMBB(NMBB);
-
- if (LiveVariables *LV=getAnalysisIfAvailable<LiveVariables>())
- LV->addNewBlock(NMBB, A, B);
-
- if (MachineDominatorTree *MDT=getAnalysisIfAvailable<MachineDominatorTree>())
- MDT->addNewBlock(NMBB, A);
-
- return NMBB;
-}
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 5ea2941b483c..3489db2e9f4f 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -24,6 +24,11 @@ using namespace llvm;
//===---------------------------------------------------------------------===//
MachinePassRegistry RegisterRegAlloc::Registry;
+static FunctionPass *createDefaultRegisterAllocator() { return 0; }
+static RegisterRegAlloc
+defaultRegAlloc("default",
+ "pick register allocator based on -O option",
+ createDefaultRegisterAllocator);
//===---------------------------------------------------------------------===//
///
@@ -33,8 +38,8 @@ MachinePassRegistry RegisterRegAlloc::Registry;
static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
RegisterPassParser<RegisterRegAlloc> >
RegAlloc("regalloc",
- cl::init(&createLinearScanRegisterAllocator),
- cl::desc("Register allocator to use (default=linearscan)"));
+ cl::init(&createDefaultRegisterAllocator),
+ cl::desc("Register allocator to use"));
//===---------------------------------------------------------------------===//
@@ -42,13 +47,22 @@ RegAlloc("regalloc",
/// createRegisterAllocator - choose the appropriate register allocator.
///
//===---------------------------------------------------------------------===//
-FunctionPass *llvm::createRegisterAllocator() {
+FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) {
RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
-
+
if (!Ctor) {
Ctor = RegAlloc;
RegisterRegAlloc::setDefault(RegAlloc);
}
-
- return Ctor();
+
+ if (Ctor != createDefaultRegisterAllocator)
+ return Ctor();
+
+ // When the 'default' allocator is requested, pick one based on OptLevel.
+ switch (OptLevel) {
+ case CodeGenOpt::None:
+ return createFastRegisterAllocator();
+ default:
+ return createLinearScanRegisterAllocator();
+ }
}
diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp
index af5f2892c2f0..cbde2b01eeaf 100644
--- a/lib/CodeGen/ExactHazardRecognizer.cpp
+++ b/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -1,4 +1,4 @@
-//===----- ExactHazardRecognizer.cpp - hazard recognizer -------- ---------===//
+//===----- PostRAHazardRecognizer.cpp - hazard recognizer -------- ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "post-RA-sched"
-#include "ExactHazardRecognizer.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/PostRAHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -22,10 +22,9 @@
using namespace llvm;
-ExactHazardRecognizer::
-ExactHazardRecognizer(const InstrItineraryData &LItinData) :
- ScheduleHazardRecognizer(), ItinData(LItinData)
-{
+PostRAHazardRecognizer::
+PostRAHazardRecognizer(const InstrItineraryData &LItinData) :
+ ScheduleHazardRecognizer(), ItinData(LItinData) {
// Determine the maximum depth of any itinerary. This determines the
// depth of the scoreboard. We always make the scoreboard at least 1
// cycle deep to avoid dealing with the boundary condition.
@@ -48,16 +47,16 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) :
ReservedScoreboard.reset(ScoreboardDepth);
RequiredScoreboard.reset(ScoreboardDepth);
- DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = "
+ DEBUG(dbgs() << "Using post-ra hazard recognizer: ScoreboardDepth = "
<< ScoreboardDepth << '\n');
}
-void ExactHazardRecognizer::Reset() {
+void PostRAHazardRecognizer::Reset() {
RequiredScoreboard.reset();
ReservedScoreboard.reset();
}
-void ExactHazardRecognizer::ScoreBoard::dump() const {
+void PostRAHazardRecognizer::ScoreBoard::dump() const {
dbgs() << "Scoreboard:\n";
unsigned last = Depth - 1;
@@ -73,7 +72,8 @@ void ExactHazardRecognizer::ScoreBoard::dump() const {
}
}
-ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU) {
+ScheduleHazardRecognizer::HazardType
+PostRAHazardRecognizer::getHazardType(SUnit *SU) {
if (ItinData.isEmpty())
return NoHazard;
@@ -120,7 +120,7 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
return NoHazard;
}
-void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
+void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) {
if (ItinData.isEmpty())
return;
@@ -174,7 +174,7 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
DEBUG(RequiredScoreboard.dump());
}
-void ExactHazardRecognizer::AdvanceCycle() {
+void PostRAHazardRecognizer::AdvanceCycle() {
ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
}
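
The renamed PostRAHazardRecognizer keeps per-cycle functional-unit usage in a fixed-depth, cyclic scoreboard. A stand-alone sketch of that data structure, simplified and not the class from the patch:

#include <vector>

// One resource bit-mask per future cycle, stored as a ring buffer.
class Scoreboard {
  std::vector<unsigned> Data;
  unsigned Head;                       // index of the current cycle
public:
  explicit Scoreboard(unsigned Depth) : Data(Depth, 0), Head(0) {}

  // Access the mask for a cycle relative to "now" (0 = current cycle).
  unsigned &operator[](unsigned Cycle) {
    return Data[(Head + Cycle) % Data.size()];
  }

  // Move to the next cycle: free the slot being vacated and rotate.
  void advance() {
    (*this)[0] = 0;
    Head = (Head + 1) % Data.size();
  }
};
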
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 9714ea653b59..4af8e07f3480 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -22,8 +22,6 @@
#include "AntiDepBreaker.h"
#include "AggressiveAntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
-#include "ExactHazardRecognizer.h"
-#include "SimpleHazardRecognizer.h"
#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
@@ -65,10 +63,6 @@ EnableAntiDepBreaking("break-anti-dependencies",
cl::desc("Break post-RA scheduling anti-dependencies: "
"\"critical\", \"all\", or \"none\""),
cl::init("none"), cl::Hidden);
-static cl::opt<bool>
-EnablePostRAHazardAvoidance("avoid-hazards",
- cl::desc("Enable exact hazard avoidance"),
- cl::init(true), cl::Hidden);
// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
static cl::opt<int>
@@ -85,6 +79,7 @@ AntiDepBreaker::~AntiDepBreaker() { }
namespace {
class PostRAScheduler : public MachineFunctionPass {
AliasAnalysis *AA;
+ const TargetInstrInfo *TII;
CodeGenOpt::Level OptLevel;
public:
@@ -187,30 +182,9 @@ namespace {
};
}
-/// isSchedulingBoundary - Test if the given instruction should be
-/// considered a scheduling boundary. This primarily includes labels
-/// and terminators.
-///
-static bool isSchedulingBoundary(const MachineInstr *MI,
- const MachineFunction &MF) {
- // Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
- return true;
-
- // Don't attempt to schedule around any instruction that defines
- // a stack-oriented pointer, as it's unlikely to be profitable. This
- // saves compile time, because it doesn't require every single
- // stack slot reference to depend on the instruction that does the
- // modification.
- const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
- if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
- return true;
-
- return false;
-}
-
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
AA = &getAnalysis<AliasAnalysis>();
+ TII = Fn.getTarget().getInstrInfo();
// Check for explicit enable/disable of post-ra scheduling.
TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
@@ -237,10 +211,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
- const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData();
- ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
- (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
- (ScheduleHazardRecognizer *)new SimpleHazardRecognizer();
+ const TargetMachine &TM = Fn.getTarget();
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+ ScheduleHazardRecognizer *HR =
+ TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins);
AntiDepBreaker *ADB =
((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
(AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) :
@@ -271,8 +245,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
MachineBasicBlock::iterator Current = MBB->end();
unsigned Count = MBB->size(), CurrentCount = Count;
for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
- MachineInstr *MI = prior(I);
- if (isSchedulingBoundary(MI, Fn)) {
+ MachineInstr *MI = llvm::prior(I);
+ if (TII->isSchedulingBoundary(MI, MBB, Fn)) {
Scheduler.Run(MBB, I, Current, CurrentCount);
Scheduler.EmitSchedule();
Current = MI;
@@ -680,15 +654,6 @@ void SchedulePostRATDList::ListScheduleTopDown() {
ScheduleNodeTopDown(FoundSUnit, CurCycle);
HazardRec->EmitInstruction(FoundSUnit);
CycleHasInsts = true;
-
- // If we are using the target-specific hazards, then don't
- // advance the cycle time just because we schedule a node. If
- // the target allows it we can schedule multiple nodes in the
- // same cycle.
- if (!EnablePostRAHazardAvoidance) {
- if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
- ++CurCycle;
- }
} else {
if (CycleHasInsts) {
DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index 96e7327a7eb7..fb2f90935551 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -512,9 +512,6 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
LI->addRange(LiveRange(UseIndex, EndIndex, RetVNI));
// FIXME: Need to set kills properly for inter-block stuff.
- if (RetVNI->isKill(UseIndex)) RetVNI->removeKill(UseIndex);
- if (IsIntraBlock)
- RetVNI->addKill(EndIndex);
} else if (ContainsDefs && ContainsUses) {
SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
@@ -556,12 +553,6 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
NewVNs, LiveOut, Phis, false, true);
LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI));
-
- if (foundUse && RetVNI->isKill(StartIndex))
- RetVNI->removeKill(StartIndex);
- if (IsIntraBlock) {
- RetVNI->addKill(EndIndex);
- }
}
// Memoize results so we don't have to recompute them.
@@ -636,9 +627,6 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I =
IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) {
I->second->setHasPHIKill(true);
- SlotIndex KillIndex(LIs->getMBBEndIdx(I->first), true);
- if (!I->second->isKill(KillIndex))
- I->second->addKill(KillIndex);
}
}
@@ -648,8 +636,6 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
} else
EndIndex = LIs->getMBBEndIdx(MBB);
LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI));
- if (IsIntraBlock)
- RetVNI->addKill(EndIndex);
// Memoize results so we don't have to recompute them.
if (!IsIntraBlock)
@@ -691,10 +677,12 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
// If the def is a move, set the copy field.
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
if (DstReg == LI->reg)
NewVN->setCopy(&*DI);
-
+ } else if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg)
+ NewVN->setCopy(&*DI);
+
NewVNs[&*DI] = NewVN;
}
@@ -725,25 +713,6 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
VNInfo* DeadVN = NewVNs[&*DI];
LI->addRange(LiveRange(DefIdx, DefIdx.getNextSlot(), DeadVN));
- DeadVN->addKill(DefIdx);
- }
-
- // Update kill markers.
- for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
- VI != VE; ++VI) {
- VNInfo* VNI = *VI;
- for (unsigned i = 0, e = VNI->kills.size(); i != e; ++i) {
- SlotIndex KillIdx = VNI->kills[i];
- if (KillIdx.isPHI())
- continue;
- MachineInstr *KillMI = LIs->getInstructionFromIndex(KillIdx);
- if (KillMI) {
- MachineOperand *KillMO = KillMI->findRegisterUseOperand(CurrLI->reg);
- if (KillMO)
- // It could be a dead def.
- KillMO->setIsKill();
- }
- }
}
}
@@ -773,19 +742,14 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
VNsToCopy.push_back(OldVN);
// Locate two-address redefinitions
- for (VNInfo::KillSet::iterator KI = OldVN->kills.begin(),
- KE = OldVN->kills.end(); KI != KE; ++KI) {
- assert(!KI->isPHI() &&
- "VN previously reported having no PHI kills.");
- MachineInstr* MI = LIs->getInstructionFromIndex(*KI);
- unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg);
- if (DefIdx == ~0U) continue;
- if (MI->isRegTiedToUseOperand(DefIdx)) {
- VNInfo* NextVN =
- CurrLI->findDefinedVNInfoForRegInt(KI->getDefIndex());
- if (NextVN == OldVN) continue;
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(CurrLI->reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ if (!DI->isRegTiedToUseOperand(DI.getOperandNo())) continue;
+ SlotIndex DefIdx = LIs->getInstructionIndex(&*DI).getDefIndex();
+ VNInfo* NextVN = CurrLI->findDefinedVNInfoForRegInt(DefIdx);
+ if (std::find(VNsToCopy.begin(), VNsToCopy.end(), NextVN) !=
+ VNsToCopy.end())
Stack.push_back(NextVN);
- }
}
}
@@ -836,7 +800,7 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
if (IntervalSSMap.count(CurrLI->reg))
IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg];
- NumRenumbers++;
+ ++NumRenumbers;
}
bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
@@ -854,7 +818,7 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
if (KillPt == DefMI->getParent()->end())
return false;
- TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, TRI);
+ TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, *TRI);
SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt));
ReconstructLiveInterval(CurrLI);
@@ -899,12 +863,11 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
}
- MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
- FoldPt, Ops, SS);
+ MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS);
if (FMI) {
LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
- FMI = MBB->insert(MBB->erase(FoldPt), FMI);
+ FoldPt->eraseFromParent();
++NumFolds;
IntervalSSMap[vreg] = SS;
@@ -980,12 +943,11 @@ MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg,
if (!TII->canFoldMemoryOperand(FoldPt, Ops))
return 0;
- MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
- FoldPt, Ops, SS);
+ MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS);
if (FMI) {
LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
- FMI = MBB->insert(MBB->erase(FoldPt), FMI);
+ FoldPt->eraseFromParent();
++NumRestoreFolds;
}
@@ -1192,7 +1154,7 @@ unsigned PreAllocSplitting::getNumberOfNonSpills(
int StoreFrameIndex;
unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
if (StoreVReg != Reg || StoreFrameIndex != FrameIndex)
- NonSpills++;
+ ++NonSpills;
int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg);
if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx))
@@ -1255,7 +1217,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
(*LI)->removeValNo(CurrVN);
DefMI->eraseFromParent();
VNUseCount.erase(CurrVN);
- NumDeadSpills++;
+ ++NumDeadSpills;
changed = true;
continue;
}
@@ -1291,9 +1253,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
Ops.push_back(OpIdx);
if (!TII->canFoldMemoryOperand(use, Ops)) continue;
- MachineInstr* NewMI =
- TII->foldMemoryOperand(*use->getParent()->getParent(),
- use, Ops, FrameIndex);
+ MachineInstr* NewMI = TII->foldMemoryOperand(use, Ops, FrameIndex);
if (!NewMI) continue;
@@ -1303,10 +1263,9 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
(*LI)->removeValNo(CurrVN);
DefMI->eraseFromParent();
- MachineBasicBlock* MBB = use->getParent();
- NewMI = MBB->insert(MBB->erase(use), NewMI);
+ use->eraseFromParent();
VNUseCount[CurrVN].erase(use);
-
+
// Remove deleted instructions. Note that we need to remove them from
// the VNInfo->use map as well, just to be safe.
for (SmallPtrSet<MachineInstr*, 4>::iterator II =
@@ -1328,7 +1287,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
if (VI->second.erase(use))
VI->second.insert(NewMI);
- NumDeadSpills++;
+ ++NumDeadSpills;
changed = true;
continue;
}
@@ -1350,7 +1309,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
LIs->RemoveMachineInstrFromMaps(DefMI);
(*LI)->removeValNo(CurrVN);
DefMI->eraseFromParent();
- NumDeadSpills++;
+ ++NumDeadSpills;
changed = true;
}
}
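Worth noting across the PreAllocSplitting hunks: TargetInstrInfo::foldMemoryOperand no longer takes the MachineFunction and now inserts the folded instruction itself, so callers erase the old instruction instead of re-inserting the returned one. The updated idiom, condensed from the code above (FoldPt is the instruction being folded, SS the stack slot):

if (MachineInstr *FMI = TII->foldMemoryOperand(FoldPt, Ops, SS)) {
  LIs->ReplaceMachineInstrInMaps(FoldPt, FMI); // keep slot indexes pointing at the new instruction
  FoldPt->eraseFromParent();                   // the folded instruction is already in the block
}
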
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 62f525fa1d97..ca4c47716875 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -46,14 +46,14 @@ bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
const TargetInstrInfo *tii_) {
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0)
+ Reg == SrcReg && DstSubReg == 0)
return true;
- if (OpIdx == 2 && MI->isSubregToReg())
- return true;
- if (OpIdx == 1 && MI->isExtractSubreg())
- return true;
- return false;
+ switch(OpIdx) {
+ case 1: return MI->isCopy() && MI->getOperand(0).getSubReg() == 0;
+ case 2: return MI->isSubregToReg() && MI->getOperand(0).getSubReg() == 0;
+ default: return false;
+ }
}
/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
@@ -101,11 +101,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
continue;
}
- if (MI->isInsertSubreg()) {
- MachineOperand &MO = MI->getOperand(2);
+ // Eliminate %reg1032:sub<def> = COPY undef.
+ if (MI->isCopy() && MI->getOperand(0).getSubReg()) {
+ MachineOperand &MO = MI->getOperand(1);
if (ImpDefRegs.count(MO.getReg())) {
- // %reg1032<def> = INSERT_SUBREG %reg1032, undef, 2
- // This is an identity copy, eliminate it now.
if (MO.isKill()) {
LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg());
vi.removeKill(MI);
@@ -119,7 +118,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
bool ChangedToImpDef = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+ if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
@@ -144,6 +143,12 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
Changed = true;
MO.setIsUndef();
+ // This is a partial register redef of an implicit def.
+ // Make sure the whole register is defined by the instruction.
+ if (MO.isDef()) {
+ MI->addRegisterDefined(Reg);
+ continue;
+ }
if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
// Make sure other uses of
for (unsigned j = i+1; j != e; ++j) {
@@ -219,8 +224,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
// Turn a copy use into an implicit_def.
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0) {
+ if ((RMI->isCopy() && RMI->getOperand(1).getReg() == Reg &&
+ RMI->getOperand(0).getSubReg() == 0) ||
+ (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ Reg == SrcReg && DstSubReg == 0)) {
RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
bool isKill = false;
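ProcessImplicitDefs now has to recognize the target-independent COPY instructions alongside TargetInstrInfo::isMoveInstr. The condition added above, written out as a hypothetical helper for clarity (the function name is illustrative only):

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// True if MI is a plain full-register COPY whose source is Reg
// (mirrors the check added to the pass above).
static bool isFullCopyOf(const MachineInstr *MI, unsigned Reg) {
  return MI->isCopy() &&
         MI->getOperand(1).getReg() == Reg &&  // source operand is the implicit-def vreg
         MI->getOperand(0).getSubReg() == 0;   // destination is the whole register
}
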
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index e778024c6a3a..3843b2537051 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -158,9 +158,9 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
AdjustsStack = true;
FrameSDOps.push_back(I);
} else if (I->isInlineAsm()) {
- // An InlineAsm might be a call; assume it is to get the stack frame
- // aligned correctly for calls.
- AdjustsStack = true;
+ // Some inline asm's need a stack frame, as indicated by operand 1.
+ if (I->getOperand(1).getImm())
+ AdjustsStack = true;
}
MFI->setAdjustsStack(AdjustsStack);
@@ -202,22 +202,17 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
if (Fn.getFunction()->hasFnAttr(Attribute::Naked))
return;
- // Figure out which *callee saved* registers are modified by the current
- // function, thus needing to be saved and restored in the prolog/epilog.
- const TargetRegisterClass * const *CSRegClasses =
- RegInfo->getCalleeSavedRegClasses(&Fn);
-
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
// If the reg is modified, save it!
- CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+ CSI.push_back(CalleeSavedInfo(Reg));
} else {
for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
*AliasSet; ++AliasSet) { // Check alias registers too.
if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
- CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+ CSI.push_back(CalleeSavedInfo(Reg));
break;
}
}
@@ -236,7 +231,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
for (std::vector<CalleeSavedInfo>::iterator
I = CSI.begin(), E = CSI.end(); I != E; ++I) {
unsigned Reg = I->getReg();
- const TargetRegisterClass *RC = I->getRegClass();
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
int FrameIdx;
if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) {
@@ -265,8 +260,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
} else {
// Spill it to the stack where we must.
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset,
- true, false);
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
}
I->setFrameIdx(FrameIdx);
@@ -303,8 +297,10 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
EntryBlock->addLiveIn(CSI[i].getReg());
// Insert the spill to the stack frame.
- TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
- CSI[i].getFrameIdx(), CSI[i].getRegClass(),TRI);
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*EntryBlock, I, Reg, true,
+ CSI[i].getFrameIdx(), RC, TRI);
}
}
@@ -328,9 +324,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// terminators that precede it.
if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg,
CSI[i].getFrameIdx(),
- CSI[i].getRegClass(), TRI);
+ RC, TRI);
assert(I != MBB->begin() &&
"loadRegFromStackSlot didn't insert any code!");
// Insert in reverse order. loadRegFromStackSlot can insert
@@ -374,10 +372,12 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
MBB->addLiveIn(blockCSI[i].getReg());
// Insert the spill to the stack frame.
- TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(),
+ unsigned Reg = blockCSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*MBB, I, Reg,
true,
blockCSI[i].getFrameIdx(),
- blockCSI[i].getRegClass(), TRI);
+ RC, TRI);
}
}
@@ -423,9 +423,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Restore all registers immediately before the return and any
// terminators that precede it.
for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
- TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
+ unsigned Reg = blockCSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg,
blockCSI[i].getFrameIdx(),
- blockCSI[i].getRegClass(), TRI);
+ RC, TRI);
assert(I != MBB->begin() &&
"loadRegFromStackSlot didn't insert any code!");
// Insert in reverse order. loadRegFromStackSlot can insert
@@ -639,6 +641,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
for (MachineFunction::iterator BB = Fn.begin(),
E = Fn.end(); BB != E; ++BB) {
+#ifndef NDEBUG
+ int SPAdjCount = 0; // frame setup / destroy count.
+#endif
int SPAdj = 0; // SP offset due to call frame setup / destroy.
if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
@@ -646,6 +651,10 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
if (I->getOpcode() == FrameSetupOpcode ||
I->getOpcode() == FrameDestroyOpcode) {
+#ifndef NDEBUG
+ // Track whether we see even pairs of them
+ SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1;
+#endif
// Remember how much SP has been adjusted to create the call
// frame.
int Size = I->getOperand(0).getImm();
@@ -712,7 +721,13 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
}
- assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
+ // If we have evenly matched pairs of frame setup / destroy instructions,
+ // make sure the adjustments come out to zero. If we don't have matched
+ // pairs, we can't be sure the missing bit isn't in another basic block
+ // due to a custom inserter playing tricks, so just asserting SPAdj==0
+ // isn't sufficient. See tMOVCC on Thumb1, for example.
+ assert((SPAdjCount || SPAdj == 0) &&
+ "Unbalanced call frame setup / destroy pairs?");
}
}
@@ -870,11 +885,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Scavenge a new scratch register
CurrentVirtReg = Reg;
const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
- CurrentScratchReg = RS->FindUnusedReg(RC);
- if (CurrentScratchReg == 0)
- // No register is "free". Scavenge a register.
- CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj);
-
+ CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj);
PrevValue = Value;
}
// replace this reference to the virtual register with the
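The PrologEpilogInserter changes follow from CalleeSavedInfo no longer carrying a TargetRegisterClass: the class is now recovered from the physical register whenever a spill or restore is emitted. A condensed view of the pattern used in each loop above:

unsigned Reg = CSI[i].getReg();
// Derive the spill class from the register itself instead of storing it in CalleeSavedInfo.
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
TII.storeRegToStackSlot(*EntryBlock, I, Reg, /*isKill=*/true,
                        CSI[i].getFrameIdx(), RC, TRI);
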
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index b3b576070863..f44478e5dd0b 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -110,6 +110,11 @@ namespace {
// Allocatable - vector of allocatable physical registers.
BitVector Allocatable;
+ // SkippedInstrs - Descriptors of instructions whose clobber list was ignored
+ // because all registers were spilled. It is still necessary to mark all the
+ // clobbered registers as used by the function.
+ SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs;
+
// isBulkSpilling - This flag is set when LiveRegMap will be cleared
// completely after spilling all live registers. LiveRegMap entries should
// not be erased.
@@ -135,6 +140,8 @@ namespace {
private:
bool runOnMachineFunction(MachineFunction &Fn);
void AllocateBasicBlock();
+ void handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead);
int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
bool isLastUseOfLocalReg(MachineOperand&);
@@ -508,27 +515,20 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
bool New;
tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
LiveReg &LR = LRI->second;
- bool PartialRedef = MI->getOperand(OpNum).getSubReg();
if (New) {
// If there is no hint, peek at the only use of this register.
if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
MRI->hasOneNonDBGUse(VirtReg)) {
+ const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg);
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
// It's a copy, use the destination register as a hint.
- if (TII->isMoveInstr(*MRI->use_nodbg_begin(VirtReg),
- SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (UseMI.isCopyLike())
+ Hint = UseMI.getOperand(0).getReg();
+ else if (TII->isMoveInstr(UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg))
Hint = DstReg;
}
allocVirtReg(MI, *LRI, Hint);
- // If this is only a partial redefinition, we must reload the other parts.
- if (PartialRedef && MI->readsVirtualRegister(VirtReg)) {
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
- int FI = getStackSpaceFor(VirtReg, RC);
- DEBUG(dbgs() << "Reloading for partial redef: %reg" << VirtReg << "\n");
- TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FI, RC, TRI);
- ++NumLoads;
- }
- } else if (LR.LastUse && !PartialRedef) {
+ } else if (LR.LastUse) {
// Redefining a live register - kill at the last use, unless it is this
// instruction defining VirtReg multiple times.
if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse())
@@ -564,10 +564,16 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
} else if (LR.Dirty) {
if (isLastUseOfLocalReg(MO)) {
DEBUG(dbgs() << "Killing last use: " << MO << "\n");
- MO.setIsKill();
+ if (MO.isUse())
+ MO.setIsKill();
+ else
+ MO.setIsDead();
} else if (MO.isKill()) {
DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");
+ MO.setIsDead(false);
}
} else if (MO.isKill()) {
// We must remove kill flags from uses of reloaded registers because the
@@ -576,6 +582,9 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
// This would cause a second reload of %x into a different register.
DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
+ MO.setIsDead(false);
}
assert(LR.PhysReg && "Register not assigned");
LR.LastUse = MI;
@@ -607,6 +616,91 @@ bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
return MO.isDead();
}
+// Handle special instruction operands like early clobbers and tied ops when
+// there are additional physreg defines.
+void RAFast::handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead) {
+ DEBUG(dbgs() << "Scanning for through registers:");
+ SmallSet<unsigned, 8> ThroughRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
+ (MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
+ if (ThroughRegs.insert(Reg))
+ DEBUG(dbgs() << " %reg" << Reg);
+ }
+ }
+
+ // If any physreg defines collide with preallocated through registers,
+ // we must spill and reallocate.
+ DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ UsedInInstr.set(Reg);
+ if (ThroughRegs.count(PhysRegState[Reg]))
+ definePhysReg(MI, Reg, regFree);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ UsedInInstr.set(*AS);
+ if (ThroughRegs.count(PhysRegState[*AS]))
+ definePhysReg(MI, *AS, regFree);
+ }
+ }
+
+ SmallVector<unsigned, 8> PartialDefs;
+ DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (MO.isUse()) {
+ unsigned DefIdx = 0;
+ if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
+ DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
+ << DefIdx << ".\n");
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->second.PhysReg;
+ setPhysReg(MI, i, PhysReg);
+ // Note: we don't update the def operand yet. That would cause the normal
+ // def-scan to attempt spilling.
+ } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) {
+ DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
+ // Reload the register, but don't assign to the operand just yet.
+ // That would confuse the later phys-def processing pass.
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ PartialDefs.push_back(LRI->second.PhysReg);
+ } else if (MO.isEarlyClobber()) {
+ // Note: defineVirtReg may invalidate MO.
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->second.PhysReg;
+ if (setPhysReg(MI, i, PhysReg))
+ VirtDead.push_back(Reg);
+ }
+ }
+
+ // Restore UsedInInstr to a state usable for allocating normal virtual uses.
+ UsedInInstr.reset();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ UsedInInstr.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ UsedInInstr.set(*AS);
+ }
+
+ // Also mark PartialDefs as used to avoid reallocation.
+ for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
+ UsedInInstr.set(PartialDefs[i]);
+}
+
void RAFast::AllocateBasicBlock() {
DEBUG(dbgs() << "\nAllocating " << *MBB);
@@ -620,7 +714,7 @@ void RAFast::AllocateBasicBlock() {
E = MBB->livein_end(); I != E; ++I)
definePhysReg(MII, *I, regReserved);
- SmallVector<unsigned, 8> PhysECs, VirtDead;
+ SmallVector<unsigned, 8> VirtDead;
SmallVector<MachineInstr*, 32> Coalesced;
// Otherwise, sequentially allocate each instruction in the MBB.
@@ -670,8 +764,25 @@ void RAFast::AllocateBasicBlock() {
LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
if (LRI != LiveVirtRegs.end())
setPhysReg(MI, i, LRI->second.PhysReg);
- else
- MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry!
+ else {
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS == -1)
+ MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry!
+ else {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ uint64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr =
+ MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ } else
+ MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry!
+ }
+ }
}
// Next instruction.
continue;
@@ -679,17 +790,25 @@ void RAFast::AllocateBasicBlock() {
// If this is a copy, we may be able to coalesce.
unsigned CopySrc, CopyDst, CopySrcSub, CopyDstSub;
- if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub))
+ if (MI->isCopy()) {
+ CopyDst = MI->getOperand(0).getReg();
+ CopySrc = MI->getOperand(1).getReg();
+ CopyDstSub = MI->getOperand(0).getSubReg();
+ CopySrcSub = MI->getOperand(1).getSubReg();
+ } else if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub))
CopySrc = CopyDst = 0;
// Track registers used by instruction.
UsedInInstr.reset();
- PhysECs.clear();
// First scan.
// Mark physreg uses and early clobbers as used.
// Find the end of the virtreg operands
unsigned VirtOpEnd = 0;
+ bool hasTiedOps = false;
+ bool hasEarlyClobbers = false;
+ bool hasPartialRedefs = false;
+ bool hasPhysDefs = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
@@ -697,20 +816,44 @@ void RAFast::AllocateBasicBlock() {
if (!Reg) continue;
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
VirtOpEnd = i+1;
+ if (MO.isUse()) {
+ hasTiedOps = hasTiedOps ||
+ TID.getOperandConstraint(i, TOI::TIED_TO) != -1;
+ } else {
+ if (MO.isEarlyClobber())
+ hasEarlyClobbers = true;
+ if (MO.getSubReg() && MI->readsVirtualRegister(Reg))
+ hasPartialRedefs = true;
+ }
continue;
}
if (!Allocatable.test(Reg)) continue;
if (MO.isUse()) {
usePhysReg(MO);
} else if (MO.isEarlyClobber()) {
- definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
- PhysECs.push_back(Reg);
- }
+ definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
+ regFree : regReserved);
+ hasEarlyClobbers = true;
+ } else
+ hasPhysDefs = true;
+ }
+
+ // The instruction may have virtual register operands that must be allocated
+ // the same register at use-time and def-time: early clobbers and tied
+ // operands. If there are also physical defs, these registers must avoid
+ // both physical defs and uses, making them more constrained than normal
+ // operands.
+ // We didn't detect inline asm tied operands above, so just make this extra
+ // pass for all inline asm.
+ if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
+ (hasTiedOps && hasPhysDefs)) {
+ handleThroughOperands(MI, VirtDead);
+ // Don't attempt coalescing when we have funny stuff going on.
+ CopyDst = 0;
}
// Second scan.
- // Allocate virtreg uses and early clobbers.
- // Collect VirtKills
+ // Allocate virtreg uses.
for (unsigned i = 0; i != VirtOpEnd; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
@@ -722,12 +865,6 @@ void RAFast::AllocateBasicBlock() {
CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
if (setPhysReg(MI, i, PhysReg))
killVirtReg(LRI);
- } else if (MO.isEarlyClobber()) {
- // Note: defineVirtReg may invalidate MO.
- LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
- unsigned PhysReg = LRI->second.PhysReg;
- setPhysReg(MI, i, PhysReg);
- PhysECs.push_back(PhysReg);
}
}
@@ -735,12 +872,16 @@ void RAFast::AllocateBasicBlock() {
// Track registers defined by instruction - early clobbers at this point.
UsedInInstr.reset();
- for (unsigned i = 0, e = PhysECs.size(); i != e; ++i) {
- unsigned PhysReg = PhysECs[i];
- UsedInInstr.set(PhysReg);
- for (const unsigned *AS = TRI->getAliasSet(PhysReg);
- unsigned Alias = *AS; ++AS)
- UsedInInstr.set(Alias);
+ if (hasEarlyClobbers) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ UsedInInstr.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ UsedInInstr.set(*AS);
+ }
}
unsigned DefOpEnd = MI->getNumOperands();
@@ -752,13 +893,18 @@ void RAFast::AllocateBasicBlock() {
DefOpEnd = VirtOpEnd;
DEBUG(dbgs() << " Spilling remaining registers before call.\n");
spillAll(MI);
+
+ // The imp-defs are skipped below, but we still need to mark those
+ // registers as used by the function.
+ SkippedInstrs.insert(&TID);
}
// Third scan.
// Allocate defs and collect dead defs.
for (unsigned i = 0; i != DefOpEnd; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || !MO.getReg()) continue;
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -837,6 +983,14 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
// Make sure the set of used physregs is closed under subreg operations.
MRI->closePhysRegsUsed(*TRI);
+ // Add the clobber lists for all the instructions we skipped earlier.
+ for (SmallPtrSet<const TargetInstrDesc*, 4>::const_iterator
+ I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
+ if (const unsigned *Defs = (*I)->getImplicitDefs())
+ while (*Defs)
+ MRI->setPhysRegUsed(*Defs++);
+
+ SkippedInstrs.clear();
StackSlotForVirtReg.clear();
return true;
}
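One detail of the RegAllocFast changes worth calling out: when a DBG_VALUE refers to a virtual register that only lives in a spill slot, the allocator now rewrites it with TargetInstrInfo::emitFrameIndexDebugValue instead of dropping the location. Condensed from the code above (SS is the stack slot found for the register, MO the register operand):

uint64_t Offset = MI->getOperand(1).getImm();
const MDNode *MDPtr = MI->getOperand(MI->getNumOperands()-1).getMetadata();
DebugLoc DL = MI->getDebugLoc();
if (MachineInstr *NewDV =
        TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) {
  MachineBasicBlock *MBB = MI->getParent();
  MBB->insert(MBB->erase(MI), NewDV);  // swap the DBG_VALUE in place
} else
  MO.setReg(0);                        // no frame-index form available
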
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index bc331f0ff81d..044672d6d7a5 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -83,7 +83,8 @@ namespace {
// pressure, it can cause fewer GPRs to be held in the queue.
static cl::opt<unsigned>
NumRecentlyUsedRegs("linearscan-skip-count",
- cl::desc("Number of registers for linearscan to remember to skip."),
+ cl::desc("Number of registers for linearscan to remember"
+ "to skip."),
cl::init(0),
cl::Hidden);
@@ -421,9 +422,10 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (vni->def != SlotIndex() && vni->isDefAccurate() &&
(CopyMI = li_->getInstructionFromIndex(vni->def)) &&
- tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ (CopyMI->isCopy() ||
+ tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)))
// Defined by a copy, try to extend SrcReg forward
- CandReg = SrcReg;
+ CandReg = CopyMI->isCopy() ? CopyMI->getOperand(1).getReg() : SrcReg;
else if (TrivCoalesceEnds &&
(CopyMI =
li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
@@ -992,6 +994,24 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
mri_->setRegAllocationHint(cur->reg, 0, Reg);
}
+ } else if (CopyMI && CopyMI->isCopy()) {
+ DstReg = CopyMI->getOperand(0).getReg();
+ DstSubReg = CopyMI->getOperand(0).getSubReg();
+ SrcReg = CopyMI->getOperand(1).getReg();
+ SrcSubReg = CopyMI->getOperand(1).getSubReg();
+ unsigned Reg = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ Reg = SrcReg;
+ else if (vrm_->isAssignedReg(SrcReg))
+ Reg = vrm_->getPhys(SrcReg);
+ if (Reg) {
+ if (SrcSubReg)
+ Reg = tri_->getSubReg(Reg, SrcSubReg);
+ if (DstSubReg)
+ Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
+ if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
+ mri_->setRegAllocationHint(cur->reg, 0, Reg);
+ }
}
}
}
@@ -1206,8 +1226,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
SmallVector<LiveInterval*, 8> spillIs;
std::vector<LiveInterval*> added;
-
- added = spiller_->spill(cur, spillIs);
+ spiller_->spill(cur, added, spillIs);
std::sort(added.begin(), added.end(), LISorter());
addStackInterval(cur, ls_, li_, mri_, *vrm_);
@@ -1285,10 +1304,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
if (sli->beginIndex() < earliestStart)
earliestStart = sli->beginIndex();
- std::vector<LiveInterval*> newIs;
- newIs = spiller_->spill(sli, spillIs, &earliestStart);
+ spiller_->spill(sli, added, spillIs, &earliestStart);
addStackInterval(sli, ls_, li_, mri_, *vrm_);
- std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
spilled.insert(sli->reg);
}
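The linear-scan hunks reflect a Spiller interface change: spill no longer returns the new intervals by value but appends them to a caller-owned vector. Usage as in the code above:

SmallVector<LiveInterval*, 8> spillIs;   // interfering intervals to spill
std::vector<LiveInterval*> added;        // receives the intervals created by spilling
spiller_->spill(cur, added, spillIs);
std::sort(added.begin(), added.end(), LISorter());
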
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
deleted file mode 100644
index 321ae12def57..000000000000
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ /dev/null
@@ -1,1254 +0,0 @@
-//===-- RegAllocLocal.cpp - A BasicBlock generic register allocator -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This register allocator allocates registers to a basic block at a time,
-// attempting to keep values in registers and reusing registers as appropriate.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "regalloc"
-#include "llvm/BasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include <algorithm>
-using namespace llvm;
-
-STATISTIC(NumStores, "Number of stores added");
-STATISTIC(NumLoads , "Number of loads added");
-STATISTIC(NumCopies, "Number of copies coalesced");
-
-static RegisterRegAlloc
- localRegAlloc("local", "local register allocator",
- createLocalRegisterAllocator);
-
-namespace {
- class RALocal : public MachineFunctionPass {
- public:
- static char ID;
- RALocal() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {}
- private:
- const TargetMachine *TM;
- MachineFunction *MF;
- MachineRegisterInfo *MRI;
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
-
- // StackSlotForVirtReg - Maps virtual regs to the frame index where these
- // values are spilled.
- IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
-
- // Virt2PhysRegMap - This map contains entries for each virtual register
- // that is currently available in a physical register.
- IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
-
- unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
- return Virt2PhysRegMap[VirtReg];
- }
-
- // PhysRegsUsed - This array is effectively a map, containing entries for
- // each physical register that currently has a value (ie, it is in
- // Virt2PhysRegMap). The value mapped to is the virtual register
- // corresponding to the physical register (the inverse of the
- // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned
- // because it is used by a future instruction, and to -2 if it is not
- // allocatable. If the entry for a physical register is -1, then the
- // physical register is "not in the map".
- //
- std::vector<int> PhysRegsUsed;
-
- // PhysRegsUseOrder - This contains a list of the physical registers that
- // currently have a virtual register value in them. This list provides an
- // ordering of registers, imposing a reallocation order. This list is only
- // used if all registers are allocated and we have to spill one, in which
- // case we spill the least recently used register. Entries at the front of
- // the list are the least recently used registers, entries at the back are
- // the most recently used.
- //
- std::vector<unsigned> PhysRegsUseOrder;
-
- // Virt2LastUseMap - This maps each virtual register to its last use
- // (MachineInstr*, operand index pair).
- IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor>
- Virt2LastUseMap;
-
- std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) {
- assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
- return Virt2LastUseMap[Reg];
- }
-
- // VirtRegModified - This bitset contains information about which virtual
- // registers need to be spilled back to memory when their registers are
- // scavenged. If a virtual register has simply been rematerialized, there
- // is no reason to spill it to memory when we need the register back.
- //
- BitVector VirtRegModified;
-
- // UsedInMultipleBlocks - Tracks whether a particular register is used in
- // more than one block.
- BitVector UsedInMultipleBlocks;
-
- void markVirtRegModified(unsigned Reg, bool Val = true) {
- assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
- Reg -= TargetRegisterInfo::FirstVirtualRegister;
- if (Val)
- VirtRegModified.set(Reg);
- else
- VirtRegModified.reset(Reg);
- }
-
- bool isVirtRegModified(unsigned Reg) const {
- assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
- assert(Reg - TargetRegisterInfo::FirstVirtualRegister <
- VirtRegModified.size() && "Illegal virtual register!");
- return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
- }
-
- void AddToPhysRegsUseOrder(unsigned Reg) {
- std::vector<unsigned>::iterator It =
- std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), Reg);
- if (It != PhysRegsUseOrder.end())
- PhysRegsUseOrder.erase(It);
- PhysRegsUseOrder.push_back(Reg);
- }
-
- void MarkPhysRegRecentlyUsed(unsigned Reg) {
- if (PhysRegsUseOrder.empty() ||
- PhysRegsUseOrder.back() == Reg) return; // Already most recently used
-
- for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i) {
- unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle
- if (!areRegsEqual(Reg, RegMatch)) continue;
-
- PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1);
- // Add it to the end of the list
- PhysRegsUseOrder.push_back(RegMatch);
- if (RegMatch == Reg)
- return; // Found an exact match, exit early
- }
- }
-
- public:
- virtual const char *getPassName() const {
- return "Local Register Allocator";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequiredID(PHIEliminationID);
- AU.addRequiredID(TwoAddressInstructionPassID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- private:
- /// runOnMachineFunction - Register allocate the whole function
- bool runOnMachineFunction(MachineFunction &Fn);
-
- /// AllocateBasicBlock - Register allocate the specified basic block.
- void AllocateBasicBlock(MachineBasicBlock &MBB);
-
-
- /// areRegsEqual - This method returns true if the specified registers are
- /// related to each other. To do this, it checks to see if they are equal
- /// or if the first register is in the alias set of the second register.
- ///
- bool areRegsEqual(unsigned R1, unsigned R2) const {
- if (R1 == R2) return true;
- for (const unsigned *AliasSet = TRI->getAliasSet(R2);
- *AliasSet; ++AliasSet) {
- if (*AliasSet == R1) return true;
- }
- return false;
- }
-
- /// getStackSpaceFor - This returns the frame index of the specified virtual
- /// register on the stack, allocating space if necessary.
- int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
-
- /// removePhysReg - This method marks the specified physical register as no
- /// longer being in use.
- ///
- void removePhysReg(unsigned PhysReg);
-
- void storeVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned VirtReg, unsigned PhysReg, bool isKill);
-
- /// spillVirtReg - This method spills the value specified by PhysReg into
- /// the virtual register slot specified by VirtReg. It then updates the RA
- /// data structures to indicate the fact that PhysReg is now available.
- ///
- void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- unsigned VirtReg, unsigned PhysReg);
-
- /// spillPhysReg - This method spills the specified physical register into
- /// the virtual register slot associated with it. If OnlyVirtRegs is set to
- /// true, then the request is ignored if the physical register does not
- /// contain a virtual register.
- ///
- void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
- unsigned PhysReg, bool OnlyVirtRegs = false);
-
- /// assignVirtToPhysReg - This method updates local state so that we know
- /// that PhysReg is the proper container for VirtReg now. The physical
- /// register must not be used for anything else when this is called.
- ///
- void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
-
- /// isPhysRegAvailable - Return true if the specified physical register is
- /// free and available for use. This also includes checking to see if
- /// aliased registers are all free...
- ///
- bool isPhysRegAvailable(unsigned PhysReg) const;
-
- /// getFreeReg - Look to see if there is a free register available in the
- /// specified register class. If not, return 0.
- ///
- unsigned getFreeReg(const TargetRegisterClass *RC);
-
- /// getReg - Find a physical register to hold the specified virtual
- /// register. If all compatible physical registers are used, this method
- /// spills the last used virtual register to the stack, and uses that
- /// register. If NoFree is true, that means the caller knows there isn't
- /// a free register, do not call getFreeReg().
- unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI,
- unsigned VirtReg, bool NoFree = false);
-
- /// reloadVirtReg - This method transforms the specified virtual
- /// register use to refer to a physical register. This method may do this
- /// in one of several ways: if the register is available in a physical
- /// register already, it uses that physical register. If the value is not
- /// in a physical register, and if there are physical registers available,
- /// it loads it into a register: PhysReg if that is an available physical
- /// register, otherwise any physical register of the right class.
- /// If register pressure is high, and it is possible, it tries to fold the
- /// load of the virtual register into the instruction itself. It avoids
- /// doing this if register pressure is low to improve the chance that
- /// subsequent instructions can use the reloaded value. This method
- /// returns the modified instruction.
- ///
- MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
- unsigned OpNum, SmallSet<unsigned, 4> &RRegs,
- unsigned PhysReg);
-
- /// ComputeLocalLiveness - Computes liveness of registers within a basic
- /// block, setting the killed/dead flags as appropriate.
- void ComputeLocalLiveness(MachineBasicBlock& MBB);
-
- void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I,
- unsigned PhysReg);
- };
- char RALocal::ID = 0;
-}
-
-/// getStackSpaceFor - This allocates space for the specified virtual register
-/// to be held on the stack.
-int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
- // Find the location Reg would belong...
- int SS = StackSlotForVirtReg[VirtReg];
- if (SS != -1)
- return SS; // Already has space allocated?
-
- // Allocate a new stack object for this spill location...
- int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
-
- // Assign the slot.
- StackSlotForVirtReg[VirtReg] = FrameIdx;
- return FrameIdx;
-}
-
-
-/// removePhysReg - This method marks the specified physical register as no
-/// longer being in use.
-///
-void RALocal::removePhysReg(unsigned PhysReg) {
- PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used
-
- std::vector<unsigned>::iterator It =
- std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg);
- if (It != PhysRegsUseOrder.end())
- PhysRegsUseOrder.erase(It);
-}
-
-/// storeVirtReg - Store a virtual register to its assigned stack slot.
-void RALocal::storeVirtReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned VirtReg, unsigned PhysReg,
- bool isKill) {
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
- int FrameIndex = getStackSpaceFor(VirtReg, RC);
- DEBUG(dbgs() << " to stack slot #" << FrameIndex);
- TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC, TRI);
- ++NumStores; // Update statistics
-
- // Mark the spill instruction as last use if we're not killing the register.
- if (!isKill) {
- MachineInstr *Spill = llvm::prior(I);
- int OpNum = Spill->findRegisterUseOperandIdx(PhysReg);
- if (OpNum < 0)
- getVirtRegLastUse(VirtReg) = std::make_pair((MachineInstr*)0, 0);
- else
- getVirtRegLastUse(VirtReg) = std::make_pair(Spill, OpNum);
- }
-}
-
-/// spillVirtReg - This method spills the value specified by PhysReg into the
-/// virtual register slot specified by VirtReg. It then updates the RA data
-/// structures to indicate the fact that PhysReg is now available.
-///
-void RALocal::spillVirtReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned VirtReg, unsigned PhysReg) {
- assert(VirtReg && "Spilling a physical register is illegal!"
- " Must not have appropriate kill for the register or use exists beyond"
- " the intended one.");
- DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg)
- << " containing %reg" << VirtReg);
-
- if (!isVirtRegModified(VirtReg)) {
- DEBUG(dbgs() << " which has not been modified, so no store necessary!");
- std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
- if (LastUse.first)
- LastUse.first->getOperand(LastUse.second).setIsKill();
- } else {
- // Otherwise, there is a virtual register corresponding to this physical
- // register. We only need to spill it into its stack slot if it has been
- // modified.
- // If the instruction reads the register that's spilled, (e.g. this can
- // happen if it is a move to a physical register), then the spill
- // instruction is not a kill.
- bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg));
- storeVirtReg(MBB, I, VirtReg, PhysReg, isKill);
- }
-
- getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available
-
- DEBUG(dbgs() << '\n');
- removePhysReg(PhysReg);
-}
-
-
-/// spillPhysReg - This method spills the specified physical register into the
-/// virtual register slot associated with it. If OnlyVirtRegs is set to true,
-/// then the request is ignored if the physical register does not contain a
-/// virtual register.
-///
-void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
- unsigned PhysReg, bool OnlyVirtRegs) {
- if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used!
- assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
- if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
- spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
- return;
- }
-
- // If the selected register aliases any other registers, we must make
- // sure that one of the aliases isn't alive.
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
- *AliasSet; ++AliasSet) {
- if (PhysRegsUsed[*AliasSet] == -1 || // Spill aliased register.
- PhysRegsUsed[*AliasSet] == -2) // If allocatable.
- continue;
-
- if (PhysRegsUsed[*AliasSet])
- spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
- }
-}
-
-
-/// assignVirtToPhysReg - This method updates local state so that we know
-/// that PhysReg is the proper container for VirtReg now. The physical
-/// register must not be used for anything else when this is called.
-///
-void RALocal::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
- assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
- // Update information to note the fact that this register was just used, and
- // it holds VirtReg.
- PhysRegsUsed[PhysReg] = VirtReg;
- getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
- AddToPhysRegsUseOrder(PhysReg); // New use of PhysReg
-}
-
-
-/// isPhysRegAvailable - Return true if the specified physical register is free
-/// and available for use. This also includes checking to see if aliased
-/// registers are all free...
-///
-bool RALocal::isPhysRegAvailable(unsigned PhysReg) const {
- if (PhysRegsUsed[PhysReg] != -1) return false;
-
- // If the selected register aliases any other allocated registers, it is
- // not free!
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
- *AliasSet; ++AliasSet)
- if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use?
- return false; // Can't use this reg then.
- return true;
-}
-
-
-/// getFreeReg - Look to see if there is a free register available in the
-/// specified register class. If not, return 0.
-///
-unsigned RALocal::getFreeReg(const TargetRegisterClass *RC) {
- // Get iterators defining the range of registers that are valid to allocate in
- // this class, which also specifies the preferred allocation order.
- TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
- TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
-
- for (; RI != RE; ++RI)
- if (isPhysRegAvailable(*RI)) { // Is reg unused?
- assert(*RI != 0 && "Cannot use register!");
- return *RI; // Found an unused register!
- }
- return 0;
-}
-
-
-/// getReg - Find a physical register to hold the specified virtual
-/// register. If all compatible physical registers are used, this method spills
-/// the last used virtual register to the stack, and uses that register.
-///
-unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I,
- unsigned VirtReg, bool NoFree) {
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
-
- // First check to see if we have a free register of the requested type...
- unsigned PhysReg = NoFree ? 0 : getFreeReg(RC);
-
- if (PhysReg != 0) {
- // Assign the register.
- assignVirtToPhysReg(VirtReg, PhysReg);
- return PhysReg;
- }
-
- // If we didn't find an unused register, scavenge one now!
- assert(!PhysRegsUseOrder.empty() && "No allocated registers??");
-
- // Loop over all of the preallocated registers from the least recently used
- // to the most recently used. When we find one that is capable of holding
- // our register, use it.
- for (unsigned i = 0; PhysReg == 0; ++i) {
- assert(i != PhysRegsUseOrder.size() &&
- "Couldn't find a register of the appropriate class!");
-
- unsigned R = PhysRegsUseOrder[i];
-
- // We can only use this register if it holds a virtual register (ie, it
- // can be spilled). Do not use it if it is an explicitly allocated
- // physical register!
- assert(PhysRegsUsed[R] != -1 &&
- "PhysReg in PhysRegsUseOrder, but is not allocated?");
- if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) {
- // If the current register is compatible, use it.
- if (RC->contains(R)) {
- PhysReg = R;
- break;
- }
-
- // If one of the registers aliased to the current register is
- // compatible, use it.
- for (const unsigned *AliasIt = TRI->getAliasSet(R);
- *AliasIt; ++AliasIt) {
- if (!RC->contains(*AliasIt)) continue;
-
- // If this is pinned down for some reason, don't use it. For
- // example, if CL is pinned, and we run across CH, don't use
- // CH as justification for using scavenging ECX (which will
- // fail).
- if (PhysRegsUsed[*AliasIt] == 0) continue;
-
- // Make sure the register is allocatable. Don't allocate SIL on
- // x86-32.
- if (PhysRegsUsed[*AliasIt] == -2) continue;
-
- PhysReg = *AliasIt; // Take an aliased register
- break;
- }
- }
- }
-
- assert(PhysReg && "Physical register not assigned!?!?");
-
- // At this point PhysRegsUseOrder[i] is the least recently used register of
- // compatible register class. Spill it to memory and reap its remains.
- spillPhysReg(MBB, I, PhysReg);
-
- // Now that we know which register we need to assign this to, do it now!
- assignVirtToPhysReg(VirtReg, PhysReg);
- return PhysReg;
-}
-
-
-/// reloadVirtReg - This method transforms the specified virtual
-/// register use to refer to a physical register. This method may do this in
-/// one of several ways: if the register is available in a physical register
-/// already, it uses that physical register. If the value is not in a physical
-/// register, and if there are physical registers available, it loads it into a
-/// register: PhysReg if that is an available physical register, otherwise any
-/// register. If register pressure is high, and it is possible, it tries to
-/// fold the load of the virtual register into the instruction itself. It
-/// avoids doing this if register pressure is low to improve the chance that
-/// subsequent instructions can use the reloaded value. This method returns
-/// the modified instruction.
-///
-MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
- unsigned OpNum,
- SmallSet<unsigned, 4> &ReloadedRegs,
- unsigned PhysReg) {
- unsigned VirtReg = MI->getOperand(OpNum).getReg();
- unsigned SubIdx = MI->getOperand(OpNum).getSubReg();
-
- // If the virtual register is already available, just update the instruction
- // and return.
- if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
- if (SubIdx) {
- PR = TRI->getSubReg(PR, SubIdx);
- MI->getOperand(OpNum).setSubReg(0);
- }
- MI->getOperand(OpNum).setReg(PR); // Assign the input register
- if (!MI->isDebugValue()) {
- // Do not do these for DBG_VALUE as they can affect codegen.
- MarkPhysRegRecentlyUsed(PR); // Already have this value available!
- getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
- }
- return MI;
- }
-
- // Otherwise, we need to fold it into the current instruction, or reload it.
- // If we have registers available to hold the value, use them.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
- // If we already have a PhysReg (this happens when the instruction is a
- // reg-to-reg copy with a PhysReg destination) use that.
- if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) ||
- !isPhysRegAvailable(PhysReg))
- PhysReg = getFreeReg(RC);
- int FrameIndex = getStackSpaceFor(VirtReg, RC);
-
- if (PhysReg) { // Register is available, allocate it!
- assignVirtToPhysReg(VirtReg, PhysReg);
- } else { // No registers available.
- // Force some poor hapless value out of the register file to
- // make room for the new register, and reload it.
- PhysReg = getReg(MBB, MI, VirtReg, true);
- }
-
- markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded
-
- DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into "
- << TRI->getName(PhysReg) << "\n");
-
- // Add move instruction(s)
- TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC, TRI);
- ++NumLoads; // Update statistics
-
- MF->getRegInfo().setPhysRegUsed(PhysReg);
- // Assign the input register.
- if (SubIdx) {
- MI->getOperand(OpNum).setSubReg(0);
- MI->getOperand(OpNum).setReg(TRI->getSubReg(PhysReg, SubIdx));
- } else
- MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register
- getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
-
- if (!ReloadedRegs.insert(PhysReg)) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Ran out of registers during register allocation!";
- if (MI->isInlineAsm()) {
- Msg << "\nPlease check your inline asm statement for invalid "
- << "constraints:\n";
- MI->print(Msg, TM);
- }
- report_fatal_error(Msg.str());
- }
- for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
- *SubRegs; ++SubRegs) {
- if (ReloadedRegs.insert(*SubRegs)) continue;
-
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Ran out of registers during register allocation!";
- if (MI->isInlineAsm()) {
- Msg << "\nPlease check your inline asm statement for invalid "
- << "constraints:\n";
- MI->print(Msg, TM);
- }
- report_fatal_error(Msg.str());
- }
-
- return MI;
-}
-
-/// isReadModWriteImplicitKill - True if this is an implicit kill for a
-/// read/mod/write register, i.e. update partial register.
-static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
- MO.isDef() && !MO.isDead())
- return true;
- }
- return false;
-}
-
-/// isReadModWriteImplicitDef - True if this is an implicit def for a
-/// read/mod/write register, i.e. update partial register.
-static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
- !MO.isDef() && MO.isKill())
- return true;
- }
- return false;
-}
-
-// precedes - Helper function to determine whether MachineInstr A
-// precedes MachineInstr B within the same MBB.
-static bool precedes(MachineBasicBlock::iterator A,
- MachineBasicBlock::iterator B) {
- if (A == B)
- return false;
-
- MachineBasicBlock::iterator I = A->getParent()->begin();
- while (I != A->getParent()->end()) {
- if (I == A)
- return true;
- else if (I == B)
- return false;
-
- ++I;
- }
-
- return false;
-}
-
-/// ComputeLocalLiveness - Computes liveness of registers within a basic
-/// block, setting the killed/dead flags as appropriate.
-void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
- // Keep track of the most recently seen previous use or def of each reg,
- // so that we can update them with dead/kill markers.
- DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- if (I->isDebugValue())
- continue;
-
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = I->getOperand(i);
- // Uses don't trigger any flags, but we need to save
- // them for later. Also, we have to process these
- // _before_ processing the defs, since an instr
- // uses regs before it defs them.
- if (!MO.isReg() || !MO.getReg() || !MO.isUse())
- continue;
-
- // Ignore helpful kill flags from earlier passes.
- MO.setIsKill(false);
-
- LastUseDef[MO.getReg()] = std::make_pair(I, i);
-
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
-
- const unsigned *Aliases = TRI->getAliasSet(MO.getReg());
- if (Aliases == 0)
- continue;
-
- while (*Aliases) {
- DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
- alias = LastUseDef.find(*Aliases);
-
- if (alias != LastUseDef.end() && alias->second.first != I)
- LastUseDef[*Aliases] = std::make_pair(I, i);
-
- ++Aliases;
- }
- }
-
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = I->getOperand(i);
- // Defs other than 2-addr redefs _do_ trigger flag changes:
- // - A def followed by a def is dead
- // - A use followed by a def is a kill
- if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue;
-
- unsigned SubIdx = MO.getSubReg();
- DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
- last = LastUseDef.find(MO.getReg());
- if (last != LastUseDef.end()) {
- // Check if this is a two address instruction. If so, then
- // the def does not kill the use.
- if (last->second.first == I && I->isRegTiedToUseOperand(i))
- continue;
-
- MachineOperand &lastUD =
- last->second.first->getOperand(last->second.second);
- if (SubIdx && lastUD.getSubReg() != SubIdx)
- // Partial re-def, the last def is not dead.
- // %reg1024:5<def> =
- // %reg1024:6<def> =
- // or
- // %reg1024:5<def> = op %reg1024, 5
- continue;
-
- if (lastUD.isDef())
- lastUD.setIsDead(true);
- else
- lastUD.setIsKill(true);
- }
-
- LastUseDef[MO.getReg()] = std::make_pair(I, i);
- }
- }
-
- // Live-out (of the function) registers contain return values of the function,
- // so we need to make sure they are alive at return time.
- MachineBasicBlock::iterator Ret = MBB.getFirstTerminator();
- bool BBEndsInReturn = (Ret != MBB.end() && Ret->getDesc().isReturn());
-
- if (BBEndsInReturn)
- for (MachineRegisterInfo::liveout_iterator
- I = MF->getRegInfo().liveout_begin(),
- E = MF->getRegInfo().liveout_end(); I != E; ++I)
- if (!Ret->readsRegister(*I)) {
- Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
- LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1);
- }
-
- // Finally, loop over the final use/def of each reg
- // in the block and determine if it is dead.
- for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
- I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) {
- MachineInstr *MI = I->second.first;
- unsigned idx = I->second.second;
- MachineOperand &MO = MI->getOperand(idx);
-
- bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg());
-
- // A crude approximation of "live-out" calculation
- bool usedOutsideBlock = isPhysReg ? false :
- UsedInMultipleBlocks.test(MO.getReg() -
- TargetRegisterInfo::FirstVirtualRegister);
-
- // If the machine BB ends in a return instruction, then the value isn't used
- // outside of the BB.
- if (!isPhysReg && (!usedOutsideBlock || BBEndsInReturn)) {
- // DBG_VALUE complicates this: if the only refs of a register outside
- // this block are DBG_VALUE, we can't keep the reg live just for that,
- // as it will cause the reg to be spilled at the end of this block when
- // it wouldn't have been otherwise. Nullify the DBG_VALUEs when that
- // happens.
- bool UsedByDebugValueOnly = false;
- for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()),
- UE = MRI->reg_end(); UI != UE; ++UI) {
- // Two cases:
- // - used in another block
- // - used in the same block before it is defined (loop)
- if (UI->getParent() == &MBB &&
- !(MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI)))
- continue;
-
- if (UI->isDebugValue()) {
- UsedByDebugValueOnly = true;
- continue;
- }
-
- // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone.
- UsedInMultipleBlocks.set(MO.getReg() -
- TargetRegisterInfo::FirstVirtualRegister);
- usedOutsideBlock = true;
- UsedByDebugValueOnly = false;
- break;
- }
-
- if (UsedByDebugValueOnly)
- for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()),
- UE = MRI->reg_end(); UI != UE; ++UI)
- if (UI->isDebugValue() &&
- (UI->getParent() != &MBB ||
- (MO.isDef() && precedes(&*UI, MI))))
- UI.getOperand().setReg(0U);
- }
-
- // Physical registers and those that are not live-out of the block are
- // killed/dead at their last use/def within this block.
- if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) {
- if (MO.isUse()) {
- // Don't mark uses that are tied to defs as kills.
- if (!MI->isRegTiedToDefOperand(idx))
- MO.setIsKill(true);
- } else {
- MO.setIsDead(true);
- }
- }
- }
-}
-
-void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
- // loop over each instruction
- MachineBasicBlock::iterator MII = MBB.begin();
-
- DEBUG({
- const BasicBlock *LBB = MBB.getBasicBlock();
- if (LBB)
- dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName();
- });
-
- // Add live-in registers as active.
- for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(),
- E = MBB.livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- MF->getRegInfo().setPhysRegUsed(Reg);
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
- AddToPhysRegsUseOrder(Reg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] == -2) continue;
-
- AddToPhysRegsUseOrder(*SubRegs);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- }
- }
-
- ComputeLocalLiveness(MBB);
-
- // Otherwise, sequentially allocate each instruction in the MBB.
- while (MII != MBB.end()) {
- MachineInstr *MI = MII++;
- const TargetInstrDesc &TID = MI->getDesc();
- DEBUG({
- dbgs() << "\nStarting RegAlloc of: " << *MI;
- dbgs() << " Regs have values: ";
- for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
- if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
- if (PhysRegsUsed[i] && isVirtRegModified(PhysRegsUsed[i]))
- dbgs() << "*";
- dbgs() << "[" << TRI->getName(i)
- << ",%reg" << PhysRegsUsed[i] << "] ";
- }
- dbgs() << '\n';
- });
-
- // Determine whether this is a copy instruction. The cases where the
- // source or destination are phys regs are handled specially.
- unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg;
- unsigned SrcCopyPhysReg = 0U;
- bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
- SrcCopySubReg, DstCopySubReg) &&
- SrcCopySubReg == DstCopySubReg;
- if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg))
- SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg);
-
- // Loop over the implicit uses, making sure that they are at the head of the
- // use order list, so they don't get reallocated.
- if (TID.ImplicitUses) {
- for (const unsigned *ImplicitUses = TID.ImplicitUses;
- *ImplicitUses; ++ImplicitUses)
- MarkPhysRegRecentlyUsed(*ImplicitUses);
- }
-
- SmallVector<unsigned, 8> Kills;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isKill()) continue;
-
- if (!MO.isImplicit())
- Kills.push_back(MO.getReg());
- else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
- // These are extra physical register kills when a sub-register
- // is defined (def of a sub-register is a read/mod/write of the
- // larger registers). Ignore.
- Kills.push_back(MO.getReg());
- }
-
- // If any physical regs are earlyclobber, spill any value they might
- // have in them, then mark them unallocatable.
- // If any virtual regs are earlyclobber, allocate them now (before
- // freeing inputs that are killed).
- if (MI->isInlineAsm()) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() ||
- !MO.getReg())
- continue;
-
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
- unsigned DestVirtReg = MO.getReg();
- unsigned DestPhysReg;
-
- // If DestVirtReg already has a value, use it.
- if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
- DestPhysReg = getReg(MBB, MI, DestVirtReg);
- MF->getRegInfo().setPhysRegUsed(DestPhysReg);
- markVirtRegModified(DestVirtReg);
- getVirtRegLastUse(DestVirtReg) =
- std::make_pair((MachineInstr*)0, 0);
- DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
- << " to %reg" << DestVirtReg << "\n");
- if (unsigned DestSubIdx = MO.getSubReg()) {
- MO.setSubReg(0);
- DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx);
- }
- MO.setReg(DestPhysReg); // Assign the earlyclobber register
- } else {
- unsigned Reg = MO.getReg();
- if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
- // These are extra physical register defs when a sub-register
- // is defined (def of a sub-register is a read/mod/write of the
- // larger registers). Ignore.
- if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
-
- MF->getRegInfo().setPhysRegUsed(Reg);
- spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
- AddToPhysRegsUseOrder(Reg);
-
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] == -2) continue;
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- AddToPhysRegsUseOrder(*SubRegs);
- }
- }
- }
- }
-
- // If a DBG_VALUE says something is located in a spilled register,
- // change the DBG_VALUE to be undef, which prevents the register
- // from being reloaded here. Doing that would change the generated
- // code, unless another use immediately follows this instruction.
- if (MI->isDebugValue() &&
- MI->getNumOperands()==3 && MI->getOperand(0).isReg()) {
- unsigned VirtReg = MI->getOperand(0).getReg();
- if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- !getVirt2PhysRegMapSlot(VirtReg))
- MI->getOperand(0).setReg(0U);
- }
-
- // Get the used operands into registers. This has the potential to spill
- // incoming values if we are out of registers. Note that we completely
- // ignore physical register uses here. We assume that if an explicit
- // physical register is referenced by the instruction, it is guaranteed
- // to be live-in, or the input is badly hosed.
- //
- SmallSet<unsigned, 4> ReloadedRegs;
- for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
- MachineOperand &MO = MI->getOperand(i);
- // here we are looking for only used operands (never def&use)
- if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- MI = reloadVirtReg(MBB, MI, i, ReloadedRegs,
- isCopy ? DstCopyReg : 0);
- }
-
- // If this instruction is the last user of this register, kill the
- // value, freeing the register being used, so it doesn't need to be
- // spilled to memory.
- //
- for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
- unsigned VirtReg = Kills[i];
- unsigned PhysReg = VirtReg;
- if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
- // If the virtual register was never materialized into a register, it
- // might not be in the map, but it won't hurt to zero it out anyway.
- unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
- PhysReg = PhysRegSlot;
- PhysRegSlot = 0;
- } else if (PhysRegsUsed[PhysReg] == -2) {
- // Unallocatable register dead, ignore.
- continue;
- } else {
- assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
- "Silently clearing a virtual register?");
- }
-
- if (!PhysReg) continue;
-
- DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg)
- << "[%reg" << VirtReg <<"], removing it from live set\n");
- removePhysReg(PhysReg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] != -2) {
- DEBUG(dbgs() << " Last use of "
- << TRI->getName(*SubRegs) << "[%reg" << VirtReg
- <<"], removing it from live set\n");
- removePhysReg(*SubRegs);
- }
- }
- }
-
- // Loop over all of the operands of the instruction, spilling registers that
- // are defined, and marking explicit destinations in the PhysRegsUsed map.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() ||
- MO.isEarlyClobber() ||
- !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
- continue;
-
- unsigned Reg = MO.getReg();
- if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
- // These are extra physical register defs when a sub-register
- // is defined (def of a sub-register is a read/mod/write of the
- // larger registers). Ignore.
- if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
-
- MF->getRegInfo().setPhysRegUsed(Reg);
- spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
- AddToPhysRegsUseOrder(Reg);
-
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] == -2) continue;
-
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- AddToPhysRegsUseOrder(*SubRegs);
- }
- }
-
- // Loop over the implicit defs, spilling them as well.
- if (TID.ImplicitDefs) {
- for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
- *ImplicitDefs; ++ImplicitDefs) {
- unsigned Reg = *ImplicitDefs;
- if (PhysRegsUsed[Reg] != -2) {
- spillPhysReg(MBB, MI, Reg, true);
- AddToPhysRegsUseOrder(Reg);
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
- }
- MF->getRegInfo().setPhysRegUsed(Reg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] == -2) continue;
-
- AddToPhysRegsUseOrder(*SubRegs);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- }
- }
- }
-
- SmallVector<unsigned, 8> DeadDefs;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDead())
- DeadDefs.push_back(MO.getReg());
- }
-
- // Okay, we have allocated all of the source operands and spilled any values
- // that would be destroyed by defs of this instruction. Loop over the
- // explicit defs and assign them to a register, spilling incoming values if
- // we need to scavenge a register.
- //
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || !MO.getReg() ||
- MO.isEarlyClobber() ||
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
-
- unsigned DestVirtReg = MO.getReg();
- unsigned DestPhysReg;
-
- // If DestVirtReg already has a value, use it.
- if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) {
- // If this is a copy try to reuse the input as the output;
- // that will make the copy go away.
- // If this is a copy, the source reg is a phys reg, and
- // that reg is available, use that phys reg for DestPhysReg.
- // If this is a copy, the source reg is a virtual reg, and
- // the phys reg that was assigned to that virtual reg is now
- // available, use that phys reg for DestPhysReg. (If it's now
- // available that means this was the last use of the source.)
- if (isCopy &&
- TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) &&
- isPhysRegAvailable(SrcCopyReg)) {
- DestPhysReg = SrcCopyReg;
- assignVirtToPhysReg(DestVirtReg, DestPhysReg);
- } else if (isCopy &&
- TargetRegisterInfo::isVirtualRegister(SrcCopyReg) &&
- SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) &&
- MF->getRegInfo().getRegClass(DestVirtReg)->
- contains(SrcCopyPhysReg)) {
- DestPhysReg = SrcCopyPhysReg;
- assignVirtToPhysReg(DestVirtReg, DestPhysReg);
- } else
- DestPhysReg = getReg(MBB, MI, DestVirtReg);
- }
- MF->getRegInfo().setPhysRegUsed(DestPhysReg);
- markVirtRegModified(DestVirtReg);
- getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
- DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
- << " to %reg" << DestVirtReg << "\n");
-
- if (unsigned DestSubIdx = MO.getSubReg()) {
- MO.setSubReg(0);
- DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx);
- }
- MO.setReg(DestPhysReg); // Assign the output register
- }
-
- // If this instruction defines any registers that are immediately dead,
- // kill them now.
- //
- for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
- unsigned VirtReg = DeadDefs[i];
- unsigned PhysReg = VirtReg;
- if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
- unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
- PhysReg = PhysRegSlot;
- assert(PhysReg != 0);
- PhysRegSlot = 0;
- } else if (PhysRegsUsed[PhysReg] == -2) {
- // Unallocatable register dead, ignore.
- continue;
- } else if (!PhysReg)
- continue;
-
- DEBUG(dbgs() << " Register " << TRI->getName(PhysReg)
- << " [%reg" << VirtReg
- << "] is never used, removing it from live set\n");
- removePhysReg(PhysReg);
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
- *AliasSet; ++AliasSet) {
- if (PhysRegsUsed[*AliasSet] != -2) {
- DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet)
- << " [%reg" << *AliasSet
- << "] is never used, removing it from live set\n");
- removePhysReg(*AliasSet);
- }
- }
- }
-
- // If this instruction is a call, make sure there are no dirty registers. The
- // call might throw an exception, and the landing pad expects to find all
- // registers in stack slots.
- if (TID.isCall())
- for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
- if (PhysRegsUsed[i] <= 0) continue;
- unsigned VirtReg = PhysRegsUsed[i];
- if (!isVirtRegModified(VirtReg)) continue;
- DEBUG(dbgs() << " Storing dirty %reg" << VirtReg);
- storeVirtReg(MBB, MI, VirtReg, i, false);
- markVirtRegModified(VirtReg, false);
- DEBUG(dbgs() << " because the call might throw\n");
- }
-
- // Finally, if this is a noop copy instruction, zap it. (Except that if
- // the copy is dead, it must be kept to avoid messing up liveness info for
- // the register scavenger. See pr4100.)
- if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
- SrcCopySubReg, DstCopySubReg) &&
- SrcCopyReg == DstCopyReg && SrcCopySubReg == DstCopySubReg &&
- DeadDefs.empty()) {
- ++NumCopies;
- MBB.erase(MI);
- }
- }
-
- MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
-
- // Spill all physical registers holding virtual registers now.
- for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
- if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
- if (unsigned VirtReg = PhysRegsUsed[i])
- spillVirtReg(MBB, MI, VirtReg, i);
- else
- removePhysReg(i);
- }
-
-#if 0
- // This checking code is very expensive.
- bool AllOk = true;
- for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
- e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
- if (unsigned PR = Virt2PhysRegMap[i]) {
- cerr << "Register still mapped: " << i << " -> " << PR << "\n";
- AllOk = false;
- }
- assert(AllOk && "Virtual registers still in phys regs?");
-#endif
-
- // Clear any physical registers which appear live at the end of the basic
- // block, but which do not hold any virtual registers. e.g., the stack
- // pointer.
- PhysRegsUseOrder.clear();
-}
-
-/// runOnMachineFunction - Register allocate the whole function
-///
-bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
- DEBUG(dbgs() << "Machine Function\n");
- MF = &Fn;
- MRI = &Fn.getRegInfo();
- TM = &Fn.getTarget();
- TRI = TM->getRegisterInfo();
- TII = TM->getInstrInfo();
-
- PhysRegsUsed.assign(TRI->getNumRegs(), -1);
-
- // At various places we want to efficiently check to see whether a register
- // is allocatable. To handle this, we mark all unallocatable registers as
- // being pinned down, permanently.
- {
- BitVector Allocable = TRI->getAllocatableSet(Fn);
- for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
- if (!Allocable[i])
- PhysRegsUsed[i] = -2; // Mark the reg unallocable.
- }
-
- // initialize the virtual->physical register map to have a 'null'
- // mapping for all virtual registers
- unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
- StackSlotForVirtReg.grow(LastVirtReg);
- Virt2PhysRegMap.grow(LastVirtReg);
- Virt2LastUseMap.grow(LastVirtReg);
- VirtRegModified.resize(LastVirtReg+1 -
- TargetRegisterInfo::FirstVirtualRegister);
- UsedInMultipleBlocks.resize(LastVirtReg+1 -
- TargetRegisterInfo::FirstVirtualRegister);
-
- // Loop over all of the basic blocks, eliminating virtual register references
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB)
- AllocateBasicBlock(*MBB);
-
- StackSlotForVirtReg.clear();
- PhysRegsUsed.clear();
- VirtRegModified.clear();
- UsedInMultipleBlocks.clear();
- Virt2PhysRegMap.clear();
- Virt2LastUseMap.clear();
- return true;
-}
-
-FunctionPass *llvm::createLocalRegisterAllocator() {
- return new RALocal();
-}
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 4fafd2818a12..7e61a12a7eea 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -396,28 +396,23 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
if (srcRegIsPhysical && dstRegIsPhysical)
continue;
- // If it's a copy that includes a virtual register but the source and
- // destination classes differ then we can't coalesce, so continue with
- // the next instruction.
- const TargetRegisterClass *srcRegClass = srcRegIsPhysical ?
- tri->getPhysicalRegisterRegClass(srcReg) : mri->getRegClass(srcReg);
-
- const TargetRegisterClass *dstRegClass = dstRegIsPhysical ?
- tri->getPhysicalRegisterRegClass(dstReg) : mri->getRegClass(dstReg);
-
- if (srcRegClass != dstRegClass)
+ // If it's a copy that includes two virtual registers but the source and
+ // destination classes differ then we can't coalesce.
+ if (!srcRegIsPhysical && !dstRegIsPhysical &&
+ mri->getRegClass(srcReg) != mri->getRegClass(dstReg))
continue;
- // We also need any physical regs to be allocable, coalescing with
- // a non-allocable register is invalid.
- if (srcRegIsPhysical) {
+ // If one is physical and one is virtual, check that the physical is
+ // allocatable in the class of the virtual.
+ if (srcRegIsPhysical && !dstRegIsPhysical) {
+ const TargetRegisterClass *dstRegClass = mri->getRegClass(dstReg);
if (std::find(dstRegClass->allocation_order_begin(*mf),
dstRegClass->allocation_order_end(*mf), srcReg) ==
dstRegClass->allocation_order_end(*mf))
continue;
}
-
- if (dstRegIsPhysical) {
+ if (!srcRegIsPhysical && dstRegIsPhysical) {
+ const TargetRegisterClass *srcRegClass = mri->getRegClass(srcReg);
if (std::find(srcRegClass->allocation_order_begin(*mf),
srcRegClass->allocation_order_end(*mf), dstReg) ==
srcRegClass->allocation_order_end(*mf))
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 1131e3db4e7d..ab0bc2d78a60 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -16,6 +16,8 @@
#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Pass.h"
@@ -33,6 +35,160 @@ char RegisterCoalescer::ID = 0;
//
RegisterCoalescer::~RegisterCoalescer() {}
+unsigned CoalescerPair::compose(unsigned a, unsigned b) const {
+ if (!a) return b;
+ if (!b) return a;
+ return tri_.composeSubRegIndices(a, b);
+}
+
+bool CoalescerPair::isMoveInstr(const MachineInstr *MI,
+ unsigned &Src, unsigned &Dst,
+ unsigned &SrcSub, unsigned &DstSub) const {
+ if (MI->isCopy()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = MI->getOperand(0).getSubReg();
+ Src = MI->getOperand(1).getReg();
+ SrcSub = MI->getOperand(1).getSubReg();
+ } else if (MI->isSubregToReg()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm());
+ Src = MI->getOperand(2).getReg();
+ SrcSub = MI->getOperand(2).getSubReg();
+ } else if (!tii_.isMoveInstr(*MI, Src, Dst, SrcSub, DstSub)) {
+ return false;
+ }
+ return true;
+}
+
+bool CoalescerPair::setRegisters(const MachineInstr *MI) {
+ srcReg_ = dstReg_ = subIdx_ = 0;
+ newRC_ = 0;
+ flipped_ = crossClass_ = false;
+
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub))
+ return false;
+ partial_ = SrcSub || DstSub;
+
+ // If one register is a physreg, it must be Dst.
+ if (TargetRegisterInfo::isPhysicalRegister(Src)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ flipped_ = true;
+ }
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
+ // Eliminate DstSub on a physreg.
+ if (DstSub) {
+ Dst = tri_.getSubReg(Dst, DstSub);
+ if (!Dst) return false;
+ DstSub = 0;
+ }
+
+ // Eliminate SrcSub by picking a corresponding Dst superregister.
+ if (SrcSub) {
+ Dst = tri_.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
+ if (!Dst) return false;
+ SrcSub = 0;
+ } else if (!MRI.getRegClass(Src)->contains(Dst)) {
+ return false;
+ }
+ } else {
+ // Both registers are virtual.
+
+ // Both registers have subreg indices.
+ if (SrcSub && DstSub) {
+ // For now we only handle the case of identical indices in commensurate
+ // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg
+ // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg.
+ if (SrcSub != DstSub)
+ return false;
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+ if (!getCommonSubClass(DstRC, SrcRC))
+ return false;
+ SrcSub = DstSub = 0;
+ }
+
+ // There can be no SrcSub.
+ if (SrcSub) {
+ std::swap(Src, Dst);
+ DstSub = SrcSub;
+ SrcSub = 0;
+ assert(!flipped_ && "Unexpected flip");
+ flipped_ = true;
+ }
+
+ // Find the new register class.
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+ if (DstSub)
+ newRC_ = tri_.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+ else
+ newRC_ = getCommonSubClass(DstRC, SrcRC);
+ if (!newRC_)
+ return false;
+ crossClass_ = newRC_ != DstRC || newRC_ != SrcRC;
+ }
+ // Check our invariants
+ assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
+ assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
+ "Cannot have a physical SubIdx");
+ srcReg_ = Src;
+ dstReg_ = Dst;
+ subIdx_ = DstSub;
+ return true;
+}
+
+bool CoalescerPair::flip() {
+ if (subIdx_ || TargetRegisterInfo::isPhysicalRegister(dstReg_))
+ return false;
+ std::swap(srcReg_, dstReg_);
+ flipped_ = !flipped_;
+ return true;
+}
+
+bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub))
+ return false;
+
+ // Find the virtual register that is srcReg_.
+ if (Dst == srcReg_) {
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ } else if (Src != srcReg_) {
+ return false;
+ }
+
+ // Now check that Dst matches dstReg_.
+ if (TargetRegisterInfo::isPhysicalRegister(dstReg_)) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ assert(!subIdx_ && "Inconsistent CoalescerPair state.");
+ // DstSub could be set for a physreg from INSERT_SUBREG.
+ if (DstSub)
+ Dst = tri_.getSubReg(Dst, DstSub);
+ // Full copy of Src.
+ if (!SrcSub)
+ return dstReg_ == Dst;
+ // This is a partial register copy. Check that the parts match.
+ return tri_.getSubReg(dstReg_, SrcSub) == Dst;
+ } else {
+ // dstReg_ is virtual.
+ if (dstReg_ != Dst)
+ return false;
+ // Registers match, do the subregisters line up?
+ return compose(subIdx_, SrcSub) == DstSub;
+ }
+}
+
// Because of the way .a files work, we must force the SimpleRC
// implementation to be pulled in if the RegisterCoalescer classes are
// pulled in. Otherwise we run the risk of RegisterCoalescer being
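
A hedged illustration, not part of the patch: CoalescerPair::compose above relies on the convention that sub-register index 0 means "the whole register", so 0 acts as the identity of composition and only two genuine indices need the target-specific query. This is what lets isMoveInstr fold the SUBREG_TO_REG index into an operand's possibly-absent sub-register index. The helper name below is hypothetical.

    #include "llvm/Target/TargetRegisterInfo.h"

    // Sub-register index 0 denotes the full register and therefore behaves as
    // the identity element; any other pair is resolved by the target.
    static unsigned composeIndices(unsigned A, unsigned B,
                                   const llvm::TargetRegisterInfo &TRI) {
      if (!A) return B;
      if (!B) return A;
      return TRI.composeSubRegIndices(A, B);
    }
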
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 690e59f96ebc..43b3fb642635 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -141,6 +141,10 @@ void RegScavenger::forward() {
// Find out which registers are early clobbered, killed, defined, and marked
// def-dead in this instruction.
+ // FIXME: The scavenger is not predication aware. If the instruction is
+ // predicated, conservatively assume "kill" markers do not actually kill the
+ // register. Similarly, ignore "dead" markers.
+ bool isPred = TII->isPredicated(MI);
BitVector EarlyClobberRegs(NumPhysRegs);
BitVector KillRegs(NumPhysRegs);
BitVector DefRegs(NumPhysRegs);
@@ -155,11 +159,11 @@ void RegScavenger::forward() {
if (MO.isUse()) {
// Two-address operands implicitly kill.
- if (MO.isKill() || MI->isRegTiedToDefOperand(i))
+ if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i)))
addRegWithSubRegs(KillRegs, Reg);
} else {
assert(MO.isDef());
- if (MO.isDead())
+ if (!isPred && MO.isDead())
addRegWithSubRegs(DeadRegs, Reg);
else
addRegWithSubRegs(DefRegs, Reg);
@@ -238,8 +242,18 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
return 0;
}
+/// getRegsAvailable - Return, in Mask, all currently available registers in
+/// the register class RC.
+void RegScavenger::getRegsAvailable(const TargetRegisterClass *RC,
+ BitVector &Mask) {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I))
+ Mask.set(*I);
+}
+
/// findSurvivorReg - Return the candidate register that is unused for the
-/// longest after MBBI. UseMI is set to the instruction where the search
+/// longest after StartMI. UseMI is set to the instruction where the search
/// stopped.
///
/// No more than InstrLimit instructions are inspected.
@@ -258,6 +272,10 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
bool inVirtLiveRange = false;
for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
+ if (MI->isDebugValue()) {
+ ++InstrLimit; // Don't count debug instructions
+ continue;
+ }
bool isVirtKillInsn = false;
bool isVirtDefInsn = false;
// Remove any candidates touched by instruction.
@@ -321,13 +339,16 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
Candidates.reset(MO.getReg());
}
+ // Try to find a register that's unused if there is one, as then we won't
+ // have to spill.
+ if ((Candidates & RegsAvailable).any())
+ Candidates &= RegsAvailable;
+
// Find the register whose use is furthest away.
MachineBasicBlock::iterator UseMI;
unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
- // If we found an unused register there is no reason to spill it. We have
- // probably found a callee-saved register that has been saved in the
- // prologue, but happens to be unused at this point.
+ // If we found an unused register there is no reason to spill it.
if (!isAliasUsed(SReg))
return SReg;
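
A minimal sketch, assuming only llvm::BitVector, of the preference the hunk above introduces: narrow the candidate set to registers that are currently free when at least one such candidate exists, and otherwise keep the full set and accept that spill/restore code will be needed. Illustrative only; the scavenger's real interface is not shown here.

    #include "llvm/ADT/BitVector.h"

    // Prefer scavenging an unused register so no spill has to be inserted;
    // fall back to the original candidates when none of them is free.
    static llvm::BitVector preferFreeCandidates(llvm::BitVector Candidates,
                                                const llvm::BitVector &Available) {
      llvm::BitVector Free = Candidates;
      Free &= Available;
      return Free.any() ? Free : Candidates;
    }
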
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index da20c12c360a..7d39dc496afe 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -380,26 +380,26 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
}
#endif
-/// InitDAGTopologicalSorting - create the initial topological
+/// InitDAGTopologicalSorting - create the initial topological
/// ordering from the DAG to be scheduled.
///
-/// The idea of the algorithm is taken from
+/// The idea of the algorithm is taken from
/// "Online algorithms for managing the topological order of
/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
-/// This is the MNR algorithm, which was first introduced by
-/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
/// "Maintaining a topological order under edge insertions".
///
-/// Short description of the algorithm:
+/// Short description of the algorithm:
///
/// Topological ordering, ord, of a DAG maps each node to a topological
/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
///
-/// This means that if there is a path from the node X to the node Z,
+/// This means that if there is a path from the node X to the node Z,
/// then ord(X) < ord(Z).
///
/// This property can be used to check for reachability of nodes:
-/// if Z is reachable from X, then an insertion of the edge Z->X would
+/// if Z is reachable from X, then an insertion of the edge Z->X would
/// create a cycle.
///
/// The algorithm first computes a topological ordering for the DAG by
@@ -431,7 +431,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
// Collect leaf nodes.
WorkList.push_back(SU);
}
- }
+ }
int Id = DAGSize;
while (!WorkList.empty()) {
@@ -456,7 +456,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
SUnit *SU = &SUnits[i];
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
- assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+ assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
"Wrong topological sorting");
}
}
@@ -494,7 +494,7 @@ void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
bool& HasLoop) {
std::vector<const SUnit*> WorkList;
- WorkList.reserve(SUnits.size());
+ WorkList.reserve(SUnits.size());
WorkList.push_back(SU);
do {
@@ -504,20 +504,20 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
for (int I = SU->Succs.size()-1; I >= 0; --I) {
int s = SU->Succs[I].getSUnit()->NodeNum;
if (Node2Index[s] == UpperBound) {
- HasLoop = true;
+ HasLoop = true;
return;
}
// Visit successors if not already and in affected region.
if (!Visited.test(s) && Node2Index[s] < UpperBound) {
WorkList.push_back(SU->Succs[I].getSUnit());
- }
- }
+ }
+ }
} while (!WorkList.empty());
}
-/// Shift - Renumber the nodes so that the topological ordering is
+/// Shift - Renumber the nodes so that the topological ordering is
/// preserved.
-void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
+void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
int UpperBound) {
std::vector<int> L;
int shift = 0;
@@ -568,7 +568,7 @@ bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
// Is Ord(TargetSU) < Ord(SU) ?
if (LowerBound < UpperBound) {
Visited.reset();
- // There may be a path from TargetSU to SU. Check for it.
+ // There may be a path from TargetSU to SU. Check for it.
DFS(TargetSU, UpperBound, HasLoop);
}
return HasLoop;
@@ -580,8 +580,7 @@ void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
Index2Node[index] = n;
}
-ScheduleDAGTopologicalSort::ScheduleDAGTopologicalSort(
- std::vector<SUnit> &sunits)
- : SUnits(sunits) {}
+ScheduleDAGTopologicalSort::
+ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits) : SUnits(sunits) {}
ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
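
As an illustrative aside, not part of the patch: the invariant described in the InitDAGTopologicalSorting comment above can be sketched in a few lines of standalone C++. The names are hypothetical and chosen only for this example.

    #include <vector>

    // Node2Index[n] is the topological index of node n, so every DAG edge
    // X -> Y satisfies Node2Index[X] < Node2Index[Y]. A new edge From -> To
    // can only introduce a cycle when To already precedes From in the
    // ordering; only then does the DFS over the affected region need to run.
    static bool edgeMayCreateCycle(const std::vector<int> &Node2Index,
                                   unsigned From, unsigned To) {
      return Node2Index[To] < Node2Index[From];
    }
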
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
index ee08e1dc0eea..0a2fb3796a42 100644
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -50,11 +50,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
break;
}
}
- bool Success = TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second,
- SU->CopyDstRC, SU->CopySrcRC,
- DebugLoc());
- (void)Success;
- assert(Success && "copyRegToReg failed!");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(VRI->second);
} else {
// Copy from physical register.
assert(I->getReg() && "Unknown physical register!");
@@ -62,11 +59,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
isNew = isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
- bool Success = TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(),
- SU->CopyDstRC, SU->CopySrcRC,
- DebugLoc());
- (void)Success;
- assert(Success && "copyRegToReg failed!");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+ .addReg(I->getReg());
}
break;
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index ad82db28f88b..d90659bb163e 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -69,8 +69,10 @@ namespace llvm {
const SmallSet<unsigned, 8> &LoopLiveIns) {
unsigned Count = 0;
for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I, ++Count) {
+ I != E; ++I) {
const MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse())
@@ -79,6 +81,7 @@ namespace llvm {
if (LoopLiveIns.count(MOReg))
Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
}
+ ++Count; // Not every iteration due to dbg_value above.
}
const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 0cfd5e1d7e21..799988a4c862 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -1,5 +1,4 @@
add_llvm_library(LLVMSelectionDAG
- CallingConvLower.cpp
DAGCombiner.cpp
FastISel.cpp
FunctionLoweringInfo.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6bddd784fe86..e67175246457 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -211,6 +211,7 @@ namespace {
SDValue visitBUILD_VECTOR(SDNode *N);
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitMEMBARRIER(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
@@ -668,7 +669,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
: LD->getExtensionType();
Replace = true;
- return DAG.getExtLoad(ExtType, dl, PVT,
+ return DAG.getExtLoad(ExtType, PVT, dl,
LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset(),
MemVT, LD->isVolatile(),
@@ -890,7 +891,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
: LD->getExtensionType();
- SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
+ SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl,
LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset(),
MemVT, LD->isVolatile(),
@@ -1079,6 +1080,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::MEMBARRIER: return visitMEMBARRIER(N);
}
return SDValue();
}
@@ -1313,7 +1315,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
GA->getOpcode() == ISD::GlobalAddress)
- return DAG.getGlobalAddress(GA->getGlobal(), VT,
+ return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
GA->getOffset() +
(uint64_t)N1C->getSExtValue());
// fold ((c1-A)+c2) -> (c1+c2)-A
@@ -1550,7 +1552,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
// fold (sub Sym, c) -> Sym-c
if (N1C && GA->getOpcode() == ISD::GlobalAddress)
- return DAG.getGlobalAddress(GA->getGlobal(), VT,
+ return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
GA->getOffset() -
(uint64_t)N1C->getSExtValue());
// fold (sub Sym+c1, Sym+c2) -> c1-c2
@@ -2028,7 +2030,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
// fold (OP (sext x), (sext y)) -> (sext (OP x, y))
// fold (OP (aext x), (aext y)) -> (aext (OP x, y))
- // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
//
// do not sink logical op inside of a vector extend, since it may combine
// into a vsetcc.
@@ -2038,7 +2040,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// Avoid infinite looping with PromoteIntBinOp.
(N0.getOpcode() == ISD::ANY_EXTEND &&
(!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
- (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
+ (N0.getOpcode() == ISD::TRUNCATE &&
+ (!TLI.isZExtFree(VT, Op0VT) ||
+ !TLI.isTruncateFree(Op0VT, VT)) &&
+ TLI.isTypeLegal(Op0VT))) &&
!VT.isVector() &&
Op0VT == N1.getOperand(0).getValueType() &&
(!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
@@ -2193,7 +2198,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(),
LN0->getChain(), LN0->getBasePtr(),
LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
@@ -2216,7 +2221,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
@@ -2250,7 +2255,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
SDValue NewLoad =
- DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
LN0->getChain(), LN0->getBasePtr(),
LN0->getSrcValue(), LN0->getSrcValueOffset(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
@@ -2286,7 +2291,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
SDValue Load =
- DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
LN0->getChain(), NewPtr,
LN0->getSrcValue(), LN0->getSrcValueOffset(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
@@ -2317,7 +2322,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
// fold (or x, undef) -> -1
- if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) {
+ if (!LegalOperations &&
+ (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
}
@@ -2425,6 +2431,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
return SDValue(Rot, 0);
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -3158,6 +3169,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return NewSRL;
}
+ // Attempt to convert a srl of a load into a narrower zero-extending load.
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
// Here is a common situation. We want to optimize:
//
// %a = ...
@@ -3487,7 +3503,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
@@ -3531,7 +3547,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
@@ -3557,24 +3573,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == N0VT.getSizeInBits())
- return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/sign extend
else {
- EVT MatchingElementType =
- EVT::getIntegerVT(*DAG.getContext(),
- N0VT.getScalarType().getSizeInBits());
- EVT MatchingVectorType =
- EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
- N0VT.getVectorNumElements());
- SDValue VsetCC =
- DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
}
}
@@ -3635,10 +3651,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE &&
- (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
- (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
- N0.getValueType()) ||
- !TLI.isZExtFree(N0.getValueType(), VT))) {
+ (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
@@ -3679,7 +3692,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
@@ -3723,7 +3736,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
@@ -3881,7 +3894,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
@@ -3925,8 +3938,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
- SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
- VT, LN0->getChain(), LN0->getBasePtr(),
+ SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT,
+ N->getDebugLoc(),
+ LN0->getChain(), LN0->getBasePtr(),
LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
@@ -3950,24 +3964,24 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == N0VT.getSizeInBits())
- return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/sign extend
else {
- EVT MatchingElementType =
- EVT::getIntegerVT(*DAG.getContext(),
- N0VT.getScalarType().getSizeInBits());
- EVT MatchingVectorType =
- EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
- N0VT.getVectorNumElements());
- SDValue VsetCC =
- DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
}
}
@@ -4024,6 +4038,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
/// extended, also fold the extension to form a extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
unsigned Opc = N->getOpcode();
+
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4040,6 +4055,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
return SDValue();
+ } else if (Opc == ISD::SRL) {
+ // Another special case: SRL is basically zero-extending a narrower
+ // value.
+ ExtType = ISD::ZEXTLOAD;
+ N0 = SDValue(N, 0);
+ ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01) return SDValue();
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() - N01->getZExtValue());
}
unsigned EVTBits = ExtVT.getSizeInBits();
@@ -4085,7 +4109,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
LN0->isVolatile(), LN0->isNonTemporal(), NewAlign)
- : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
+ : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
NewAlign);
@@ -4172,7 +4196,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), EVT,
@@ -4189,7 +4213,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), EVT,
@@ -4243,8 +4267,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
- if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT))
- return ReduceLoadWidth(N);
+ if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
+ SDValue Reduced = ReduceLoadWidth(N);
+ if (Reduced.getNode())
+ return Reduced;
+ }
+
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -4943,7 +4976,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
@@ -5527,8 +5560,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > LD->getAlignment())
- return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
- LD->getValueType(0),
+ return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
+ N->getDebugLoc(),
Chain, Ptr, LD->getSrcValue(),
LD->getSrcValueOffset(), LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), Align);
@@ -5551,8 +5584,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment());
} else {
- ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
- LD->getValueType(0),
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
+ LD->getDebugLoc(),
BetterChain, Ptr, LD->getSrcValue(),
LD->getSrcValueOffset(),
LD->getMemoryVT(),
@@ -6077,7 +6110,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
- EVT EltVT = InVec.getValueType().getVectorElementType();
SDValue InOp = InVec.getOperand(0);
EVT NVT = N->getValueType(0);
if (InOp.getValueType() != NVT) {
@@ -6277,8 +6309,6 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
- return SDValue();
-
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -6334,6 +6364,59 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) {
+ if (!TLI.getShouldFoldAtomicFences())
+ return SDValue();
+
+ SDValue atomic = N->getOperand(0);
+ switch (atomic.getOpcode()) {
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ break;
+ default:
+ return SDValue();
+ }
+
+ SDValue fence = atomic.getOperand(0);
+ if (fence.getOpcode() != ISD::MEMBARRIER)
+ return SDValue();
+
+ switch (atomic.getOpcode()) {
+ case ISD::ATOMIC_CMP_SWAP:
+ return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+ fence.getOperand(0),
+ atomic.getOperand(1), atomic.getOperand(2),
+ atomic.getOperand(3)), atomic.getResNo());
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+ fence.getOperand(0),
+ atomic.getOperand(1), atomic.getOperand(2)),
+ atomic.getResNo());
+ default:
+ return SDValue();
+ }
+}
+
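// Illustrative sketch, not part of the patch: visitMEMBARRIER above re-chains
// an atomic RMW past the fence that immediately precedes it, which is only
// sound on targets whose atomic operations already act as full barriers --
// exactly what TLI.getShouldFoldAtomicFences() is asked about.  A rough C++
// analogue of the redundancy being removed:
#include <atomic>

int fence_then_rmw(std::atomic<int> &a) {
  std::atomic_thread_fence(std::memory_order_seq_cst); // redundant on such targets
  return a.fetch_add(1, std::memory_order_seq_cst);    // the RMW itself is the barrier
}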
/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to transform
/// an AND to a vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -6565,8 +6648,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
LLD->getAlignment());
} else {
Load = DAG.getExtLoad(LLD->getExtensionType(),
- TheSelect->getDebugLoc(),
TheSelect->getValueType(0),
+ TheSelect->getDebugLoc(),
LLD->getChain(), Addr, 0, 0,
LLD->getMemoryVT(),
LLD->isVolatile(),
@@ -6807,38 +6890,34 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
}
}
- // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X ->
+ // Check to see if this is an integer abs.
+ // select_cc setg[te] X, 0, X, -X ->
+ // select_cc setgt X, -1, X, -X ->
+ // select_cc setl[te] X, 0, -X, X ->
+ // select_cc setlt X, 1, -X, X ->
// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
- if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
- N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
- N2.getOperand(0) == N1 && N0.getValueType().isInteger()) {
+ if (N1C) {
+ ConstantSDNode *SubC = NULL;
+ if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+ (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
+ N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
+ SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
+ else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
+ (N1C->isOne() && CC == ISD::SETLT)) &&
+ N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
+ SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
+
EVT XType = N0.getValueType();
- SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0,
- DAG.getConstant(XType.getSizeInBits()-1,
- getShiftAmountTy()));
- SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType,
- N0, Shift);
- AddToWorkList(Shift.getNode());
- AddToWorkList(Add.getNode());
- return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
- }
- // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X ->
- // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
- if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
- N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
- if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
- EVT XType = N0.getValueType();
- if (SubC->isNullValue() && XType.isInteger()) {
- SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
- N0,
- DAG.getConstant(XType.getSizeInBits()-1,
- getShiftAmountTy()));
- SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
- XType, N0, Shift);
- AddToWorkList(Shift.getNode());
- AddToWorkList(Add.getNode());
- return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
- }
+ if (SubC && SubC->isNullValue() && XType.isInteger()) {
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
+ N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
+ XType, N0, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
}
}
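// Illustrative sketch, not part of the patch: all of the select_cc patterns
// merged above lower to the branch-free sequence named in the comment,
// Y = sra(X, bits-1); abs(X) = xor(add(X, Y), Y).  Standalone C++ check,
// assuming an arithmetic right shift of negative values (true on the targets
// this combine serves); as usual, INT_MIN maps to itself:
#include <cstdint>

int32_t abs_via_sra_add_xor(int32_t x) {
  int32_t y = x >> 31;                                  // 0 if x >= 0, all-ones if x < 0
  uint32_t r = (static_cast<uint32_t>(x) + static_cast<uint32_t>(y))
               ^ static_cast<uint32_t>(y);              // add then xor, in wrap-around arithmetic
  return static_cast<int32_t>(r);
}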
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 95f4d072e01f..3f7e4a5fac42 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -44,18 +44,38 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "FunctionLoweringInfo.h"
using namespace llvm;
+/// startNewBlock - Set the current block to which generated machine
+/// instructions will be appended, and clear the local CSE map.
+///
+void FastISel::startNewBlock() {
+ LocalValueMap.clear();
+
+ // Start out as null, meaning no local-value instructions have
+ // been emitted.
+ LastLocalValue = 0;
+
+ // Advance the last local value past any EH_LABEL instructions.
+ MachineBasicBlock::iterator
+ I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end();
+ while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) {
+ LastLocalValue = I;
+ ++I;
+ }
+}
+
bool FastISel::hasTrivialKill(const Value *V) const {
// Don't consider constants or arguments to have trivial kills.
const Instruction *I = dyn_cast<Instruction>(V);
@@ -99,25 +119,31 @@ unsigned FastISel::getRegForValue(const Value *V) {
// cache values defined by Instructions across blocks, and other values
// only locally. This is because Instructions already have the SSA
// def-dominates-use requirement enforced.
- DenseMap<const Value *, unsigned>::iterator I = ValueMap.find(V);
- if (I != ValueMap.end())
- return I->second;
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end()) {
+ unsigned Reg = I->second;
+ return Reg;
+ }
unsigned Reg = LocalValueMap[V];
if (Reg != 0)
return Reg;
// In bottom-up mode, just create the virtual register which will be used
// to hold the value. It will be materialized later.
- if (IsBottomUp) {
- Reg = createResultReg(TLI.getRegClassFor(VT));
- if (isa<Instruction>(V))
- ValueMap[V] = Reg;
- else
- LocalValueMap[V] = Reg;
- return Reg;
- }
+ if (isa<Instruction>(V) &&
+ (!isa<AllocaInst>(V) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
+ return FuncInfo.InitializeRegForValue(V);
+
+ MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea();
+
+ // Materialize the value in a register. Emit any instructions in the
+ // local value area.
+ Reg = materializeRegForValue(V, VT);
- return materializeRegForValue(V, VT);
+ leaveLocalValueArea(SaveInsertPt);
+
+ return Reg;
}
/// materializeRegForValue - Helper for getRegForValue. This function is
@@ -161,11 +187,15 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
}
}
} else if (const Operator *Op = dyn_cast<Operator>(V)) {
- if (!SelectOperator(Op, Op->getOpcode())) return 0;
- Reg = LocalValueMap[Op];
+ if (!SelectOperator(Op, Op->getOpcode()))
+ if (!isa<Instruction>(Op) ||
+ !TargetSelectInstruction(cast<Instruction>(Op)))
+ return 0;
+ Reg = lookUpRegForValue(Op);
} else if (isa<UndefValue>(V)) {
Reg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
}
// If target-independent code couldn't handle the value, give target-specific
@@ -175,8 +205,10 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
// Don't cache constant materializations in the general ValueMap.
// To do so would require tracking what uses they dominate.
- if (Reg != 0)
+ if (Reg != 0) {
LocalValueMap[V] = Reg;
+ LastLocalValue = MRI.getVRegDef(Reg);
+ }
return Reg;
}
@@ -185,8 +217,9 @@ unsigned FastISel::lookUpRegForValue(const Value *V) {
// cache values defined by Instructions across blocks, and other values
// only locally. This is because Instructions already have the SSA
// def-dominates-use requirement enforced.
- if (ValueMap.count(V))
- return ValueMap[V];
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
return LocalValueMap[V];
}
@@ -202,14 +235,17 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
return Reg;
}
- unsigned &AssignedReg = ValueMap[I];
+ unsigned &AssignedReg = FuncInfo.ValueMap[I];
if (AssignedReg == 0)
+ // Use the new register.
AssignedReg = Reg;
else if (Reg != AssignedReg) {
- const TargetRegisterClass *RegClass = MRI.getRegClass(Reg);
- TII.copyRegToReg(*MBB, MBB->end(), AssignedReg,
- Reg, RegClass, RegClass, DL);
+ // Arrange for uses of AssignedReg to be replaced by uses of Reg.
+ FuncInfo.RegFixups[AssignedReg] = Reg;
+
+ AssignedReg = Reg;
}
+
return AssignedReg;
}
@@ -237,6 +273,33 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
}
+void FastISel::recomputeInsertPt() {
+ if (getLastLocalValue()) {
+ FuncInfo.InsertPt = getLastLocalValue();
+ ++FuncInfo.InsertPt;
+ } else
+ FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
+
+ // Now skip past any EH_LABELs, which must remain at the beginning.
+ while (FuncInfo.InsertPt != FuncInfo.MBB->end() &&
+ FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL)
+ ++FuncInfo.InsertPt;
+}
+
+MachineBasicBlock::iterator FastISel::enterLocalValueArea() {
+ MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
+ recomputeInsertPt();
+ return OldInsertPt;
+}
+
+void FastISel::leaveLocalValueArea(MachineBasicBlock::iterator OldInsertPt) {
+ if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
+ LastLocalValue = llvm::prior(FuncInfo.InsertPt);
+
+ // Restore the previous insert position.
+ FuncInfo.InsertPt = OldInsertPt;
+}
+
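// Illustrative sketch, not part of the patch: enterLocalValueArea and
// leaveLocalValueArea above bracket emission into the block's local-value
// area: save the insertion point, emit at the front of the block, then
// restore.  A toy standalone analogue of that save/emit/restore pattern,
// with std::list standing in for the machine basic block:
#include <list>

void emit_into_front_area(std::list<int> &block, std::list<int>::iterator &insertPt) {
  std::list<int>::iterator saved = insertPt;  // enterLocalValueArea(): remember where we were
  insertPt = block.begin();                   // recomputeInsertPt(): emit at the top of the block
  insertPt = block.insert(insertPt, 7);       // materialize a "local value" there
  ++insertPt;                                 // subsequent local values go after it
  insertPt = saved;                           // leaveLocalValueArea(): restore the old point
}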
/// SelectBinaryOp - Select and emit code for a binary operator instruction,
/// which has an opcode which directly corresponds to the given ISD opcode.
///
@@ -345,7 +408,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
// If this is a constant subscript, handle it quickly.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
- if (CI->getZExtValue() == 0) continue;
+ if (CI->isZero()) continue;
uint64_t Offs =
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
@@ -395,7 +458,7 @@ bool FastISel::SelectCall(const User *I) {
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
if (!DIVariable(DI->getVariable()).Verify() ||
- !MF.getMMI().hasDebugInfo())
+ !FuncInfo.MF->getMMI().hasDebugInfo())
return true;
const Value *Address = DI->getAddress();
@@ -409,11 +472,12 @@ bool FastISel::SelectCall(const User *I) {
// those are handled in SelectionDAGBuilder.
if (AI) {
DenseMap<const AllocaInst*, int>::iterator SI =
- StaticAllocaMap.find(AI);
- if (SI == StaticAllocaMap.end()) break; // VLAs.
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI == FuncInfo.StaticAllocaMap.end()) break; // VLAs.
int FI = SI->second;
if (!DI->getDebugLoc().isUnknown())
- MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc());
+ FuncInfo.MF->getMMI().setVariableDbgInfo(DI->getVariable(),
+ FI, DI->getDebugLoc());
} else
// Building the map above is target independent. Generating DBG_VALUE
// inline is target dependent; do this now.
@@ -428,23 +492,28 @@ bool FastISel::SelectCall(const User *I) {
if (!V) {
// Currently the optimizer can produce this; insert an undef to
// help debugging. Probably the optimizer should not do this.
- BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()).
- addMetadata(DI->getVariable());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(0U).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
} else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()).
- addMetadata(DI->getVariable());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(CI->getZExtValue()).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
- BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()).
- addMetadata(DI->getVariable());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addFPImm(CF).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
} else if (unsigned Reg = lookUpRegForValue(V)) {
- BuildMI(MBB, DL, II).addReg(Reg, RegState::Debug).addImm(DI->getOffset()).
- addMetadata(DI->getVariable());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Reg, RegState::Debug).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
// Insert an undef so we can see what we dropped.
- BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()).
- addMetadata(DI->getVariable());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(0U).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
}
return true;
}
@@ -453,14 +522,13 @@ bool FastISel::SelectCall(const User *I) {
switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
default: break;
case TargetLowering::Expand: {
- assert(MBB->isLandingPad() && "Call to eh.exception not in landing pad!");
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
unsigned Reg = TLI.getExceptionAddressRegister();
const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
unsigned ResultReg = createResultReg(RC);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- Reg, RC, RC, DL);
- assert(InsertedCopy && "Can't copy address registers!");
- InsertedCopy = InsertedCopy;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Reg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -472,25 +540,23 @@ bool FastISel::SelectCall(const User *I) {
switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
default: break;
case TargetLowering::Expand: {
- if (MBB->isLandingPad())
- AddCatchInfo(*cast<CallInst>(I), &MF.getMMI(), MBB);
+ if (FuncInfo.MBB->isLandingPad())
+ AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
else {
#ifndef NDEBUG
- CatchInfoLost.insert(cast<CallInst>(I));
+ FuncInfo.CatchInfoLost.insert(cast<CallInst>(I));
#endif
// FIXME: Mark exception selector register as live in. Hack for PR1508.
unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) MBB->addLiveIn(Reg);
+ if (Reg) FuncInfo.MBB->addLiveIn(Reg);
}
unsigned Reg = TLI.getExceptionSelectorRegister();
EVT SrcVT = TLI.getPointerTy();
const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
unsigned ResultReg = createResultReg(RC);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg,
- RC, RC, DL);
- assert(InsertedCopy && "Can't copy address registers!");
- InsertedCopy = InsertedCopy;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Reg);
bool ResultRegIsKill = hasTrivialKill(I);
@@ -605,12 +671,12 @@ bool FastISel::SelectBitCast(const User *I) {
if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
- ResultReg = createResultReg(DstClass);
-
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- Op0, DstClass, SrcClass, DL);
- if (!InsertedCopy)
- ResultReg = 0;
+ // Don't attempt a cross-class copy. It will likely fail.
+ if (SrcClass == DstClass) {
+ ResultReg = createResultReg(DstClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Op0);
+ }
}
// If the reg-reg copy failed, select a BIT_CONVERT opcode.
@@ -655,14 +721,15 @@ FastISel::SelectInstruction(const Instruction *I) {
/// unless it is the immediate (fall-through) successor, and update
/// the CFG.
void
-FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
- if (MBB->isLayoutSuccessor(MSucc)) {
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
+ if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
// The unconditional fall-through case, which needs no instructions.
} else {
// The unconditional branch case.
- TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>());
+ TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
+ SmallVector<MachineOperand, 0>(), DL);
}
- MBB->addSuccessor(MSucc);
+ FuncInfo.MBB->addSuccessor(MSucc);
}
/// SelectFNeg - Emit an FNeg operation.
@@ -712,8 +779,39 @@ FastISel::SelectFNeg(const User *I) {
}
bool
+FastISel::SelectLoad(const User *I) {
+ LoadInst *LI = const_cast<LoadInst *>(cast<LoadInst>(I));
+
+ // For a load from an alloca, make a limited effort to find the value
+ // already available in a register, avoiding redundant loads.
+ if (!LI->isVolatile() && isa<AllocaInst>(LI->getPointerOperand())) {
+ BasicBlock::iterator ScanFrom = LI;
+ if (const Value *V = FindAvailableLoadedValue(LI->getPointerOperand(),
+ LI->getParent(), ScanFrom)) {
+ if (!V->use_empty() &&
+ (!isa<Instruction>(V) ||
+ cast<Instruction>(V)->getParent() == LI->getParent() ||
+ (isa<AllocaInst>(V) &&
+ FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) &&
+ (!isa<Argument>(V) ||
+ LI->getParent() == &LI->getParent()->getParent()->getEntryBlock())) {
+ unsigned ResultReg = getRegForValue(V);
+ if (ResultReg != 0) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+bool
FastISel::SelectOperator(const User *I, unsigned Opcode) {
switch (Opcode) {
+ case Instruction::Load:
+ return SelectLoad(I);
case Instruction::Add:
return SelectBinaryOp(I, ISD::ADD);
case Instruction::FAdd:
@@ -762,8 +860,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
if (BI->isUnconditional()) {
const BasicBlock *LLVMSucc = BI->getSuccessor(0);
- MachineBasicBlock *MSucc = MBBMap[LLVMSucc];
- FastEmitBranch(MSucc);
+ MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+ FastEmitBranch(MSucc, BI->getDebugLoc());
return true;
}
@@ -778,7 +876,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
case Instruction::Alloca:
// FunctionLowering has the static-sized case covered.
- if (StaticAllocaMap.count(cast<AllocaInst>(I)))
+ if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
return true;
// Dynamic-sized alloca is not handled yet.
@@ -824,32 +922,16 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
}
}
-FastISel::FastISel(MachineFunction &mf,
- DenseMap<const Value *, unsigned> &vm,
- DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
- DenseMap<const AllocaInst *, int> &am,
- std::vector<std::pair<MachineInstr*, unsigned> > &pn
-#ifndef NDEBUG
- , SmallSet<const Instruction *, 8> &cil
-#endif
- )
- : MBB(0),
- ValueMap(vm),
- MBBMap(bm),
- StaticAllocaMap(am),
- PHINodesToUpdate(pn),
-#ifndef NDEBUG
- CatchInfoLost(cil),
-#endif
- MF(mf),
- MRI(MF.getRegInfo()),
- MFI(*MF.getFrameInfo()),
- MCP(*MF.getConstantPool()),
- TM(MF.getTarget()),
+FastISel::FastISel(FunctionLoweringInfo &funcInfo)
+ : FuncInfo(funcInfo),
+ MRI(FuncInfo.MF->getRegInfo()),
+ MFI(*FuncInfo.MF->getFrameInfo()),
+ MCP(*FuncInfo.MF->getConstantPool()),
+ TM(FuncInfo.MF->getTarget()),
TD(*TM.getTargetData()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()),
- IsBottomUp(false) {
+ TRI(*TM.getRegisterInfo()) {
}
FastISel::~FastISel() {}
@@ -978,7 +1060,7 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
- BuildMI(MBB, DL, II, ResultReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg);
return ResultReg;
}
@@ -989,13 +1071,13 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addReg(Op0, Op0IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill);
else {
- BuildMI(MBB, DL, II).addReg(Op0, Op0IsKill * RegState::Kill);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC, DL);
- if (!InsertedCopy)
- ResultReg = 0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
@@ -1009,17 +1091,15 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill);
else {
- BuildMI(MBB, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC, DL);
- if (!InsertedCopy)
- ResultReg = 0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1032,17 +1112,15 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addImm(Imm);
else {
- BuildMI(MBB, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addImm(Imm);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC, DL);
- if (!InsertedCopy)
- ResultReg = 0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1055,17 +1133,15 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addFPImm(FPImm);
else {
- BuildMI(MBB, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addFPImm(FPImm);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC, DL);
- if (!InsertedCopy)
- ResultReg = 0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1079,19 +1155,17 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addImm(Imm);
else {
- BuildMI(MBB, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addImm(Imm);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC, DL);
- if (!InsertedCopy)
- ResultReg = 0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1103,13 +1177,11 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
else {
- BuildMI(MBB, DL, II).addImm(Imm);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC, DL);
- if (!InsertedCopy)
- ResultReg = 0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1117,24 +1189,12 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
unsigned Op0, bool Op0IsKill,
uint32_t Idx) {
- const TargetRegisterClass* RC = MRI.getRegClass(Op0);
-
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
- const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG);
-
- if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addImm(Idx);
- else {
- BuildMI(MBB, DL, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addImm(Idx);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC, DL);
- if (!InsertedCopy)
- ResultReg = 0;
- }
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx);
return ResultReg;
}
@@ -1154,14 +1214,14 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const TerminatorInst *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
- unsigned OrigNumPHINodesToUpdate = PHINodesToUpdate.size();
+ unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
// Check successor nodes' PHI nodes that expect a constant to be available
// from this block.
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
const BasicBlock *SuccBB = TI->getSuccessor(succ);
if (!isa<PHINode>(SuccBB->begin())) continue;
- MachineBasicBlock *SuccMBB = MBBMap[SuccBB];
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
// If this terminator has multiple identical successors (common for
// switches), only handle each succ once.
@@ -1182,7 +1242,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// by bailing out early, we may leave behind some dead instructions,
// since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
// own moves. Second, this check is necessary because FastISel doesn't
- // use CreateRegForValue to create registers, so it always creates
+ // use CreateRegs to create registers, so it always creates
// exactly one register for each non-void instruction.
EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
@@ -1190,7 +1250,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
if (VT == MVT::i1)
VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
else {
- PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
return false;
}
}
@@ -1205,10 +1265,10 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
unsigned Reg = getRegForValue(PHIOp);
if (Reg == 0) {
- PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
return false;
}
- PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
DL = DebugLoc();
}
}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 65c36c1289db..928e1ecd4cf4 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "function-lowering-info"
-#include "FunctionLoweringInfo.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
@@ -30,7 +30,6 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
@@ -47,9 +46,11 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
if (isa<PHINode>(I)) return true;
const BasicBlock *BB = I->getParent();
for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI)
- if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI))
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U))
return true;
+ }
return false;
}
@@ -64,9 +65,11 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) {
const BasicBlock *Entry = A->getParent()->begin();
for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
- UI != E; ++UI)
- if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
return false; // Use not in entry block.
+ }
return true;
}
@@ -74,12 +77,18 @@ FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli)
: TLI(tli) {
}
-void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
- bool EnableFastISel) {
+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
Fn = &fn;
MF = &mf;
RegInfo = &MF->getRegInfo();
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(Fn->getReturnType(),
+ Fn->getAttributes().getRetAttributes(), Outs, TLI);
+ CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), Fn->isVarArg(),
+ Outs, Fn->getContext());
+
// Create a vreg for each argument register that is not dead and is used
// outside of the entry block for the function.
for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
@@ -172,31 +181,33 @@ void FunctionLoweringInfo::clear() {
#endif
LiveOutRegInfo.clear();
ArgDbgValues.clear();
+ RegFixups.clear();
}
-unsigned FunctionLoweringInfo::MakeReg(EVT VT) {
+/// CreateReg - Allocate a single virtual register for the given type.
+unsigned FunctionLoweringInfo::CreateReg(EVT VT) {
return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
}
-/// CreateRegForValue - Allocate the appropriate number of virtual registers of
+/// CreateRegs - Allocate the appropriate number of virtual registers of
/// the correctly promoted or expanded types. Assign these registers
/// consecutive vreg numbers and return the first assigned number.
///
/// In the case that the given value has struct or array type, this function
/// will assign registers for each member or element.
///
-unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
+unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) {
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, V->getType(), ValueVTs);
+ ComputeValueVTs(TLI, Ty, ValueVTs);
unsigned FirstReg = 0;
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
EVT ValueVT = ValueVTs[Value];
- EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT);
+ EVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT);
- unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT);
+ unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT);
for (unsigned i = 0; i != NumRegs; ++i) {
- unsigned R = MakeReg(RegisterVT);
+ unsigned R = CreateReg(RegisterVT);
if (!FirstReg) FirstReg = R;
}
}
@@ -208,7 +219,7 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
MachineBasicBlock *MBB) {
// Inform the MachineModuleInfo of the personality for this landing pad.
- const ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
+ const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1));
assert(CE->getOpcode() == Instruction::BitCast &&
isa<Function>(CE->getOperand(0)) &&
"Personality should be a function");
@@ -217,18 +228,18 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
// Gather all the type infos for this landing pad and pass them along to
// MachineModuleInfo.
std::vector<const GlobalVariable *> TyInfo;
- unsigned N = I.getNumOperands();
+ unsigned N = I.getNumArgOperands();
- for (unsigned i = N - 1; i > 2; --i) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
+ for (unsigned i = N - 1; i > 1; --i) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) {
unsigned FilterLength = CI->getZExtValue();
unsigned FirstCatch = i + FilterLength + !FilterLength;
- assert (FirstCatch <= N && "Invalid filter length");
+ assert(FirstCatch <= N && "Invalid filter length");
if (FirstCatch < N) {
TyInfo.reserve(N - FirstCatch);
for (unsigned j = FirstCatch; j < N; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
MMI->addCatchTypeInfo(MBB, TyInfo);
TyInfo.clear();
}
@@ -240,7 +251,7 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
// Filter.
TyInfo.reserve(FilterLength - 1);
for (unsigned j = i + 1; j < FirstCatch; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
MMI->addFilterTypeInfo(MBB, TyInfo);
TyInfo.clear();
}
@@ -249,10 +260,10 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
}
}
- if (N > 3) {
- TyInfo.reserve(N - 3);
- for (unsigned j = 3; j < N; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ if (N > 2) {
+ TyInfo.reserve(N - 2);
+ for (unsigned j = 2; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
MMI->addCatchTypeInfo(MBB, TyInfo);
}
}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
deleted file mode 100644
index 4067a5b33044..000000000000
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
+++ /dev/null
@@ -1,144 +0,0 @@
-//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This implements routines for translating functions from LLVM IR into
-// Machine IR.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef FUNCTIONLOWERINGINFO_H
-#define FUNCTIONLOWERINGINFO_H
-
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#ifndef NDEBUG
-#include "llvm/ADT/SmallSet.h"
-#endif
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/Support/CallSite.h"
-#include <vector>
-
-namespace llvm {
-
-class AllocaInst;
-class BasicBlock;
-class CallInst;
-class Function;
-class GlobalVariable;
-class Instruction;
-class MachineInstr;
-class MachineBasicBlock;
-class MachineFunction;
-class MachineModuleInfo;
-class MachineRegisterInfo;
-class TargetLowering;
-class Value;
-
-//===--------------------------------------------------------------------===//
-/// FunctionLoweringInfo - This contains information that is global to a
-/// function that is used when lowering a region of the function.
-///
-class FunctionLoweringInfo {
-public:
- const TargetLowering &TLI;
- const Function *Fn;
- MachineFunction *MF;
- MachineRegisterInfo *RegInfo;
-
- /// CanLowerReturn - true iff the function's return value can be lowered to
- /// registers.
- bool CanLowerReturn;
-
- /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg
- /// allocated to hold a pointer to the hidden sret parameter.
- unsigned DemoteRegister;
-
- /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
- DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
-
- /// ValueMap - Since we emit code for the function a basic block at a time,
- /// we must remember which virtual registers hold the values for
- /// cross-basic-block values.
- DenseMap<const Value*, unsigned> ValueMap;
-
- /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
- /// the entry block. This allows the allocas to be efficiently referenced
- /// anywhere in the function.
- DenseMap<const AllocaInst*, int> StaticAllocaMap;
-
- /// ArgDbgValues - A list of DBG_VALUE instructions created during isel for
- /// function arguments that are inserted after scheduling is completed.
- SmallVector<MachineInstr*, 8> ArgDbgValues;
-
-#ifndef NDEBUG
- SmallSet<const Instruction *, 8> CatchInfoLost;
- SmallSet<const Instruction *, 8> CatchInfoFound;
-#endif
-
- struct LiveOutInfo {
- unsigned NumSignBits;
- APInt KnownOne, KnownZero;
- LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
- };
-
- /// LiveOutRegInfo - Information about live out vregs, indexed by their
- /// register number offset by 'FirstVirtualRegister'.
- std::vector<LiveOutInfo> LiveOutRegInfo;
-
- /// PHINodesToUpdate - A list of phi instructions whose operand list will
- /// be updated after processing the current basic block.
- /// TODO: This isn't per-function state, it's per-basic-block state. But
- /// there's no other convenient place for it to live right now.
- std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
-
- explicit FunctionLoweringInfo(const TargetLowering &TLI);
-
- /// set - Initialize this FunctionLoweringInfo with the given Function
- /// and its associated MachineFunction.
- ///
- void set(const Function &Fn, MachineFunction &MF, bool EnableFastISel);
-
- /// clear - Clear out all the function-specific state. This returns this
- /// FunctionLoweringInfo to an empty state, ready to be used for a
- /// different function.
- void clear();
-
- unsigned MakeReg(EVT VT);
-
- /// isExportedInst - Return true if the specified value is an instruction
- /// exported from its block.
- bool isExportedInst(const Value *V) {
- return ValueMap.count(V);
- }
-
- unsigned CreateRegForValue(const Value *V);
-
- unsigned InitializeRegForValue(const Value *V) {
- unsigned &R = ValueMap[V];
- assert(R == 0 && "Already initialized this value register!");
- return R = CreateRegForValue(V);
- }
-};
-
-/// AddCatchInfo - Extract the personality and type infos from an eh.selector
-/// call, and add them to the specified machine basic block.
-void AddCatchInfo(const CallInst &I,
- MachineModuleInfo *MMI, MachineBasicBlock *MBB);
-
-/// CopyCatchInfo - Copy catch information from DestBB to SrcBB.
-void CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB,
- MachineModuleInfo *MMI, FunctionLoweringInfo &FLI);
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 16eb8a72adc8..61c2a90e7edc 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -123,7 +123,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
EVT VT = Node->getValueType(ResNo);
const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
- SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT);
+ SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
// Figure out the register class to create for the destreg.
if (VRBase) {
@@ -142,11 +142,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
} else {
// Create the reg, emit the copy.
VRBase = MRI->createVirtualRegister(DstRC);
- bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg,
- DstRC, SrcRC, Node->getDebugLoc());
-
- assert(Emitted && "Unable to issue a copy instruction!\n");
- (void) Emitted;
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ VRBase).addReg(SrcReg);
}
SDValue Op(Node, ResNo);
@@ -246,7 +243,7 @@ unsigned InstrEmitter::getVR(SDValue Op,
const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
VReg = MRI->createVirtualRegister(RC);
}
- BuildMI(MBB, Op.getDebugLoc(),
+ BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
return VReg;
}
@@ -288,10 +285,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
"Don't have operand info for this instruction!");
if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
- bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
- DstRC, SrcRC, Op.getNode()->getDebugLoc());
- assert(Emitted && "Unable to issue a copy instruction!\n");
- (void) Emitted;
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
VReg = NewVReg;
}
}
@@ -428,12 +423,9 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
}
if (Opc == TargetOpcode::EXTRACT_SUBREG) {
+ // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub
unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
- // Create the extract_subreg machine instruction.
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
- TII->get(TargetOpcode::EXTRACT_SUBREG));
-
// Figure out the register class to create for the destreg.
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
@@ -450,11 +442,16 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
VRBase = MRI->createVirtualRegister(SRC);
}
- // Add def, source, and subreg index
- MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ // Create the extract_subreg machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase);
+
+ // Add source, and subreg index
AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
IsClone, IsCloned);
- MI->addOperand(MachineOperand::CreateImm(SubIdx));
+ assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) &&
+ "Cannot yet extract from physregs");
+ MI->getOperand(1).setSubReg(SubIdx);
MBB->insert(InsertPos, MI);
} else if (Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG) {
@@ -511,18 +508,13 @@ void
InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
DenseMap<SDValue, unsigned> &VRBaseMap) {
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
- const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
+ // Create the new VReg in the destination class and emit a copy.
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
-
- // Create the new VReg in the destination class and emit a copy.
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
- bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
- DstRC, SrcRC, Node->getDebugLoc());
- assert(Emitted &&
- "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
- (void) Emitted;
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVReg).addReg(VReg);
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
@@ -604,9 +596,10 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
} else if (SD->getKind() == SDDbgValue::CONST) {
const Value *V = SD->getConst();
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- // FIXME: SDDbgValues aren't updated with legalization, so it's possible
- // to have i128 values in them at this point. As a crude workaround, just
- // drop the debug info if this happens.
+ // FIXME: SDDbgValue constants aren't updated with legalization, so it's
+ // possible to have i128 constants in them at this point. Dwarf writer
+ // does not handle i128 constants at the moment so, as a crude workaround,
+ // just drop the debug info if this happens.
if (!CI->getValue().isSignedIntN(64))
MIB.addReg(0U);
else
@@ -676,6 +669,33 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Create the new machine instruction.
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
+
+ // The MachineInstr constructor adds implicit-def operands. Scan through
+ // these to determine which are dead.
+ if (MI->getNumOperands() != 0 &&
+ Node->getValueType(Node->getNumValues()-1) == MVT::Flag) {
+ // First, collect all used registers.
+ SmallVector<unsigned, 8> UsedRegs;
+ for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser())
+ if (F->getOpcode() == ISD::CopyFromReg)
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ else {
+ // Collect declared implicit uses.
+ const TargetInstrDesc &TID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(TID.getImplicitUses(),
+ TID.getImplicitUses() + TID.getNumImplicitUses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ unsigned Reg = R->getReg();
+ if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg))
+ UsedRegs.push_back(Reg);
+ }
+ }
+ // Then mark unused registers as dead.
+ MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
+ }
// Add result register values for things that are defined by this
// instruction.
@@ -696,16 +716,24 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
cast<MachineSDNode>(Node)->memoperands_end());
+ // Insert the instruction into position in the block. This needs to
+ // happen before any custom inserter hook is called so that the
+ // hook knows where in the block to insert the replacement code.
+ MBB->insert(InsertPos, MI);
+
if (II.usesCustomInsertionHook()) {
// Insert this instruction into the basic block using a target
// specific inserter which may returns a new basic block.
- MBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
- InsertPos = MBB->end();
+ bool AtEnd = InsertPos == MBB->end();
+ MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
+ if (NewMBB != MBB) {
+ if (AtEnd)
+ InsertPos = NewMBB->end();
+ MBB = NewMBB;
+ }
return;
}
- MBB->insert(InsertPos, MI);
-
// Additional results must be an physical register def.
if (HasPhysRegOuts) {
for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
@@ -761,24 +789,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
break;
-
- const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0;
- // Get the register classes of the src/dst.
- if (TargetRegisterInfo::isVirtualRegister(SrcReg))
- SrcTRC = MRI->getRegClass(SrcReg);
- else
- SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType());
- if (TargetRegisterInfo::isVirtualRegister(DestReg))
- DstTRC = MRI->getRegClass(DestReg);
- else
- DstTRC = TRI->getPhysicalRegisterRegClass(DestReg,
- Node->getOperand(1).getValueType());
-
- bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg,
- DstTRC, SrcTRC, Node->getDebugLoc());
- assert(Emitted && "Unable to issue a copy instruction!\n");
- (void) Emitted;
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DestReg).addReg(SrcReg);
break;
}
case ISD::CopyFromReg: {
@@ -807,6 +820,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
MI->addOperand(MachineOperand::CreateES(AsmStr));
+ // Add the isAlignStack bit.
+ int64_t isAlignStack =
+ cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_IsAlignStack))->
+ getZExtValue();
+ MI->addOperand(MachineOperand::CreateImm(isAlignStack));
+
// Add all of the operand registers to the instruction.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
@@ -821,14 +840,22 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case InlineAsm::Kind_RegDef:
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- MI->addOperand(MachineOperand::CreateReg(Reg, true));
+ // FIXME: Add dead flags for physical and virtual registers defined.
+ // For now, mark physical register defs as implicit to help fast
+ // regalloc. This makes inline asm look a lot like calls.
+ MI->addOperand(MachineOperand::CreateReg(Reg, true,
+ /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg)));
}
break;
case InlineAsm::Kind_RegDefEarlyClobber:
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false,
- false, false, true));
+ MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true,
+ /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg),
+ /*isKill=*/ false,
+ /*isDead=*/ false,
+ /*isUndef=*/false,
+ /*isEarlyClobber=*/ true));
}
break;
case InlineAsm::Kind_RegUse: // Use of register.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 62a37a5fd0ae..7a47da4ec52e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -31,6 +31,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
@@ -133,7 +134,7 @@ private:
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
- SDValue N1, SDValue N2,
+ SDValue N1, SDValue N2,
SmallVectorImpl<int> &Mask) const;
bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
@@ -143,6 +144,8 @@ private:
DebugLoc dl);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
RTLIB::Libcall Call_PPCF128);
@@ -172,6 +175,8 @@ private:
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
};
@@ -181,8 +186,8 @@ private:
/// performs the same shuffle in terms of order or result bytes, but on a type
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
-SDValue
-SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+SDValue
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
SmallVectorImpl<int> &Mask) const {
unsigned NumMaskElts = VT.getVectorNumElements();
@@ -193,12 +198,12 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
if (NumEltsGrowth == 1)
return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
-
+
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i != NumMaskElts; ++i) {
int Idx = Mask[i];
for (unsigned j = 0; j != NumEltsGrowth; ++j) {
- if (Idx < 0)
+ if (Idx < 0)
NewMask.push_back(-1);
else
NewMask.push_back(Idx * NumEltsGrowth + j);
@@ -320,7 +325,8 @@ bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
bool OperandsLeadToDest = false;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
OperandsLeadToDest |= // If an operand leads to Dest, so do we.
- LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, NodesLeadingTo);
+ LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest,
+ NodesLeadingTo);
if (OperandsLeadToDest) {
NodesLeadingTo.insert(N);
@@ -357,7 +363,7 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
EVT SVT = VT;
while (SVT != MVT::f32) {
SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
- if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) &&
+ if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
// Only do this if the target has a native EXTLOAD instruction from
// smaller type.
TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
@@ -372,8 +378,8 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
if (Extend)
- return DAG.getExtLoad(ISD::EXTLOAD, dl,
- OrigVT, DAG.getEntryNode(),
+ return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl,
+ DAG.getEntryNode(),
CPIdx, PseudoSourceValue::getConstantPool(),
0, VT, false, false, Alignment);
return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
@@ -450,7 +456,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
8 * (StoredBytes - Offset));
// Load from the stack slot.
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr,
NULL, 0, MemVT, false, false, 0);
Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
@@ -552,7 +558,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// The last copy may be partial. Do an extending load.
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
8 * (LoadedBytes - Offset));
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr,
LD->getSrcValue(), SVOffset + Offset,
MemVT, LD->isVolatile(),
LD->isNonTemporal(),
@@ -568,7 +574,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
Stores.size());
// Finally, perform the original load only redirected to the stack slot.
- Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase,
NULL, 0, LoadedVT, false, false, 0);
// Callers expect a MERGE_VALUES node.
@@ -597,23 +603,23 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Load the value in two parts
SDValue Lo, Hi;
if (TLI.isLittleEndian()) {
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
SVOffset, NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
- Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), MinAlign(Alignment, IncrementSize));
+ LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
} else {
- Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
SVOffset, NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), MinAlign(Alignment, IncrementSize));
+ LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
}
// aggregate the two parts
@@ -773,7 +779,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
"Unexpected illegal type!");
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
- assert((isTypeLegal(Node->getOperand(i).getValueType()) ||
+ assert((isTypeLegal(Node->getOperand(i).getValueType()) ||
Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
"Unexpected illegal type!");
@@ -853,6 +859,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::MERGE_VALUES:
case ISD::EH_RETURN:
case ISD::FRAME_TO_ARGS_OFFSET:
+ case ISD::EH_SJLJ_SETJMP:
+ case ISD::EH_SJLJ_LONGJMP:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -925,8 +933,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
break;
}
- Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(),
- Ops.size());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(),
+ Ops.size()), 0);
switch (Action) {
case TargetLowering::Legal:
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
@@ -1000,11 +1008,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
NodesLeadingTo);
}
- // Now that we legalized all of the inputs (which may have inserted
- // libcalls) create the new CALLSEQ_START node.
+ // Now that we have legalized all of the inputs (which may have inserted
+ // libcalls), create the new CALLSEQ_START node.
Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
- // Merge in the last call, to ensure that this call start after the last
+ // Merge in the last call to ensure that this call starts after the last
// call ended.
if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
@@ -1016,7 +1024,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (Tmp1 != Node->getOperand(0)) {
SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
Ops[0] = Tmp1;
- Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0],
+ Ops.size()), Result.getResNo());
}
// Remember that the CALLSEQ_START is legalized.
@@ -1058,7 +1067,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (Tmp1 != Node->getOperand(0)) {
SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
Ops[0] = Tmp1;
- Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ &Ops[0], Ops.size()),
+ Result.getResNo());
}
} else {
Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
@@ -1067,7 +1078,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
Ops[0] = Tmp1;
Ops.back() = Tmp2;
- Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ &Ops[0], Ops.size()),
+ Result.getResNo());
}
}
assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
@@ -1087,7 +1100,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD) {
EVT VT = Node->getValueType(0);
- Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp2, LD->getOffset()),
+ Result.getResNo());
Tmp3 = Result.getValue(0);
Tmp4 = Result.getValue(1);
@@ -1100,7 +1115,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
DAG, TLI);
Tmp3 = Result.getOperand(0);
Tmp4 = Result.getOperand(1);
@@ -1166,7 +1181,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
ISD::LoadExtType NewExtType =
ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
- Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl,
Tmp1, Tmp2, LD->getSrcValue(), SVOffset,
NVT, isVolatile, isNonTemporal, Alignment);
@@ -1202,8 +1217,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (TLI.isLittleEndian()) {
// EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
// Load the bottom RoundWidth bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
- Node->getValueType(0), Tmp1, Tmp2,
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl,
+ Tmp1, Tmp2,
LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
isNonTemporal, Alignment);
@@ -1211,13 +1226,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
IncrementSize = RoundWidth / 8;
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
LD->getSrcValue(), SVOffset + IncrementSize,
ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
- // Build a factor node to remember that this load is independent of the
- // other one.
+ // Build a factor node to remember that this load is independent of
+ // the other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
@@ -1231,7 +1246,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Big endian - avoid unaligned loads.
// EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
// Load the top RoundWidth bits.
- Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
isNonTemporal, Alignment);
@@ -1239,14 +1254,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
IncrementSize = RoundWidth / 8;
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
- Node->getValueType(0), Tmp1, Tmp2,
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+ Node->getValueType(0), dl, Tmp1, Tmp2,
LD->getSrcValue(), SVOffset + IncrementSize,
ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
- // Build a factor node to remember that this load is independent of the
- // other one.
+ // Build a factor node to remember that this load is independent of
+ // the other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
@@ -1267,7 +1282,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
isCustom = true;
// FALLTHROUGH
case TargetLowering::Legal:
- Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp2, LD->getOffset()),
+ Result.getResNo());
Tmp1 = Result.getValue(0);
Tmp2 = Result.getValue(1);
@@ -1281,10 +1298,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// If this is an unaligned load and the target doesn't support it,
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ const Type *Ty =
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
DAG, TLI);
Tmp1 = Result.getOperand(0);
Tmp2 = Result.getOperand(1);
@@ -1310,10 +1329,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp2 = LegalizeOp(Load.getValue(1));
break;
}
- assert(ExtType != ISD::EXTLOAD &&"EXTLOAD should always be supported!");
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
// Turn the unsupported load into an EXTLOAD followed by an explicit
// zero/sign extend inreg.
- Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl,
Tmp1, Tmp2, LD->getSrcValue(),
LD->getSrcValueOffset(), SrcVT,
LD->isVolatile(), LD->isNonTemporal(),
@@ -1355,8 +1375,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
{
Tmp3 = LegalizeOp(ST->getValue());
- Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
- ST->getOffset());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp3, Tmp2,
+ ST->getOffset()),
+ Result.getResNo());
EVT VT = Tmp3.getValueType();
switch (TLI.getOperationAction(ISD::STORE, VT)) {
@@ -1366,7 +1388,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
DAG, TLI);
@@ -1459,8 +1481,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
} else {
if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
Tmp2 != ST->getBasePtr())
- Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
- ST->getOffset());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp3, Tmp2,
+ ST->getOffset()),
+ Result.getResNo());
switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
default: assert(0 && "This action is not supported yet!");
@@ -1469,7 +1493,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
DAG, TLI);
@@ -1531,7 +1555,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0,
false, false, 0);
else
- return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr,
NULL, 0, Vec.getValueType().getVectorElementType(),
false, false, 0);
}
@@ -1568,7 +1592,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
Node->getOperand(i), Idx, SV, Offset,
EltVT, false, false, 0));
} else
- Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
Node->getOperand(i), Idx, SV, Offset,
false, false, 0));
}
@@ -1763,7 +1787,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
DestAlign);
assert(SlotSize < DestSize && "Unknown extension!");
- return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT,
+ return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT,
false, false, DestAlign);
}
@@ -1926,6 +1950,44 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
return CallInfo.first;
}
+// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+ return CallInfo;
+}
+
SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64,
@@ -2048,7 +2110,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr);
SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr);
- SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52);
+ SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
+ TwoP84PlusTwoP52);
return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
}
@@ -2058,11 +2121,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
EVT SHVT = TLI.getShiftAmountTy();
- SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
DAG.getConstant(UINT64_C(0x800), MVT::i64));
- SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
DAG.getConstant(UINT64_C(0x7ff), MVT::i64));
SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE);
@@ -2122,7 +2185,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
false, false, Alignment);
else {
FudgeInReg =
- LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+ LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl,
DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
MVT::f32, false, false, Alignment));
@@ -2332,6 +2395,92 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
}
}
+std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic intrinsic Expand!");
+ break;
+ case ISD::ATOMIC_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ }
+ break;
+ case ISD::ATOMIC_CMP_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ }
+ break;
+ }
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
void SelectionDAGLegalize::ExpandNode(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
DebugLoc dl = Node->getDebugLoc();
@@ -2357,10 +2506,48 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::EH_RETURN:
case ISD::EH_LABEL:
case ISD::PREFETCH:
- case ISD::MEMBARRIER:
case ISD::VAEND:
+ case ISD::EH_SJLJ_LONGJMP:
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::EH_SJLJ_SETJMP:
+ Results.push_back(DAG.getConstant(0, MVT::i32));
Results.push_back(Node->getOperand(0));
break;
+ case ISD::MEMBARRIER: {
+ // If the target didn't lower this, lower it to '__sync_synchronize()' call
+ TargetLowering::ArgListTy Args;
+ std::pair<SDValue, SDValue> CallResult =
+ TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy()),
+ Args, DAG, dl);
+ Results.push_back(CallResult.second);
+ break;
+ }
+ // By default, atomic intrinsics are marked Legal and lowered. Targets
+ // which don't support them directly, however, may want libcalls, in which
+ // case they mark them Expand, and we get here.
+ // FIXME: Unimplemented for now. Add libcalls.
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_CMP_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
case ISD::DYNAMIC_STACKALLOC:
ExpandDYNAMIC_STACKALLOC(Node, Results);
break;
@@ -2465,15 +2652,31 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
EVT VT = Node->getValueType(0);
Tmp1 = Node->getOperand(0);
Tmp2 = Node->getOperand(1);
- SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0,
- false, false, 0);
+ unsigned Align = Node->getConstantOperandVal(3);
+
+ SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0,
+ false, false, 0);
+ SDValue VAList = VAListLoad;
+
+ if (Align > TLI.getMinStackArgumentAlignment()) {
+ assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
+
+ VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(Align - 1,
+ TLI.getPointerTy()));
+
+ VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(-Align,
+ TLI.getPointerTy()));
+ }
+
// Increment the pointer, VAList, to the next vaarg
Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
DAG.getConstant(TLI.getTargetData()->
getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
TLI.getPointerTy()));
// Store the incremented VAList to the legalized pointer
- Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0,
+ Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0,
false, false, 0);
// Load the actual argument out of the pointer VAList
Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0,
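
The VAARG expansion above over-aligns the va_list pointer with (p + Align - 1) & -Align whenever the requested alignment exceeds the minimum stack argument alignment. A standalone sketch of that round-up arithmetic, with invented names:

#include <cassert>
#include <cstdint>

// Round p up to the next multiple of align (a power of two); this is the
// (p + align - 1) & -align computation the expansion emits, written with the
// equivalent mask ~(align - 1).
uint64_t alignUp(uint64_t p, uint64_t align) {
  assert((align & (align - 1)) == 0 && "Expected a power of 2");
  return (p + align - 1) & ~(align - 1);
}

int main() {
  assert(alignUp(0x1001, 16) == 0x1010);   // rounded up to the next slot
  assert(alignUp(0x1010, 16) == 0x1010);   // already aligned: unchanged
  return 0;
}
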
@@ -2496,7 +2699,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::EXTRACT_VECTOR_ELT:
if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
// This must be an access of the only element. Return it.
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0),
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0),
Node->getOperand(0));
else
Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
@@ -2948,13 +3151,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
const TargetData &TD = *TLI.getTargetData();
unsigned EntrySize =
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
-
+
Index = DAG.getNode(ISD::MUL, dl, PTy,
Index, DAG.getConstant(EntrySize, PTy));
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
- SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr,
PseudoSourceValue::getJumpTable(), 0, MemVT,
false, false, 0);
Addr = LD;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index e3eb949567a3..650ee5a0721c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -453,8 +453,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
SDValue NewL;
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
- NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(),
- NVT, L->getChain(), L->getBasePtr(), L->getOffset(),
+ NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
+ NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
L->getSrcValue(), L->getSrcValueOffset(), NVT,
L->isVolatile(), L->isNonTemporal(), L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
@@ -464,8 +464,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
}
// Do a non-extending load followed by FP_EXTEND.
- NewL = DAG.getLoad(L->getAddressingMode(), dl, ISD::NON_EXTLOAD,
- L->getMemoryVT(), L->getChain(),
+ NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD,
+ L->getMemoryVT(), dl, L->getChain(),
L->getBasePtr(), L->getOffset(),
L->getSrcValue(), L->getSrcValueOffset(),
L->getMemoryVT(), L->isVolatile(),
@@ -504,7 +504,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
SDValue NewVAARG;
- NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
+ NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -698,9 +699,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
}
// Update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
DAG.getCondCode(CCCode), NewLHS, NewRHS,
- N->getOperand(4));
+ N->getOperand(4)),
+ 0);
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
@@ -739,9 +741,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
}
// Update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
N->getOperand(2), N->getOperand(3),
- DAG.getCondCode(CCCode));
+ DAG.getCondCode(CCCode)),
+ 0);
}
SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
@@ -757,8 +760,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
}
// Otherwise, update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
- DAG.getCondCode(CCCode));
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)),
+ 0);
}
SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
@@ -1106,7 +1110,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
assert(NVT.isByteSized() && "Expanded type not byte sized!");
assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
- Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr,
LD->getSrcValue(), LD->getSrcValueOffset(),
LD->getMemoryVT(), LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
@@ -1294,9 +1298,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
}
// Update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
DAG.getCondCode(CCCode), NewLHS, NewRHS,
- N->getOperand(4));
+ N->getOperand(4)), 0);
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
@@ -1375,9 +1379,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
}
// Update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
N->getOperand(2), N->getOperand(3),
- DAG.getCondCode(CCCode));
+ DAG.getCondCode(CCCode)), 0);
}
SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
@@ -1393,8 +1397,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
}
// Otherwise, update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
- DAG.getCondCode(CCCode));
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
}
SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 8b382bc7670d..b94ea9a3a9af 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -369,7 +369,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
ISD::LoadExtType ExtType =
ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
DebugLoc dl = N->getDebugLoc();
- SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(),
N->getSrcValue(), N->getSrcValueOffset(),
N->getMemoryVT(), N->isVolatile(),
N->isNonTemporal(), N->getAlignment());
@@ -572,7 +572,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
SmallVector<SDValue, 8> Parts(NumRegs);
for (unsigned i = 0; i < NumRegs; ++i) {
- Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2));
+ Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
Chain = Parts[i].getValue(1);
}
@@ -725,8 +726,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
// The chain (Op#0), CC (#1) and basic block destination (Op#4) are always
// legal types.
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
- N->getOperand(1), LHS, RHS, N->getOperand(4));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4)),
+ 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
@@ -737,8 +739,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
// The chain (Op#0) and basic block destination (Op#2) are always legal types.
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Cond,
- N->getOperand(2));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
+ N->getOperand(2)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
@@ -773,7 +775,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
for (unsigned i = 0; i < NumElts; ++i)
NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
- return DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], NumElts);
+ return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
@@ -798,17 +800,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
assert(N->getOperand(1).getValueType().getSizeInBits() >=
N->getValueType(0).getVectorElementType().getSizeInBits() &&
"Type of inserted value narrower than vector element type!");
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
GetPromotedInteger(N->getOperand(1)),
- N->getOperand(2));
+ N->getOperand(2)),
+ 0);
}
assert(OpNo == 2 && "Different operand and result vector types?");
// Promote the index.
SDValue Idx = ZExtPromotedInteger(N->getOperand(2));
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
- N->getOperand(1), Idx);
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), Idx), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
@@ -819,15 +822,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
SDValue Flag = GetPromotedInteger(N->getOperand(i));
NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
}
- return DAG.UpdateNodeOperands(SDValue (N, 0), NewOps,
- array_lengthof(NewOps));
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
// Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
// the operand in place.
- return DAG.UpdateNodeOperands(SDValue(N, 0),
- GetPromotedInteger(N->getOperand(0)));
+ return SDValue(DAG.UpdateNodeOperands(N,
+ GetPromotedInteger(N->getOperand(0))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
@@ -837,8 +839,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT);
- return DAG.UpdateNodeOperands(SDValue(N, 0), Cond,
- N->getOperand(1), N->getOperand(2));
+ return SDValue(DAG.UpdateNodeOperands(N, Cond,
+ N->getOperand(1), N->getOperand(2)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
@@ -849,8 +851,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
// The CC (#4) and the possible return values (#2 and #3) have legal types.
- return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2),
- N->getOperand(3), N->getOperand(4));
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
@@ -861,12 +863,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
// The CC (#2) is always legal.
- return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2));
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
- ZExtPromotedInteger(N->getOperand(1)));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
@@ -878,8 +880,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
- return DAG.UpdateNodeOperands(SDValue(N, 0),
- SExtPromotedInteger(N->getOperand(0)));
+ return SDValue(DAG.UpdateNodeOperands(N,
+ SExtPromotedInteger(N->getOperand(0))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
@@ -905,8 +907,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
- return DAG.UpdateNodeOperands(SDValue(N, 0),
- ZExtPromotedInteger(N->getOperand(0)));
+ return SDValue(DAG.UpdateNodeOperands(N,
+ ZExtPromotedInteger(N->getOperand(0))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
@@ -990,6 +992,11 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
+ case ISD::UADDO:
+ case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1526,7 +1533,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
if (N->getMemoryVT().bitsLE(NVT)) {
EVT MemVT = N->getMemoryVT();
- Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
MemVT, isVolatile, isNonTemporal, Alignment);
// Remember the chain.
@@ -1559,7 +1566,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(),
+ Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(),
SVOffset+IncrementSize, NEVT,
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -1577,7 +1584,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned ExcessBits = (EBytes - IncrementSize)*8;
// Load both the high bits and maybe some of the low bits.
- Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
isVolatile, isNonTemporal, Alignment);
@@ -1586,7 +1593,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
// Load the rest of the low bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(),
SVOffset+IncrementSize,
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
isVolatile, isNonTemporal,
@@ -1716,6 +1723,48 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Compute the overflow.
+ //
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ EVT OType = Node->getValueType(1);
+ SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(Node, 1), Cmp);
+}
+
void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -1912,6 +1961,29 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Calculate the overflow: addition overflows iff a + b < a, and subtraction
+ // overflows iff a - b > a.
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS,
+ N->getOpcode () == ISD::UADDO ?
+ ISD::SETULT : ISD::SETUGT);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
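
The two expansions above encode the textbook overflow tests: signed add/sub overflows when the operand signs agree/disagree and the result's sign differs from the LHS sign, while unsigned add/sub overflows when the wrapped result is smaller/larger than the first operand. A minimal sketch of the same checks on plain 64-bit integers (illustrative only, names invented):

#include <cassert>
#include <cstdint>

// Same sign-based tests the expanded nodes compute.  The add is done in
// unsigned arithmetic so the wrap-around is well defined.
bool saddOverflow(int64_t a, int64_t b, int64_t &sum) {
  sum = static_cast<int64_t>(static_cast<uint64_t>(a) + static_cast<uint64_t>(b));
  bool aNonNeg = a >= 0, bNonNeg = b >= 0, sNonNeg = sum >= 0;
  // Signed add overflows iff the operands have the same sign and the result's
  // sign differs from it (SSUBO flips the first condition).
  return aNonNeg == bNonNeg && aNonNeg != sNonNeg;
}

bool uaddOverflow(uint64_t a, uint64_t b, uint64_t &sum) {
  sum = a + b;
  return sum < a;   // unsigned add overflows iff the wrapped sum is smaller
}

int main() {
  int64_t s; uint64_t u;
  assert(saddOverflow(INT64_MAX, 1, s));        // wraps negative: overflow
  assert(!saddOverflow(1, 2, s) && s == 3);     // no overflow
  assert(uaddOverflow(~UINT64_C(0), 1, u) && u == 0);
  return 0;
}
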
@@ -2154,9 +2226,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
}
// Update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
DAG.getCondCode(CCCode), NewLHS, NewRHS,
- N->getOperand(4));
+ N->getOperand(4)), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
@@ -2172,9 +2244,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
}
// Update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
N->getOperand(2), N->getOperand(3),
- DAG.getCondCode(CCCode));
+ DAG.getCondCode(CCCode)), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
@@ -2190,8 +2262,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
}
// Otherwise, update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
- DAG.getCondCode(CCCode));
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
@@ -2200,7 +2272,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
// upper half of the shift amount is zero. Just use the lower half.
SDValue Lo, Hi;
GetExpandedInteger(N->getOperand(1), Lo, Hi);
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo);
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
@@ -2209,7 +2281,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
// constant to valid type.
SDValue Lo, Hi;
GetExpandedInteger(N->getOperand(0), Lo, Hi);
- return DAG.UpdateNodeOperands(SDValue(N, 0), Lo);
+ return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
@@ -2384,7 +2456,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
// Load the value out, extending it from f32 to the destination float type.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(),
FudgePtr, NULL, 0, MVT::f32,
false, false, Alignment);
return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 17f131b21e4a..6e56c98e9b56 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -485,15 +485,14 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
NewOps.push_back(Op);
} else if (Op != OrigOp) {
// This is the first operand to change - add all operands so far.
- NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i);
+ NewOps.append(N->op_begin(), N->op_begin() + i);
NewOps.push_back(Op);
}
}
// Some operands changed - update the node.
if (!NewOps.empty()) {
- SDNode *M = DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0],
- NewOps.size()).getNode();
+ SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size());
if (M != N) {
// The node morphed into a different node. Normally for this to happen
// the original node would have to be marked NewNode. However this can
@@ -684,40 +683,45 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
// can potentially cause recursive merging.
SmallSetVector<SDNode*, 16> NodesToAnalyze;
NodeUpdateListener NUL(*this, NodesToAnalyze);
- DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
-
- // The old node may still be present in a map like ExpandedIntegers or
- // PromotedIntegers. Inform maps about the replacement.
- ReplacedValues[From] = To;
-
- // Process the list of nodes that need to be reanalyzed.
- while (!NodesToAnalyze.empty()) {
- SDNode *N = NodesToAnalyze.back();
- NodesToAnalyze.pop_back();
- if (N->getNodeId() != DAGTypeLegalizer::NewNode)
- // The node was analyzed while reanalyzing an earlier node - it is safe to
- // skip. Note that this is not a morphing node - otherwise it would still
- // be marked NewNode.
- continue;
+ do {
+ DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+
+ // The old node may still be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ ReplacedValues[From] = To;
+
+ // Process the list of nodes that need to be reanalyzed.
+ while (!NodesToAnalyze.empty()) {
+ SDNode *N = NodesToAnalyze.back();
+ NodesToAnalyze.pop_back();
+ if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+ // The node was analyzed while reanalyzing an earlier node - it is safe
+ // to skip. Note that this is not a morphing node - otherwise it would
+ // still be marked NewNode.
+ continue;
- // Analyze the node's operands and recalculate the node ID.
- SDNode *M = AnalyzeNewNode(N);
- if (M != N) {
- // The node morphed into a different node. Make everyone use the new node
- // instead.
- assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
- assert(N->getNumValues() == M->getNumValues() &&
- "Node morphing changed the number of results!");
- for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
- SDValue OldVal(N, i);
- SDValue NewVal(M, i);
- if (M->getNodeId() == Processed)
- RemapValue(NewVal);
- DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ // Analyze the node's operands and recalculate the node ID.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M != N) {
+ // The node morphed into a different node. Make everyone use the new
+ // node instead.
+ assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ SDValue OldVal(N, i);
+ SDValue NewVal(M, i);
+ if (M->getNodeId() == Processed)
+ RemapValue(NewVal);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ }
+ // The original node continues to exist in the DAG, marked NewNode.
}
- // The original node continues to exist in the DAG, marked NewNode.
}
- }
+ // When recursively update nodes with new nodes, it is possible to have
+ // new uses of From due to CSE. If this happens, replace the new uses of
+ // From with To.
+ } while (!From.use_empty());
}
void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
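
The rewritten ReplaceValueWith above wraps the replacement in a do/while because reanalysis can re-introduce uses of the old value through CSE, so one pass is not enough. A toy model of that fixed-point loop, with invented names, showing why it only stops once the old value is genuinely dead:

#include <cassert>
#include <set>

int main() {
  std::set<int> fromUsers = {1, 2, 3}, toUsers;
  int resurrections = 2;                 // pretend CSE re-adds a use twice

  do {
    // "ReplaceAllUsesOfValueWith": hand every current user over to `to`.
    toUsers.insert(fromUsers.begin(), fromUsers.end());
    fromUsers.clear();
    // Side effect of reanalysis: a brand-new use of `from` may appear.
    if (resurrections-- > 0)
      fromUsers.insert(100 + resurrections);
  } while (!fromUsers.empty());          // go around again if it did

  assert(fromUsers.empty() && toUsers.size() == 5);
  return 0;
}
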
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index c6659630ede2..bd86694446d6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -345,6 +345,9 @@ private:
void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+
void ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi);
bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -620,6 +623,7 @@ private:
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Shift(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 88e1e624ae32..9c2b1d9ed73d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -238,13 +238,15 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
}
void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDValue Chain = N->getOperand(0);
SDValue Ptr = N->getOperand(1);
DebugLoc dl = N->getDebugLoc();
+ const unsigned Align = N->getConstantOperandVal(3);
- Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
- Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2));
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align);
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
// Handle endianness of the load.
if (TLI.isBigEndian())
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 0e2bd0233712..621c08724210 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -116,7 +116,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Ops.push_back(LegalizeOp(Node->getOperand(i)));
SDValue Result =
- DAG.UpdateNodeOperands(Op.getValue(0), Ops.data(), Ops.size());
+ SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
bool HasVectorValue = false;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7efeea1ddaf9..93aeff5c1e6c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -165,9 +165,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
assert(N->isUnindexed() && "Indexed vector load?");
- SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getDebugLoc(),
+ SDValue Result = DAG.getLoad(ISD::UNINDEXED,
N->getExtensionType(),
N->getValueType(0).getVectorElementType(),
+ N->getDebugLoc(),
N->getChain(), N->getBasePtr(),
DAG.getUNDEF(N->getBasePtr().getValueType()),
N->getSrcValue(), N->getSrcValueOffset(),
@@ -448,6 +449,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
@@ -755,14 +761,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
EVT LoMemVT, HiMemVT;
GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
- Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset,
+ Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
SVOffset += IncrementSize;
- Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset,
+ Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment);
// Build a factor node to remember that this load is independent of the
@@ -1082,10 +1088,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
uint64_t LoElts = Lo.getValueType().getVectorNumElements();
if (IdxVal < LoElts)
- return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx);
- return DAG.UpdateNodeOperands(SDValue(N, 0), Hi,
+ return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
+ return SDValue(DAG.UpdateNodeOperands(N, Hi,
DAG.getConstant(IdxVal - LoElts,
- Idx.getValueType()));
+ Idx.getValueType())),
+ 0);
}
// Store the vector to the stack.
@@ -1099,7 +1106,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
// Load back the required element.
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
- return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr,
SV, 0, EltVT, false, false, 0);
}
@@ -1199,7 +1206,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FDIV:
case ISD::FMUL:
case ISD::FPOW:
- case ISD::FPOWI:
case ISD::FREM:
case ISD::FSUB:
case ISD::MUL:
@@ -1215,6 +1221,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Binary(N);
break;
+ case ISD::FPOWI:
+ Res = WidenVecRes_POWI(N);
+ break;
+
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
@@ -1241,6 +1251,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEG:
case ISD::FSIN:
case ISD::FSQRT:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
Res = WidenVecRes_Unary(N);
break;
}
@@ -1258,7 +1273,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
- while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
@@ -1273,13 +1288,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
} else {
// Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
SmallVector<SDValue, 16> ConcatOps(CurNumElts);
unsigned ConcatEnd = 0; // Current ConcatOps index.
- unsigned Idx = 0; // Current Idx into input vectors.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest synthesizable vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take munches of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
while (CurNumElts != 0) {
while (CurNumElts >= NumElts) {
SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
@@ -1290,26 +1312,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
Idx += NumElts;
CurNumElts -= NumElts;
}
- EVT PrevVecVT = VT;
do {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
- } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+ } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1);
if (NumElts == 1) {
- // Since we are using concat vector, build a vector from the scalar ops.
- SDValue VecOp = DAG.getUNDEF(PrevVecVT);
for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
InOp1, DAG.getIntPtrConstant(Idx));
SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
InOp2, DAG.getIntPtrConstant(Idx));
- VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp,
- DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2),
- DAG.getIntPtrConstant(i));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2);
}
CurNumElts = 0;
- ConcatOps[ConcatEnd++] = VecOp;
}
}
@@ -1320,23 +1337,65 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
return ConcatOps[0];
}
- // Rebuild vector to one with the widen type
- Idx = ConcatEnd - 1;
- while (Idx != 0) {
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
VT = ConcatOps[Idx--].getValueType();
- while (Idx != 0 && ConcatOps[Idx].getValueType() == VT)
- --Idx;
- if (Idx != 0) {
- VT = ConcatOps[Idx].getValueType();
- ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- &ConcatOps[Idx+1], ConcatEnd - Idx - 1);
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeSynthesizable(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
+ }
+ ConcatOps[Idx+1] = VecOp;
ConcatEnd = Idx + 2;
+ }
+ else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, &SubConcatOps[0],
+ OpsToConcat);
+ ConcatEnd = SubConcatIdx + 1;
}
}
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
- unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements();
+ // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
+ unsigned NumOps =
+ WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
if (NumOps != ConcatEnd ) {
- SDValue UndefVal = DAG.getUNDEF(VT);
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
for (unsigned j = ConcatEnd; j < NumOps; ++j)
ConcatOps[j] = UndefVal;
}
@@ -1366,7 +1425,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(Opcode, dl, WidenVT, InOp);
}
- if (TLI.isTypeLegal(InWidenVT)) {
+ if (TLI.isTypeSynthesizable(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1410,6 +1469,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
}
+SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
@@ -1501,7 +1567,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
}
- if (TLI.isTypeLegal(NewInVT)) {
+ if (TLI.isTypeSynthesizable(NewInVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1642,7 +1708,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SatOp, CvtCode);
}
- if (TLI.isTypeLegal(InWidenVT)) {
+ if (TLI.isTypeSynthesizable(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1968,7 +2034,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
if (InWidenSize % Size == 0 && !VT.isVector()) {
unsigned NewNumElts = InWidenSize / Size;
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
- if (TLI.isTypeLegal(NewVT)) {
+ if (TLI.isTypeSynthesizable(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
DAG.getIntPtrConstant(0));
@@ -2066,7 +2132,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
unsigned MemVTWidth = MemVT.getSizeInBits();
if (MemVT.getSizeInBits() <= WidenEltWidth)
break;
- if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
RetVT = MemVT;
@@ -2080,7 +2146,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
EVT MemVT = (MVT::SimpleValueType) VT;
unsigned MemVTWidth = MemVT.getSizeInBits();
- if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
(WidenWidth % MemVTWidth) == 0 &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
@@ -2286,14 +2352,14 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Increment = LdEltVT.getSizeInBits() / 8;
- Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset,
+ Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset,
LdEltVT, isVolatile, isNonTemporal, Align);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
BasePtr, DAG.getIntPtrConstant(Offset));
- Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV,
+ Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV,
SVOffset + Offset, LdEltVT, isVolatile,
isNonTemporal, Align);
LdChain.push_back(Ops[i].getValue(1));
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index ad8630afff45..3b86c3286585 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -535,7 +535,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
SUnit *LRDef = LiveRegDefs[Reg];
EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
- TRI->getPhysicalRegisterRegClass(Reg, VT);
+ TRI->getMinimalPhysRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
// If cross copy register class is null, then it must be possible copy
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 820ba6681606..3ef521c398e1 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -320,7 +320,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
CapturePred(&*I);
- if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
+ if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
"Physical register dependency violated?");
@@ -795,7 +795,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
SUnit *LRDef = LiveRegDefs[Reg];
EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
- TRI->getPhysicalRegisterRegClass(Reg, VT);
+ TRI->getMinimalPhysRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
// If cross copy register class is null, then it must be possible copy
@@ -1116,7 +1116,7 @@ namespace {
SUnit *pop() {
if (empty()) return NULL;
std::vector<SUnit *>::iterator Best = Queue.begin();
- for (std::vector<SUnit *>::iterator I = next(Queue.begin()),
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
E = Queue.end(); I != E; ++I)
if (Picker(*Best, *I))
Best = I;
@@ -1275,6 +1275,17 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
return left->getHeight() > right->getHeight();
} else if (RStall)
return false;
+
+ // If either node is scheduling for latency, sort them by height and latency
+ // first.
+ if (left->SchedulingPref == Sched::Latency ||
+ right->SchedulingPref == Sched::Latency) {
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+ if (left->Latency != right->Latency)
+ return left->Latency > right->Latency;
+ }
+
return BURRSort(left, right, SPQ);
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 3185c88b82bf..06cf05308755 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -59,7 +59,11 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
SUnits.back().OrigNode = &SUnits.back();
SUnit *SU = &SUnits.back();
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
- SU->SchedulingPref = TLI.getSchedulingPreference(N);
+ if (N->isMachineOpcode() &&
+ N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)
+ SU->SchedulingPref = Sched::None;
+ else
+ SU->SchedulingPref = TLI.getSchedulingPreference(N);
return SU;
}
@@ -97,7 +101,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
PhysReg = Reg;
const TargetRegisterClass *RC =
- TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
+ TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo));
Cost = RC->getCopyCost();
}
}
@@ -106,17 +110,42 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
SelectionDAG *DAG) {
SmallVector<EVT, 4> VTs;
- for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
- VTs.push_back(N->getValueType(i));
+ SDNode *FlagDestNode = Flag.getNode();
+
+ // Don't add a flag from a node to itself.
+ if (FlagDestNode == N) return;
+
+ // Don't add a flag to something which already has a flag.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return;
+
+ for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
+ VTs.push_back(N->getValueType(I));
+
if (AddFlag)
VTs.push_back(MVT::Flag);
+
SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- Ops.push_back(N->getOperand(i));
- if (Flag.getNode())
+ for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
+ Ops.push_back(N->getOperand(I));
+
+ if (FlagDestNode)
Ops.push_back(Flag);
+
SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
+ MachineSDNode::mmo_iterator Begin = 0, End = 0;
+ MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
+
+ // Store memory references.
+ if (MN) {
+ Begin = MN->memoperands_begin();
+ End = MN->memoperands_end();
+ }
+
DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
+
+ // Reset the memory references
+ if (MN)
+ MN->setMemRefs(Begin, End);
}
/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
@@ -124,98 +153,98 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
/// offsets are not far apart (target specific), it adds MVT::Flag inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
-void ScheduleDAGSDNodes::ClusterNeighboringLoads() {
+void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
+ SDNode *Chain = 0;
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ Chain = Node->getOperand(NumOps-1).getNode();
+ if (!Chain)
+ return;
+
+ // Look for other loads of the same chain. Find loads that are loading from
+ // the same base pointer and different offsets.
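+ // For example, two loads off the same chain with the same base pointer and
+ // offsets 0 and 4 are clustering candidates: areLoadsFromSameBasePtr pairs
+ // them up and shouldScheduleLoadsNear (below) decides whether the offsets
+ // are close enough for the target.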
SmallPtrSet<SDNode*, 16> Visited;
SmallVector<int64_t, 4> Offsets;
DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
- for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
- E = DAG->allnodes_end(); NI != E; ++NI) {
- SDNode *Node = &*NI;
- if (!Node || !Node->isMachineOpcode())
+ bool Cluster = false;
+ SDNode *Base = Node;
+ for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+ I != E; ++I) {
+ SDNode *User = *I;
+ if (User == Node || !Visited.insert(User))
continue;
-
- unsigned Opc = Node->getMachineOpcode();
- const TargetInstrDesc &TID = TII->get(Opc);
- if (!TID.mayLoad())
+ int64_t Offset1, Offset2;
+ if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+ Offset1 == Offset2)
+ // FIXME: Should be ok if the addresses are identical. But earlier
+ // optimizations really should have eliminated one of the loads.
continue;
+ if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+ Offsets.push_back(Offset1);
+ O2SMap.insert(std::make_pair(Offset2, User));
+ Offsets.push_back(Offset2);
+ if (Offset2 < Offset1)
+ Base = User;
+ Cluster = true;
+ }
- SDNode *Chain = 0;
- unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
- Chain = Node->getOperand(NumOps-1).getNode();
- if (!Chain)
- continue;
+ if (!Cluster)
+ return;
- // Look for other loads of the same chain. Find loads that are loading from
- // the same base pointer and different offsets.
- Visited.clear();
- Offsets.clear();
- O2SMap.clear();
- bool Cluster = false;
- SDNode *Base = Node;
- int64_t BaseOffset;
- for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
- I != E; ++I) {
- SDNode *User = *I;
- if (User == Node || !Visited.insert(User))
- continue;
- int64_t Offset1, Offset2;
- if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
- Offset1 == Offset2)
- // FIXME: Should be ok if they addresses are identical. But earlier
- // optimizations really should have eliminated one of the loads.
- continue;
- if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
- Offsets.push_back(Offset1);
- O2SMap.insert(std::make_pair(Offset2, User));
- Offsets.push_back(Offset2);
- if (Offset2 < Offset1) {
- Base = User;
- BaseOffset = Offset2;
- } else {
- BaseOffset = Offset1;
- }
- Cluster = true;
- }
+ // Sort them in increasing order.
+ std::sort(Offsets.begin(), Offsets.end());
+
+ // Check if the loads are close enough.
+ SmallVector<SDNode*, 4> Loads;
+ unsigned NumLoads = 0;
+ int64_t BaseOff = Offsets[0];
+ SDNode *BaseLoad = O2SMap[BaseOff];
+ Loads.push_back(BaseLoad);
+ for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+ int64_t Offset = Offsets[i];
+ SDNode *Load = O2SMap[Offset];
+ if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
+ break; // Stop right here. Ignore loads that are further away.
+ Loads.push_back(Load);
+ ++NumLoads;
+ }
- if (!Cluster)
- continue;
+ if (NumLoads == 0)
+ return;
- // Sort them in increasing order.
- std::sort(Offsets.begin(), Offsets.end());
-
- // Check if the loads are close enough.
- SmallVector<SDNode*, 4> Loads;
- unsigned NumLoads = 0;
- int64_t BaseOff = Offsets[0];
- SDNode *BaseLoad = O2SMap[BaseOff];
- Loads.push_back(BaseLoad);
- for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
- int64_t Offset = Offsets[i];
- SDNode *Load = O2SMap[Offset];
- if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
- NumLoads))
- break; // Stop right here. Ignore loads that are further away.
- Loads.push_back(Load);
- ++NumLoads;
- }
+ // Cluster loads by adding MVT::Flag outputs and inputs. This also
+ // ensures they are scheduled in order of increasing addresses.
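+ // Conceptually: Loads[0] -flag-> Loads[1] -flag-> ... -flag-> Loads[N-1],
+ // so the glued loads form one scheduling unit and are emitted lowest
+ // address first.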
+ SDNode *Lead = Loads[0];
+ AddFlags(Lead, SDValue(0, 0), true, DAG);
+
+ SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1);
+ for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+ bool OutFlag = I < E - 1;
+ SDNode *Load = Loads[I];
+
+ AddFlags(Load, InFlag, OutFlag, DAG);
+
+ if (OutFlag)
+ InFlag = SDValue(Load, Load->getNumValues() - 1);
+
+ ++LoadsClustered;
+ }
+}
- if (NumLoads == 0)
+/// ClusterNodes - Cluster certain nodes which should be scheduled together.
+///
+void ScheduleDAGSDNodes::ClusterNodes() {
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ SDNode *Node = &*NI;
+ if (!Node || !Node->isMachineOpcode())
continue;
- // Cluster loads by adding MVT::Flag outputs and inputs. This also
- // ensure they are scheduled in order of increasing addresses.
- SDNode *Lead = Loads[0];
- AddFlags(Lead, SDValue(0,0), true, DAG);
- SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1);
- for (unsigned i = 1, e = Loads.size(); i != e; ++i) {
- bool OutFlag = i < e-1;
- SDNode *Load = Loads[i];
- AddFlags(Load, InFlag, OutFlag, DAG);
- if (OutFlag)
- InFlag = SDValue(Load, Load->getNumValues()-1);
- ++LoadsClustered;
- }
+ unsigned Opc = Node->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ if (TID.mayLoad())
+ // Cluster loads from "near" addresses into combined SUnits.
+ ClusterNeighboringLoads(Node);
}
}
@@ -364,8 +393,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (Cost >= 0)
PhysReg = 0;
- const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
- OpSU->Latency, PhysReg);
+ // If this is a ctrl dep, latency is 1.
+ unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+ const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
+ OpLatency, PhysReg);
if (!isChain && !UnitLatencies) {
ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
@@ -382,8 +413,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
- // Cluster loads from "near" addresses into combined SUnits.
- ClusterNeighboringLoads();
+ // Cluster certain nodes which should be scheduled together.
+ ClusterNodes();
// Populate the SUnits array.
BuildSchedUnits();
// Compute all the scheduling dependencies between nodes.
@@ -427,15 +458,18 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
return;
unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
- if (Def->isMachineOpcode() && Use->isMachineOpcode()) {
+ if (Def->isMachineOpcode()) {
const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
if (DefIdx >= II.getNumDefs())
return;
int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
if (DefCycle < 0)
return;
- const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
- int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
+ int UseCycle = 1;
+ if (Use->isMachineOpcode()) {
+ const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
+ UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
+ }
if (UseCycle >= 0) {
int Latency = DefCycle - UseCycle + 1;
if (Latency >= 0)
@@ -473,7 +507,7 @@ namespace {
}
// ProcessSourceNode - Process nodes with source order numbers. These are added
-// to a vector which EmitSchedule use to determine how to insert dbg_value
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
// instructions in the right order.
static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
InstrEmitter &Emitter,
@@ -485,13 +519,13 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
return;
MachineBasicBlock *BB = Emitter.getBlock();
- if (BB->empty() || BB->back().isPHI()) {
+ if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
// Did not insert any instruction.
Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
return;
}
- Orders.push_back(std::make_pair(Order, &BB->back()));
+ Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
if (!N->getHasDebugValue())
return;
// Opportunistically insert immediate dbg_value uses, i.e. those with source
@@ -530,7 +564,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
for (; PDI != PDE; ++PDI) {
MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
if (DbgMI)
- BB->insert(BB->end(), DbgMI);
+ BB->insert(InsertPos, DbgMI);
}
}
@@ -574,9 +608,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
// Insert all the dbg_values which have not already been inserted in source
// order sequence.
if (HasDbg) {
- MachineBasicBlock::iterator BBBegin = BB->empty() ? BB->end() : BB->begin();
- while (BBBegin != BB->end() && BBBegin->isPHI())
- ++BBBegin;
+ MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
// Sort the source order instructions and use the order to insert debug
// values.
@@ -586,14 +618,12 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
// Now emit the rest according to source order.
unsigned LastOrder = 0;
- MachineInstr *LastMI = 0;
for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
unsigned Order = Orders[i].first;
MachineInstr *MI = Orders[i].second;
// Insert all SDDbgValue's whose order(s) are before "Order".
if (!MI)
continue;
- MachineBasicBlock *MIBB = MI->getParent();
#ifndef NDEBUG
unsigned LastDIOrder = 0;
#endif
@@ -612,13 +642,14 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
// Insert to start of the BB (after PHIs).
BB->insert(BBBegin, DbgMI);
else {
+ // Insert at the instruction, which may be in a different
+ // block, if the block was split by a custom inserter.
MachineBasicBlock::iterator Pos = MI;
- MIBB->insert(llvm::next(Pos), DbgMI);
+ MI->getParent()->insert(llvm::next(Pos), DbgMI);
}
}
}
LastOrder = Order;
- LastMI = MI;
}
// Add trailing DbgValue's before the terminator. FIXME: May want to add
// some of them before one or more conditional branches?
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index e8714ba83285..842fc8c72703 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -108,7 +108,10 @@ namespace llvm {
private:
/// ClusterNeighboringLoads - Cluster loads from "near" addresses into
/// combined SUnits.
- void ClusterNeighboringLoads();
+ void ClusterNeighboringLoads(SDNode *Node);
+ /// ClusterNodes - Cluster certain nodes which should be scheduled together.
+ ///
+ void ClusterNodes();
/// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
void BuildSchedUnits();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 38bf68b8539f..e83a0346b535 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -790,9 +790,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
}
// EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli)
+SelectionDAG::SelectionDAG(const TargetMachine &tm)
: TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
- FLI(fli),
EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0) {
AllNodes.push_back(&EntryNode);
@@ -808,7 +807,6 @@ void SelectionDAG::init(MachineFunction &mf) {
SelectionDAG::~SelectionDAG() {
allnodes_clear();
delete Ordering;
- DbgInfo->clear();
delete DbgInfo;
}
@@ -835,11 +833,8 @@ void SelectionDAG::clear() {
EntryNode.UseList = 0;
AllNodes.push_back(&EntryNode);
Root = getEntryNode();
- delete Ordering;
- Ordering = new SDNodeOrdering();
+ Ordering->clear();
DbgInfo->clear();
- delete DbgInfo;
- DbgInfo = new SDDbgInfo();
}
SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
@@ -980,7 +975,7 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
}
}
-SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
EVT VT, int64_t Offset,
bool isTargetGA,
unsigned char TargetFlags) {
@@ -1015,7 +1010,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, GV, VT,
+ SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT,
Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -2291,7 +2286,6 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N,
unsigned i) {
EVT VT = N->getValueType(0);
- DebugLoc dl = N->getDebugLoc();
if (N->getMaskElt(i) < 0)
return getUNDEF(VT.getVectorElementType());
unsigned Index = N->getMaskElt(i);
@@ -2475,9 +2469,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
- if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND)
+
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND)
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+
+ // (ext (trunc x)) -> x
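+ // e.g. any_extend(truncate(x:i32):i16):i32 can simply reuse x, since
+ // ANY_EXTEND leaves the bits above the truncated width undefined.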
+ if (OpOpcode == ISD::TRUNCATE) {
+ SDValue OpOp = Operand.getNode()->getOperand(0);
+ if (OpOp.getValueType() == VT)
+ return OpOp;
+ }
break;
case ISD::TRUNCATE:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
@@ -2622,7 +2625,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
N2.getOpcode() == ISD::BUILD_VECTOR) {
SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
- Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
+ Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
}
break;
@@ -3011,7 +3014,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3) {
// Perform various simplifications.
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
switch (Opcode) {
case ISD::CONCAT_VECTORS:
// A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
@@ -3020,8 +3022,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
N2.getOpcode() == ISD::BUILD_VECTOR &&
N3.getOpcode() == ISD::BUILD_VECTOR) {
SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
- Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
- Elts.insert(Elts.end(), N3.getNode()->op_begin(), N3.getNode()->op_end());
+ Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+ Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
}
break;
@@ -3041,14 +3043,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
if (N2 == N3) return N2; // select C, X, X -> X
break;
- case ISD::BRCOND:
- if (N2C) {
- if (N2C->getZExtValue()) // Unconditional branch
- return getNode(ISD::BR, DL, MVT::Other, N1, N3);
- else
- return N1; // Never-taken branch
- }
- break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
break;
@@ -3267,6 +3261,15 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
if (VT.bitsGT(LVT))
VT = LVT;
}
+
+ // If we're optimizing for size, and there is a limit, bump the maximum number
+ // of operations inserted down to 4. This is a wild guess that approximates
+ // the size of a call to memcpy or memset (3 arguments + call).
+ if (Limit != ~0U) {
+ const Function *F = DAG.getMachineFunction().getFunction();
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ Limit = 4;
+ }
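+ // For instance, with the limit at 4 a copy that would need eight stores
+ // on the target exceeds the limit, FindOptimalMemOpLowering returns false,
+ // and the callers typically fall back to a real memcpy/memset call, which
+ // is smaller at -Os.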
unsigned NumMemOps = 0;
while (Size != 0) {
@@ -3321,9 +3324,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
std::string Str;
bool CopyFromStr = isMemSrcFromString(Src, Str);
bool isZeroStr = CopyFromStr && Str.empty();
- uint64_t Limit = -1ULL;
- if (!AlwaysInline)
- Limit = TLI.getMaxStoresPerMemcpy();
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy();
+
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
(isZeroStr ? 0 : SrcAlign),
@@ -3368,7 +3370,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// FIXME does the case above also need this?
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
assert(NVT.bitsGE(VT));
- Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
+ Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
SrcSV, SrcSVOff + SrcOff, VT, isVol, false,
MinAlign(SrcAlign, SrcOff));
@@ -3401,9 +3403,6 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// below a certain threshold.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
- uint64_t Limit = -1ULL;
- if (!AlwaysInline)
- Limit = TLI.getMaxStoresPerMemmove();
bool DstAlignCanChange = false;
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
@@ -3412,6 +3411,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove();
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
@@ -3895,8 +3895,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
}
SDValue
-SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
- ISD::LoadExtType ExtType, EVT VT, SDValue Chain,
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset,
const Value *SV, int SVOffset, EVT MemVT,
bool isVolatile, bool isNonTemporal,
@@ -3919,12 +3919,12 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
MachineMemOperand *MMO =
MF.getMachineMemOperand(SV, Flags, SVOffset,
MemVT.getStoreSize(), Alignment);
- return getLoad(AM, dl, ExtType, VT, Chain, Ptr, Offset, MemVT, MMO);
+ return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
SDValue
-SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
- ISD::LoadExtType ExtType, EVT VT, SDValue Chain,
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset, EVT MemVT,
MachineMemOperand *MMO) {
if (VT == MemVT) {
@@ -3974,18 +3974,18 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
bool isVolatile, bool isNonTemporal,
unsigned Alignment) {
SDValue Undef = getUNDEF(Ptr.getValueType());
- return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef,
+ return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment);
}
-SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
const Value *SV,
int SVOffset, EVT MemVT,
bool isVolatile, bool isNonTemporal,
unsigned Alignment) {
SDValue Undef = getUNDEF(Ptr.getValueType());
- return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef,
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment);
}
@@ -3995,7 +3995,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
"Load is already a indexed load!");
- return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(),
+ return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
LD->getChain(), Base, Offset, LD->getSrcValue(),
LD->getSrcValueOffset(), LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
@@ -4141,9 +4141,10 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
- SDValue SV) {
- SDValue Ops[] = { Chain, Ptr, SV };
- return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3);
+ SDValue SV,
+ unsigned Align) {
+ SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) };
+ return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4);
}
SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
@@ -4425,17 +4426,16 @@ SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
/// already exists. If the resultant node does not exist in the DAG, the
/// input node is returned. As a degenerate case, if you specify the same
/// input operands as the node already has, the input node is returned.
-SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) {
- SDNode *N = InN.getNode();
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
// Check to see if there is no change.
- if (Op == N->getOperand(0)) return InN;
+ if (Op == N->getOperand(0)) return N;
// See if the modified node already exists.
void *InsertPos = 0;
if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
- return SDValue(Existing, InN.getResNo());
+ return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
@@ -4447,22 +4447,20 @@ SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) {
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
- return InN;
+ return N;
}
-SDValue SelectionDAG::
-UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) {
- SDNode *N = InN.getNode();
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
// Check to see if there is no change.
if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
- return InN; // No operands changed, just return the input node.
+ return N; // No operands changed, just return the input node.
// See if the modified node already exists.
void *InsertPos = 0;
if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
- return SDValue(Existing, InN.getResNo());
+ return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
@@ -4477,32 +4475,31 @@ UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) {
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
- return InN;
+ return N;
}
-SDValue SelectionDAG::
-UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, SDValue Op3) {
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) {
SDValue Ops[] = { Op1, Op2, Op3 };
return UpdateNodeOperands(N, Ops, 3);
}
-SDValue SelectionDAG::
-UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2,
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
SDValue Op3, SDValue Op4) {
SDValue Ops[] = { Op1, Op2, Op3, Op4 };
return UpdateNodeOperands(N, Ops, 4);
}
-SDValue SelectionDAG::
-UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2,
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
SDValue Op3, SDValue Op4, SDValue Op5) {
SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
return UpdateNodeOperands(N, Ops, 5);
}
-SDValue SelectionDAG::
-UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) {
- SDNode *N = InN.getNode();
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) {
assert(N->getNumOperands() == NumOps &&
"Update with wrong number of operands");
@@ -4516,12 +4513,12 @@ UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) {
}
// No operands changed, just return the input node.
- if (!AnyChange) return InN;
+ if (!AnyChange) return N;
// See if the modified node already exists.
void *InsertPos = 0;
if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
- return SDValue(Existing, InN.getResNo());
+ return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
@@ -4535,7 +4532,7 @@ UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) {
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
- return InN;
+ return N;
}
/// DropOperands - Release the operands and set this node to have
@@ -5378,9 +5375,10 @@ HandleSDNode::~HandleSDNode() {
DropOperands();
}
-GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA,
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL,
+ const GlobalValue *GA,
EVT VT, int64_t o, unsigned char TF)
- : SDNode(Opc, DebugLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) {
+ : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
TheGlobal = GA;
}
@@ -5669,13 +5667,16 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FSQRT: return "fsqrt";
case ISD::FSIN: return "fsin";
case ISD::FCOS: return "fcos";
- case ISD::FPOWI: return "fpowi";
- case ISD::FPOW: return "fpow";
case ISD::FTRUNC: return "ftrunc";
case ISD::FFLOOR: return "ffloor";
case ISD::FCEIL: return "fceil";
case ISD::FRINT: return "frint";
case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::FEXP: return "fexp";
+ case ISD::FEXP2: return "fexp2";
+ case ISD::FLOG: return "flog";
+ case ISD::FLOG2: return "flog2";
+ case ISD::FLOG10: return "flog10";
// Binary operators
case ISD::ADD: return "add";
@@ -5706,7 +5707,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FREM: return "frem";
case ISD::FCOPYSIGN: return "fcopysign";
case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FPOW: return "fpow";
+ case ISD::FPOWI: return "fpowi";
case ISD::SETCC: return "setcc";
case ISD::VSETCC: return "vsetcc";
case ISD::SELECT: return "select";
@@ -6260,23 +6263,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
FrameOffset);
- if (MFI.isFixedObjectIndex(FrameIdx)) {
- int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset;
-
- // The alignment of the frame index can be determined from its offset from
- // the incoming frame position. If the frame object is at offset 32 and
- // the stack is guaranteed to be 16-byte aligned, then we know that the
- // object is 16-byte aligned.
- unsigned StackAlign = getTarget().getFrameInfo()->getStackAlignment();
- unsigned Align = MinAlign(ObjectOffset, StackAlign);
-
- // Finally, the frame object itself may have a known alignment. Factor
- // the alignment + offset into a new alignment. For example, if we know
- // the FI is 8 byte aligned, but the pointer is 4 off, we really have a
- // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte
- // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc.
- return std::max(Align, FIInfoAlign);
- }
return FIInfoAlign;
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index fbe601f2db09..d323c163c143 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "isel"
#include "SDNodeDbgValue.h"
#include "SelectionDAGBuilder.h"
-#include "FunctionLoweringInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -32,6 +31,7 @@
#include "llvm/Module.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -70,113 +70,6 @@ LimitFPPrecision("limit-float-precision",
cl::location(LimitFloatPrecision),
cl::init(0));
-namespace {
- /// RegsForValue - This struct represents the registers (physical or virtual)
- /// that a particular set of values is assigned, and the type information
- /// about the value. The most common situation is to represent one value at a
- /// time, but struct or array values are handled element-wise as multiple
- /// values. The splitting of aggregates is performed recursively, so that we
- /// never have aggregate-typed registers. The values at this point do not
- /// necessarily have legal types, so each value may require one or more
- /// registers of some legal type.
- ///
- struct RegsForValue {
- /// TLI - The TargetLowering object.
- ///
- const TargetLowering *TLI;
-
- /// ValueVTs - The value types of the values, which may not be legal, and
- /// may need be promoted or synthesized from one or more registers.
- ///
- SmallVector<EVT, 4> ValueVTs;
-
- /// RegVTs - The value types of the registers. This is the same size as
- /// ValueVTs and it records, for each value, what the type of the assigned
- /// register or registers are. (Individual values are never synthesized
- /// from more than one type of register.)
- ///
- /// With virtual registers, the contents of RegVTs is redundant with TLI's
- /// getRegisterType member function, however when with physical registers
- /// it is necessary to have a separate record of the types.
- ///
- SmallVector<EVT, 4> RegVTs;
-
- /// Regs - This list holds the registers assigned to the values.
- /// Each legal or promoted value requires one register, and each
- /// expanded value requires multiple registers.
- ///
- SmallVector<unsigned, 4> Regs;
-
- RegsForValue() : TLI(0) {}
-
- RegsForValue(const TargetLowering &tli,
- const SmallVector<unsigned, 4> &regs,
- EVT regvt, EVT valuevt)
- : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
- RegsForValue(const TargetLowering &tli,
- const SmallVector<unsigned, 4> &regs,
- const SmallVector<EVT, 4> &regvts,
- const SmallVector<EVT, 4> &valuevts)
- : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
- RegsForValue(LLVMContext &Context, const TargetLowering &tli,
- unsigned Reg, const Type *Ty) : TLI(&tli) {
- ComputeValueVTs(tli, Ty, ValueVTs);
-
- for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT ValueVT = ValueVTs[Value];
- unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT);
- EVT RegisterVT = TLI->getRegisterType(Context, ValueVT);
- for (unsigned i = 0; i != NumRegs; ++i)
- Regs.push_back(Reg + i);
- RegVTs.push_back(RegisterVT);
- Reg += NumRegs;
- }
- }
-
- /// areValueTypesLegal - Return true if types of all the values are legal.
- bool areValueTypesLegal() {
- for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT RegisterVT = RegVTs[Value];
- if (!TLI->isTypeLegal(RegisterVT))
- return false;
- }
- return true;
- }
-
-
- /// append - Add the specified values to this one.
- void append(const RegsForValue &RHS) {
- TLI = RHS.TLI;
- ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
- RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
- Regs.append(RHS.Regs.begin(), RHS.Regs.end());
- }
-
-
- /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
- /// this value and returns the result as a ValueVTs value. This uses
- /// Chain/Flag as the input and updates them for the output Chain/Flag.
- /// If the Flag pointer is NULL, no flag is used.
- SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const;
-
- /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
- /// specified value into the registers specified by this object. This uses
- /// Chain/Flag as the input and updates them for the output Chain/Flag.
- /// If the Flag pointer is NULL, no flag is used.
- void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const;
-
- /// AddInlineAsmOperands - Add this value to the specified inlineasm node
- /// operand list. This adds the code marker, matching input operand index
- /// (if applicable), and includes the number of values added into it.
- void AddInlineAsmOperands(unsigned Kind,
- bool HasMatching, unsigned MatchingIdx,
- SelectionDAG &DAG,
- std::vector<SDValue> &Ops) const;
- };
-}
-
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger then ValueVT then AssertOp can be used to specify whether the extra
@@ -528,6 +421,268 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
}
}
+namespace {
+ /// RegsForValue - This struct represents the registers (physical or virtual)
+ /// that a particular set of values is assigned, and the type information
+ /// about the value. The most common situation is to represent one value at a
+ /// time, but struct or array values are handled element-wise as multiple
+ /// values. The splitting of aggregates is performed recursively, so that we
+ /// never have aggregate-typed registers. The values at this point do not
+ /// necessarily have legal types, so each value may require one or more
+ /// registers of some legal type.
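+ ///
+ /// For example, an i64 value on a 32-bit target would typically be
+ /// described as ValueVTs = { i64 }, RegVTs = { i32 }, with two entries in
+ /// Regs, one for each 32-bit half.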
+ ///
+ struct RegsForValue {
+ /// ValueVTs - The value types of the values, which may not be legal, and
+ /// may need to be promoted or synthesized from one or more registers.
+ ///
+ SmallVector<EVT, 4> ValueVTs;
+
+ /// RegVTs - The value types of the registers. This is the same size as
+ /// ValueVTs and it records, for each value, what the type of the assigned
+ /// register or registers are. (Individual values are never synthesized
+ /// from more than one type of register.)
+ ///
+ /// With virtual registers, the contents of RegVTs are redundant with TLI's
+ /// getRegisterType member function; with physical registers, however,
+ /// it is necessary to have a separate record of the types.
+ ///
+ SmallVector<EVT, 4> RegVTs;
+
+ /// Regs - This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ ///
+ SmallVector<unsigned, 4> Regs;
+
+ RegsForValue() {}
+
+ RegsForValue(const SmallVector<unsigned, 4> &regs,
+ EVT regvt, EVT valuevt)
+ : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+
+ RegsForValue(const SmallVector<unsigned, 4> &regs,
+ const SmallVector<EVT, 4> &regvts,
+ const SmallVector<EVT, 4> &valuevts)
+ : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
+
+ RegsForValue(LLVMContext &Context, const TargetLowering &tli,
+ unsigned Reg, const Type *Ty) {
+ ComputeValueVTs(tli, Ty, ValueVTs);
+
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
+ EVT RegisterVT = tli.getRegisterType(Context, ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(Reg + i);
+ RegVTs.push_back(RegisterVT);
+ Reg += NumRegs;
+ }
+ }
+
+ /// areValueTypesLegal - Return true if types of all the values are legal.
+ bool areValueTypesLegal(const TargetLowering &TLI) {
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT RegisterVT = RegVTs[Value];
+ if (!TLI.isTypeLegal(RegisterVT))
+ return false;
+ }
+ return true;
+ }
+
+ /// append - Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ }
+
+ /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+ /// this value and returns the result as a ValueVTs value. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+ DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const;
+
+ /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+ /// specified value into the registers specified by this object. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const;
+
+ /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+ /// operand list. This adds the code marker, matching input operand index
+ /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Kind,
+ bool HasMatching, unsigned MatchingIdx,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const;
+ };
+}
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+/// this value and returns the result as a ValueVT value. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+ FunctionLoweringInfo &FuncInfo,
+ DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Assemble the legal parts into the final values.
+ SmallVector<SDValue, 4> Values(ValueVTs.size());
+ SmallVector<SDValue, 8> Parts;
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ // Copy the legal parts from the registers.
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+ EVT RegisterVT = RegVTs[Value];
+
+ Parts.resize(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue P;
+ if (Flag == 0) {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+ } else {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+ *Flag = P.getValue(2);
+ }
+
+ Chain = P.getValue(1);
+
+ // If the source register was virtual and if we know something about it,
+ // add an assert node.
+ if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
+ RegisterVT.isInteger() && !RegisterVT.isVector()) {
+ unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
+ if (FuncInfo.LiveOutRegInfo.size() > SlotNo) {
+ const FunctionLoweringInfo::LiveOutInfo &LOI =
+ FuncInfo.LiveOutRegInfo[SlotNo];
+
+ unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned NumSignBits = LOI.NumSignBits;
+ unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
+ bool isSExt = true;
+ EVT FromVT(MVT::Other);
+ if (NumSignBits == RegSize)
+ isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
+ else if (NumZeroBits >= RegSize-1)
+ isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
+ else if (NumSignBits > RegSize-8)
+ isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
+ else if (NumZeroBits >= RegSize-8)
+ isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
+ else if (NumSignBits > RegSize-16)
+ isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
+ else if (NumZeroBits >= RegSize-16)
+ isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+ else if (NumSignBits > RegSize-32)
+ isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
+ else if (NumZeroBits >= RegSize-32)
+ isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+
+ if (FromVT != MVT::Other)
+ P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
+ }
+ }
+
+ Parts[i] = P;
+ }
+
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
+ NumRegs, RegisterVT, ValueVT);
+ Part += NumRegs;
+ Parts.clear();
+ }
+
+ return DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+ &Values[0], ValueVTs.size());
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+/// specified value into the registers specified by this object. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Get the list of the value's legal parts.
+ unsigned NumRegs = Regs.size();
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+ EVT RegisterVT = RegVTs[Value];
+
+ getCopyToParts(DAG, dl,
+ Val.getValue(Val.getResNo() + Value),
+ &Parts[Part], NumParts, RegisterVT);
+ Part += NumParts;
+ }
+
+ // Copy the parts into the registers.
+ SmallVector<SDValue, 8> Chains(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue Part;
+ if (Flag == 0) {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+ } else {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+ *Flag = Part.getValue(1);
+ }
+
+ Chains[i] = Part.getValue(0);
+ }
+
+ if (NumRegs == 1 || Flag)
+ // If NumRegs > 1 && Flag is used, then the use of the last CopyToReg is
+ // flagged to it. That is, the CopyToReg nodes and the user are considered
+ // a single scheduling unit. If we create a TokenFactor and return it as
+ // chain, then the TokenFactor is both a predecessor (operand) of the
+ // user as well as a successor (the TF operands are flagged to the user).
+ // c1, f1 = CopyToReg
+ // c2, f2 = CopyToReg
+ // c3 = TokenFactor c1, c2
+ // ...
+ // = op c3, ..., f2
+ Chain = Chains[NumRegs-1];
+ else
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list. This adds the code marker and includes the number of
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+ unsigned MatchingIdx,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
+ if (HasMatching)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+ SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
+ Ops.push_back(Res);
+
+ for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+ EVT RegisterVT = RegVTs[Value];
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ assert(Reg < Regs.size() && "Mismatch in # registers expected");
+ Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+ }
+ }
+}
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
AA = &aa;
@@ -543,6 +698,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
/// consumed.
void SelectionDAGBuilder::clear() {
NodeMap.clear();
+ UnusedArgNodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
CurDebugLoc = DebugLoc();
@@ -649,27 +805,63 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
}
}
+// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
+ // If we already have an SDValue for this value, use it. It's important
+ // to do this first, so that we don't create a CopyFromReg if we already
+ // have a regular SDValue.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ // If there's a virtual register allocated and initialized for this
+ // value, use it.
+ DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+ if (It != FuncInfo.ValueMap.end()) {
+ unsigned InReg = It->second;
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ }
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ return Val;
+}
+
+/// getNonRegisterValue - Return an SDValue for the given Value, but
+/// don't look in FuncInfo.ValueMap for a virtual register.
+SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
+ // If we already have an SDValue for this value, use it.
SDValue &N = NodeMap[V];
if (N.getNode()) return N;
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ return Val;
+}
+
+/// getValueImpl - Helper function for getValue and getNonRegisterValue.
+/// Create an SDValue for the given value.
+SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const Constant *C = dyn_cast<Constant>(V)) {
EVT VT = TLI.getValueType(V->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
- return N = DAG.getConstant(*CI, VT);
+ return DAG.getConstant(*CI, VT);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return N = DAG.getGlobalAddress(GV, VT);
+ return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT);
if (isa<ConstantPointerNull>(C))
- return N = DAG.getConstant(0, TLI.getPointerTy());
+ return DAG.getConstant(0, TLI.getPointerTy());
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
- return N = DAG.getConstantFP(*CFP, VT);
+ return DAG.getConstantFP(*CFP, VT);
if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
- return N = DAG.getUNDEF(VT);
+ return DAG.getUNDEF(VT);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
visit(CE->getOpcode(), *CE);
@@ -757,82 +949,25 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
}
- unsigned InReg = FuncInfo.ValueMap[V];
- assert(InReg && "Value not in map!");
-
- RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
- SDValue Chain = DAG.getEntryNode();
- return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
-}
-
-/// Get the EVTs and ArgFlags collections that represent the legalized return
-/// type of the given function. This does not require a DAG or a return value,
-/// and is suitable for use before any DAGs for the function are constructed.
-static void getReturnInfo(const Type* ReturnType,
- Attributes attr, SmallVectorImpl<EVT> &OutVTs,
- SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
- const TargetLowering &TLI,
- SmallVectorImpl<uint64_t> *Offsets = 0) {
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, ReturnType, ValueVTs);
- unsigned NumValues = ValueVTs.size();
- if (NumValues == 0) return;
- unsigned Offset = 0;
-
- for (unsigned j = 0, f = NumValues; j != f; ++j) {
- EVT VT = ValueVTs[j];
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
-
- if (attr & Attribute::SExt)
- ExtendKind = ISD::SIGN_EXTEND;
- else if (attr & Attribute::ZExt)
- ExtendKind = ISD::ZERO_EXTEND;
-
- // FIXME: C calling convention requires the return type to be promoted to
- // at least 32-bit. But this is not necessary for non-C calling
- // conventions. The frontend should mark functions whose return values
- // require promoting with signext or zeroext attributes.
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
- EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
- if (VT.bitsLT(MinVT))
- VT = MinVT;
- }
-
- unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
- EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
- unsigned PartSize = TLI.getTargetData()->getTypeAllocSize(
- PartVT.getTypeForEVT(ReturnType->getContext()));
-
- // 'inreg' on function refers to return value
- ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (attr & Attribute::InReg)
- Flags.setInReg();
-
- // Propagate extension type if any
- if (attr & Attribute::SExt)
- Flags.setSExt();
- else if (attr & Attribute::ZExt)
- Flags.setZExt();
-
- for (unsigned i = 0; i < NumParts; ++i) {
- OutVTs.push_back(PartVT);
- OutFlags.push_back(Flags);
- if (Offsets)
- {
- Offsets->push_back(Offset);
- Offset += PartSize;
- }
- }
+ // If this is an instruction which fast-isel has deferred, select it now.
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
+ SDValue Chain = DAG.getEntryNode();
+ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
}
+
+ llvm_unreachable("Can't get register for value!");
+ return SDValue();
}
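The hunk above splits value lookup into getValue(), getNonRegisterValue(), and getValueImpl(), with a fixed lookup order: the per-block NodeMap cache first, then a virtual register recorded in FuncInfo.ValueMap (emitted as a CopyFromReg), and only then a freshly materialized node. A minimal sketch of that ordering, using hypothetical stand-in types rather than the real SDValue/SelectionDAG classes:

#include <map>
#include <string>

struct Node { std::string Desc; bool Valid = false; };

struct Builder {
  std::map<int, Node> NodeMap;       // values already lowered in this block
  std::map<int, unsigned> ValueMap;  // values assigned a virtual register

  // Stand-in for getValueImpl(): build a brand-new node for V.
  Node materialize(int V) { return Node{"materialized #" + std::to_string(V), true}; }

  Node getValue(int V) {
    Node &N = NodeMap[V];
    if (N.Valid) return N;                 // 1. cached node wins
    auto It = ValueMap.find(V);
    if (It != ValueMap.end())              // 2. otherwise copy from the vreg
      return N = Node{"CopyFromReg vreg" + std::to_string(It->second), true};
    return NodeMap[V] = materialize(V);    // 3. otherwise materialize now
  }

  Node getNonRegisterValue(int V) {        // same, but never consults ValueMap
    Node &N = NodeMap[V];
    if (N.Valid) return N;
    return NodeMap[V] = materialize(V);
  }
};

int main() {
  Builder B;
  B.ValueMap[7] = 42;                  // value 7 lives in vreg 42
  Node A = B.getValue(3);              // materialized, then cached
  Node C = B.getValue(7);              // served as a copy from vreg 42
  Node D = B.getNonRegisterValue(7);   // cached result from the lookup above
  return (A.Valid && C.Valid && D.Valid) ? 0 : 1;
}

The later hunk that makes CopyValueToVirtualRegister use getNonRegisterValue() leans on exactly this distinction: when exporting a value into its own register, reading it back through ValueMap would copy a register to itself, which the assert there forbids.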
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
- FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+ SmallVector<SDValue, 8> OutVals;
- if (!FLI.CanLowerReturn) {
- unsigned DemoteReg = FLI.DemoteRegister;
+ if (!FuncInfo.CanLowerReturn) {
+ unsigned DemoteReg = FuncInfo.DemoteRegister;
const Function *F = I.getParent()->getParent();
// Emit a store of the return value through the virtual register.
@@ -908,8 +1043,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
else if (F->paramHasAttr(0, Attribute::ZExt))
Flags.setZExt();
- for (unsigned i = 0; i < NumParts; ++i)
- Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
+ /*isfixed=*/true));
+ OutVals.push_back(Parts[i]);
+ }
}
}
}
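These visitRet changes (and the matching LowerReturn/LowerCall signature updates further down) stop packing the SDValue into ISD::OutputArg: the descriptor vector Outs now carries only types and flags, while the values travel in an index-aligned OutVals vector. A small sketch of the pattern, with placeholder types standing in for ISD::OutputArg and SDValue:

#include <cstdint>
#include <vector>

struct ArgFlags { bool SExt = false, ZExt = false, InReg = false; };
struct OutputArg { ArgFlags Flags; int TypeId; bool IsFixed; };  // descriptor only
using Value = std::uint64_t;                                     // placeholder "SDValue"

int main() {
  std::vector<OutputArg> Outs;   // what each returned part looks like
  std::vector<Value> OutVals;    // the parts themselves, index-matched

  for (int part = 0; part < 3; ++part) {
    Outs.push_back({ArgFlags{}, /*TypeId=*/part, /*IsFixed=*/true});
    OutVals.push_back(Value(part) * 10);   // OutVals[i] pairs with Outs[i]
  }
  // A LowerReturn-style hook now receives both vectors and must keep them
  // index-aligned.
  return Outs.size() == OutVals.size() ? 0 : 1;
}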
@@ -918,7 +1056,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
CallingConv::ID CallConv =
DAG.getMachineFunction().getFunction()->getCallingConv();
Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
- Outs, getCurDebugLoc(), DAG);
+ Outs, OutVals, getCurDebugLoc(), DAG);
// Verify that the target's LowerReturn behaved as expected.
assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
@@ -1119,7 +1257,7 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
}
void SelectionDAGBuilder::visitBr(const BranchInst &I) {
- MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()];
+ MachineBasicBlock *BrMBB = FuncInfo.MBB;
// Update machine-CFG edges.
MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
@@ -1269,18 +1407,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(CB.TrueBB));
- // If the branch was constant folded, fix up the CFG.
- if (BrCond.getOpcode() == ISD::BR) {
- SwitchBB->removeSuccessor(CB.FalseBB);
- } else {
- // Otherwise, go ahead and insert the false branch.
- if (BrCond == getControlRoot())
- SwitchBB->removeSuccessor(CB.TrueBB);
-
- if (CB.FalseBB != NextBlock)
- BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
- DAG.getBasicBlock(CB.FalseBB));
- }
+ // Insert the false branch.
+ if (CB.FalseBB != NextBlock)
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB));
DAG.setRoot(BrCond);
}
@@ -1319,7 +1449,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
// therefore require extension or truncating.
SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
- unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
+ unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
@@ -1370,7 +1500,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(),
TLI.getPointerTy());
- B.Reg = FuncInfo.MakeReg(TLI.getPointerTy());
+ B.Reg = FuncInfo.CreateReg(TLI.getPointerTy());
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
B.Reg, ShiftOp);
@@ -1402,29 +1532,41 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
- // Make desired shift
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
TLI.getPointerTy());
- SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
- TLI.getPointerTy(),
- DAG.getConstant(1, TLI.getPointerTy()),
- ShiftOp);
-
- // Emit bit tests and jumps
- SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
- TLI.getPointerTy(), SwitchVal,
- DAG.getConstant(B.Mask, TLI.getPointerTy()));
- SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(),
- TLI.getSetCCResultType(AndOp.getValueType()),
- AndOp, DAG.getConstant(0, TLI.getPointerTy()),
- ISD::SETNE);
+ SDValue Cmp;
+ if (CountPopulation_64(B.Mask) == 1) {
+ // Testing for a single bit; just compare the shift count with what it
+ // would need to be to shift a 1 bit in that position.
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(ShiftOp.getValueType()),
+ ShiftOp,
+ DAG.getConstant(CountTrailingZeros_64(B.Mask),
+ TLI.getPointerTy()),
+ ISD::SETEQ);
+ } else {
+ // Make desired shift
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ DAG.getConstant(1, TLI.getPointerTy()),
+ ShiftOp);
+
+ // Emit bit tests and jumps
+ SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ TLI.getPointerTy(), SwitchVal,
+ DAG.getConstant(B.Mask, TLI.getPointerTy()));
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(AndOp.getValueType()),
+ AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+ ISD::SETNE);
+ }
SwitchBB->addSuccessor(B.TargetBB);
SwitchBB->addSuccessor(NextMBB);
SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
MVT::Other, getControlRoot(),
- AndCmp, DAG.getBasicBlock(B.TargetBB));
+ Cmp, DAG.getBasicBlock(B.TargetBB));
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -1441,7 +1583,7 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
}
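The visitBitTestCase hunk above adds a fast path: when the case mask has exactly one bit set, the lowering compares the shift amount against that bit's index instead of materializing 1 << x and masking. A standalone check of the equivalence it relies on (plain C++, not LLVM code; the helpers mirror CountPopulation_64 and CountTrailingZeros_64):

#include <cassert>
#include <cstdint>

static unsigned popcount64(uint64_t v) { unsigned n = 0; for (; v; v &= v - 1) ++n; return n; }
static unsigned cttz64(uint64_t v)     { unsigned n = 0; while (!(v & 1)) { v >>= 1; ++n; } return n; }

int main() {
  for (unsigned bit = 0; bit < 64; ++bit) {
    uint64_t Mask = uint64_t(1) << bit;          // single-bit mask
    assert(popcount64(Mask) == 1);
    for (unsigned x = 0; x < 64; ++x) {
      bool shiftAndTest = ((uint64_t(1) << x) & Mask) != 0;  // old lowering
      bool compareTest  = (x == cttz64(Mask));               // new lowering
      assert(shiftAndTest == compareTest);
    }
  }
  return 0;
}

For single-bit masks this trades a shift, an and, and a compare-against-zero for one compare against a constant.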
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
- MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()];
+ MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
// Retrieve successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
@@ -1969,7 +2111,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
}
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
- MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()];
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
// Figure out which block is immediately after the current one.
MachineBasicBlock *NextBlock = 0;
@@ -2035,7 +2177,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
}
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
- MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()];
+ MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
// Update machine-CFG edges with unique successors.
SmallVector<BasicBlock*, 32> succs;
@@ -2245,7 +2387,6 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
- EVT SrcVT = N.getValueType();
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
}
@@ -2254,7 +2395,6 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
- EVT SrcVT = N.getValueType();
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
}
@@ -2579,7 +2719,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// If this is a constant subscript, handle it quickly.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
- if (CI->getZExtValue() == 0) continue;
+ if (CI->isZero()) continue;
uint64_t Offs =
TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
SDValue OffsVal;
@@ -2643,12 +2783,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
SDValue AllocSize = getValue(I.getArraySize());
- AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(),
- AllocSize,
- DAG.getConstant(TySize, AllocSize.getValueType()));
-
EVT IntPtr = TLI.getPointerTy();
- AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
+ if (AllocSize.getValueType() != IntPtr)
+ AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
+
+ AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr,
+ AllocSize,
+ DAG.getConstant(TySize, IntPtr));
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
@@ -2804,8 +2945,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
// Add all operands of the call to the operand list.
- for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
- SDValue Op = getValue(I.getOperand(i));
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ SDValue Op = getValue(I.getArgOperand(i));
assert(TLI.isTypeLegal(Op.getValueType()) &&
"Intrinsic uses a non-legal type?");
Ops.push_back(Op);
@@ -2910,11 +3051,11 @@ SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
SDValue Root = getRoot();
SDValue L =
DAG.getAtomic(Op, getCurDebugLoc(),
- getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+ getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
Root,
- getValue(I.getOperand(1)),
- getValue(I.getOperand(2)),
- I.getOperand(1));
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ I.getArgOperand(0));
setValue(&I, L);
DAG.setRoot(L.getValue(1));
return 0;
@@ -2923,8 +3064,8 @@ SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
const char *
SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
- SDValue Op1 = getValue(I.getOperand(1));
- SDValue Op2 = getValue(I.getOperand(2));
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
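The recurring substitution in the call-lowering hunks (I.getOperand(i) becoming I.getArgOperand(i-1), I.getNumOperands() becoming I.getNumArgOperands(), and I.getOperand(0) becoming I.getCalledValue()) removes the hard-coded assumption about where a call's callee sits in its operand list. A hypothetical miniature of the idea; MiniCall is not the real CallInst, it only shows how the argument accessor hides the layout (in this sketch the callee happens to be stored last):

#include <cassert>
#include <string>
#include <utility>
#include <vector>

class MiniCall {
  std::vector<std::string> Operands;   // callee stored last in this sketch
public:
  MiniCall(std::string Callee, std::vector<std::string> Args)
      : Operands(std::move(Args)) { Operands.push_back(std::move(Callee)); }

  const std::string &getOperand(unsigned i) const { return Operands[i]; }
  unsigned getNumArgOperands() const { return Operands.size() - 1; }
  const std::string &getArgOperand(unsigned i) const {
    assert(i < getNumArgOperands() && "argument index out of range");
    return Operands[i];                // layout detail hidden from callers
  }
  const std::string &getCalledValue() const { return Operands.back(); }
};

int main() {
  MiniCall C("memcpy", {"dst", "src", "len"});
  assert(C.getOperand(0) == "dst");        // raw index: depends on the layout
  assert(C.getArgOperand(0) == "dst");     // argument index: layout-agnostic
  assert(C.getCalledValue() == "memcpy");  // callee reached by name, not index
  return 0;
}

Call sites written against getArgOperand() and getCalledValue() keep working if the underlying operand order changes, which is what the rest of this patch converts visitIntrinsicCall, visitMemCmpCall, visitCall, and the va_* visitors to.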
@@ -2938,9 +3079,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
- if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getOperand(1));
+ SDValue Op = getValue(I.getArgOperand(0));
// Put the exponent in the right bit position for later addition to the
// final result:
@@ -3050,8 +3191,8 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
} else {
// No special expansion.
result = DAG.getNode(ISD::FEXP, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1)));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
}
setValue(&I, result);
@@ -3064,9 +3205,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
- if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getOperand(1));
+ SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
// Scale the exponent by log(2) [0.69314718f].
@@ -3160,8 +3301,8 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
} else {
// No special expansion.
result = DAG.getNode(ISD::FLOG, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1)));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
}
setValue(&I, result);
@@ -3174,9 +3315,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
- if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getOperand(1));
+ SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
// Get the exponent.
@@ -3269,8 +3410,8 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
} else {
// No special expansion.
result = DAG.getNode(ISD::FLOG2, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1)));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
}
setValue(&I, result);
@@ -3283,9 +3424,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
- if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getOperand(1));
+ SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
@@ -3371,8 +3512,8 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
} else {
// No special expansion.
result = DAG.getNode(ISD::FLOG10, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1)));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
}
setValue(&I, result);
@@ -3385,9 +3526,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
- if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getOperand(1));
+ SDValue Op = getValue(I.getArgOperand(0));
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
@@ -3485,8 +3626,8 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
} else {
// No special expansion.
result = DAG.getNode(ISD::FEXP2, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1)));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
}
setValue(&I, result);
@@ -3497,12 +3638,12 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
void
SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue result;
- const Value *Val = I.getOperand(1);
+ const Value *Val = I.getArgOperand(0);
DebugLoc dl = getCurDebugLoc();
bool IsExp10 = false;
if (getValue(Val).getValueType() == MVT::f32 &&
- getValue(I.getOperand(2)).getValueType() == MVT::f32 &&
+ getValue(I.getArgOperand(1)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
@@ -3513,7 +3654,7 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
}
if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getOperand(2));
+ SDValue Op = getValue(I.getArgOperand(1));
// Put the exponent in the right bit position for later addition to the
// final result:
@@ -3618,9 +3759,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
} else {
// No special expansion.
result = DAG.getNode(ISD::FPOW, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1)),
- getValue(I.getOperand(2)));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)));
}
setValue(&I, result);
@@ -3696,7 +3837,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
if (DV.isInlinedFnArgument(MF.getFunction()))
return false;
- MachineBasicBlock *MBB = FuncInfo.MBBMap[DI.getParent()];
+ MachineBasicBlock *MBB = FuncInfo.MBB;
if (MBB != &MF.front())
return false;
@@ -3750,11 +3891,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::vacopy: visitVACopy(I); return 0;
case Intrinsic::returnaddress:
setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::setjmp:
return "_setjmp"+!TLI.usesUnderscoreSetJmp();
@@ -3763,63 +3904,63 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::memcpy: {
// Assert for address < 256 since we support only user defined address
// spaces.
- assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace()
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
< 256 &&
- cast<PointerType>(I.getOperand(2)->getType())->getAddressSpace()
+ cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
< 256 &&
"Unknown address space");
- SDValue Op1 = getValue(I.getOperand(1));
- SDValue Op2 = getValue(I.getOperand(2));
- SDValue Op3 = getValue(I.getOperand(3));
- unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
- bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue();
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
- I.getOperand(1), 0, I.getOperand(2), 0));
+ I.getArgOperand(0), 0, I.getArgOperand(1), 0));
return 0;
}
case Intrinsic::memset: {
// Assert for address < 256 since we support only user defined address
// spaces.
- assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace()
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
< 256 &&
"Unknown address space");
- SDValue Op1 = getValue(I.getOperand(1));
- SDValue Op2 = getValue(I.getOperand(2));
- SDValue Op3 = getValue(I.getOperand(3));
- unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
- bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue();
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- I.getOperand(1), 0));
+ I.getArgOperand(0), 0));
return 0;
}
case Intrinsic::memmove: {
// Assert for address < 256 since we support only user defined address
// spaces.
- assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace()
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
< 256 &&
- cast<PointerType>(I.getOperand(2)->getType())->getAddressSpace()
+ cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
< 256 &&
"Unknown address space");
- SDValue Op1 = getValue(I.getOperand(1));
- SDValue Op2 = getValue(I.getOperand(2));
- SDValue Op3 = getValue(I.getOperand(3));
- unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
- bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue();
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
// If the source and destination are known to not be aliases, we can
// lower memmove as memcpy.
uint64_t Size = -1ULL;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
Size = C->getZExtValue();
- if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
+ if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) ==
AliasAnalysis::NoAlias) {
DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- false, I.getOperand(1), 0, I.getOperand(2), 0));
+ false, I.getArgOperand(0), 0, I.getArgOperand(1), 0));
return 0;
}
DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- I.getOperand(1), 0, I.getOperand(2), 0));
+ I.getArgOperand(0), 0, I.getArgOperand(1), 0));
return 0;
}
case Intrinsic::dbg_declare: {
@@ -3908,7 +4049,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else {
bool createUndef = false;
// FIXME : Why not use getValue() directly ?
- SDValue &N = NodeMap[V];
+ SDValue N = NodeMap[V];
+ if (!N.getNode() && isa<Argument>(V))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[V];
if (N.getNode()) {
if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
SDV = DAG.getDbgValue(Variable, N.getNode(),
@@ -3956,7 +4100,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_exception: {
// Insert the EXCEPTIONADDR instruction.
- assert(FuncInfo.MBBMap[I.getParent()]->isLandingPad() &&
+ assert(FuncInfo.MBB->isLandingPad() &&
"Call to eh.exception not in landing pad!");
SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
SDValue Ops[1];
@@ -3968,7 +4112,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_selector: {
- MachineBasicBlock *CallMBB = FuncInfo.MBBMap[I.getParent()];
+ MachineBasicBlock *CallMBB = FuncInfo.MBB;
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
if (CallMBB->isLandingPad())
AddCatchInfo(I, &MMI, CallMBB);
@@ -3978,13 +4122,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
#endif
// FIXME: Mark exception selector register as live in. Hack for PR1508.
unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo.MBBMap[I.getParent()]->addLiveIn(Reg);
+ if (Reg) FuncInfo.MBB->addLiveIn(Reg);
}
// Insert the EHSELECTION instruction.
SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
SDValue Ops[2];
- Ops[0] = getValue(I.getOperand(1));
+ Ops[0] = getValue(I.getArgOperand(0));
Ops[1] = getRoot();
SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
DAG.setRoot(Op.getValue(1));
@@ -3994,7 +4138,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
- GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
+ GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0));
unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
Res = DAG.getConstant(TypeID, MVT::i32);
setValue(&I, Res);
@@ -4007,15 +4151,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
MVT::Other,
getControlRoot(),
- getValue(I.getOperand(1)),
- getValue(I.getOperand(2))));
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
return 0;
case Intrinsic::eh_unwind_init:
DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
return 0;
case Intrinsic::eh_dwarf_cfa: {
- EVT VT = getValue(I.getOperand(1)).getValueType();
- SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl,
+ SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl,
TLI.getPointerTy());
SDValue Offset = DAG.getNode(ISD::ADD, dl,
TLI.getPointerTy(),
@@ -4031,7 +4174,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_sjlj_callsite: {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
@@ -4040,13 +4183,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_sjlj_setjmp: {
setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0))));
return 0;
}
case Intrinsic::eh_sjlj_longjmp: {
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
getRoot(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0))));
return 0;
}
@@ -4072,34 +4215,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
}
EVT DestVT = TLI.getValueType(I.getType());
- const Value *Op1 = I.getOperand(1);
+ const Value *Op1 = I.getArgOperand(0);
Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
DAG.getValueType(DestVT),
DAG.getValueType(getValue(Op1).getValueType()),
- getValue(I.getOperand(2)),
- getValue(I.getOperand(3)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)),
Code);
setValue(&I, Res);
return 0;
}
case Intrinsic::sqrt:
setValue(&I, DAG.getNode(ISD::FSQRT, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::powi:
- setValue(&I, ExpandPowI(dl, getValue(I.getOperand(1)),
- getValue(I.getOperand(2)), DAG));
+ setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG));
return 0;
case Intrinsic::sin:
setValue(&I, DAG.getNode(ISD::FSIN, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::cos:
setValue(&I, DAG.getNode(ISD::FCOS, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::log:
visitLog(I);
@@ -4121,14 +4264,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
case Intrinsic::convert_to_fp16:
setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
- MVT::i16, getValue(I.getOperand(1))));
+ MVT::i16, getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::convert_from_fp16:
setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl,
- MVT::f32, getValue(I.getOperand(1))));
+ MVT::f32, getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::pcmarker: {
- SDValue Tmp = getValue(I.getOperand(1));
+ SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
return 0;
}
@@ -4143,23 +4286,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::bswap:
setValue(&I, DAG.getNode(ISD::BSWAP, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::cttz: {
- SDValue Arg = getValue(I.getOperand(1));
+ SDValue Arg = getValue(I.getArgOperand(0));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
return 0;
}
case Intrinsic::ctlz: {
- SDValue Arg = getValue(I.getOperand(1));
+ SDValue Arg = getValue(I.getArgOperand(0));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
return 0;
}
case Intrinsic::ctpop: {
- SDValue Arg = getValue(I.getOperand(1));
+ SDValue Arg = getValue(I.getArgOperand(0));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg));
return 0;
@@ -4173,7 +4316,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
}
case Intrinsic::stackrestore: {
- Res = getValue(I.getOperand(1));
+ Res = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res));
return 0;
}
@@ -4183,8 +4326,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MachineFrameInfo *MFI = MF.getFrameInfo();
EVT PtrTy = TLI.getPointerTy();
- SDValue Src = getValue(I.getOperand(1)); // The guard's value.
- AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
+ SDValue Src = getValue(I.getArgOperand(0)); // The guard's value.
+ AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
int FI = FuncInfo.StaticAllocaMap[Slot];
MFI->setStackProtectorIndex(FI);
@@ -4201,14 +4344,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::objectsize: {
// If we don't know by now, we're never going to know.
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
assert(CI && "Non-constant type in __builtin_object_size?");
- SDValue Arg = getValue(I.getOperand(0));
+ SDValue Arg = getValue(I.getCalledValue());
EVT Ty = Arg.getValueType();
- if (CI->getZExtValue() == 0)
+ if (CI->isZero())
Res = DAG.getConstant(-1ULL, Ty);
else
Res = DAG.getConstant(0, Ty);
@@ -4221,14 +4364,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
case Intrinsic::init_trampoline: {
- const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts());
+ const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
SDValue Ops[6];
Ops[0] = getRoot();
- Ops[1] = getValue(I.getOperand(1));
- Ops[2] = getValue(I.getOperand(2));
- Ops[3] = getValue(I.getOperand(3));
- Ops[4] = DAG.getSrcValue(I.getOperand(1));
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
Ops[5] = DAG.getSrcValue(F);
Res = DAG.getNode(ISD::TRAMPOLINE, dl,
@@ -4241,8 +4384,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::gcroot:
if (GFI) {
- const Value *Alloca = I.getOperand(1);
- const Constant *TypeMap = cast<Constant>(I.getOperand(2));
+ const Value *Alloca = I.getArgOperand(0);
+ const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
GFI->addStackRoot(FI->getIndex(), TypeMap);
@@ -4274,9 +4417,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::prefetch: {
SDValue Ops[4];
Ops[0] = getRoot();
- Ops[1] = getValue(I.getOperand(1));
- Ops[2] = getValue(I.getOperand(2));
- Ops[3] = getValue(I.getOperand(3));
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
return 0;
}
@@ -4285,7 +4428,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Ops[6];
Ops[0] = getRoot();
for (int x = 1; x < 6; ++x)
- Ops[x] = getValue(I.getOperand(x));
+ Ops[x] = getValue(I.getArgOperand(x - 1));
DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
return 0;
@@ -4294,12 +4437,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Root = getRoot();
SDValue L =
DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
- getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+ getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
Root,
- getValue(I.getOperand(1)),
- getValue(I.getOperand(2)),
- getValue(I.getOperand(3)),
- I.getOperand(1));
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)),
+ I.getArgOperand(0));
setValue(&I, L);
DAG.setRoot(L.getValue(1));
return 0;
@@ -4353,14 +4496,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
Args.reserve(CS.arg_size());
// Check whether the function can return without sret-demotion.
- SmallVector<EVT, 4> OutVTs;
- SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+ SmallVector<ISD::OutputArg, 4> Outs;
SmallVector<uint64_t, 4> Offsets;
- getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
- OutVTs, OutsFlags, TLI, &Offsets);
+ GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
+ Outs, TLI, &Offsets);
bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
- FTy->isVarArg(), OutVTs, OutsFlags, DAG);
+ FTy->isVarArg(), Outs, FTy->getContext());
SDValue DemoteStackSlot;
@@ -4453,7 +4595,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
ComputeValueVTs(TLI, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
EVT PtrVT = PVTs[0];
- unsigned NumValues = OutVTs.size();
+ unsigned NumValues = Outs.size();
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(NumValues);
@@ -4461,7 +4603,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
DemoteStackSlot,
DAG.getConstant(Offsets[i], PtrVT));
- SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second,
+ SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
Add, NULL, Offsets[i], false, false, 1);
Values[i] = L;
Chains[i] = L.getValue(1);
@@ -4580,16 +4722,16 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
/// lowered like a normal call.
bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
// Verify that the prototype makes sense. int memcmp(void*,void*,size_t)
- if (I.getNumOperands() != 4)
+ if (I.getNumArgOperands() != 3)
return false;
- const Value *LHS = I.getOperand(1), *RHS = I.getOperand(2);
+ const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
- !I.getOperand(3)->getType()->isIntegerTy() ||
+ !I.getArgOperand(2)->getType()->isIntegerTy() ||
!I.getType()->isIntegerTy())
return false;
- const ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3));
+ const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
@@ -4656,11 +4798,16 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
void SelectionDAGBuilder::visitCall(const CallInst &I) {
+ // Handle inline assembly differently.
+ if (isa<InlineAsm>(I.getCalledValue())) {
+ visitInlineAsm(&I);
+ return;
+ }
+
const char *RenameFn = 0;
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
- const TargetIntrinsicInfo *II = TM.getIntrinsicInfo();
- if (II) {
+ if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
if (unsigned IID = II->getIntrinsicID(F)) {
RenameFn = visitIntrinsicCall(I, IID);
if (!RenameFn)
@@ -4679,51 +4826,51 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (!F->hasLocalLinkage() && F->hasName()) {
StringRef Name = F->getName();
if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
- if (I.getNumOperands() == 3 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPointTy() &&
- I.getType() == I.getOperand(1)->getType() &&
- I.getType() == I.getOperand(2)->getType()) {
- SDValue LHS = getValue(I.getOperand(1));
- SDValue RHS = getValue(I.getOperand(2));
+ if (I.getNumArgOperands() == 2 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.getType() == I.getArgOperand(1)->getType()) {
+ SDValue LHS = getValue(I.getArgOperand(0));
+ SDValue RHS = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
LHS.getValueType(), LHS, RHS));
return;
}
} else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
- if (I.getNumOperands() == 2 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPointTy() &&
- I.getType() == I.getOperand(1)->getType()) {
- SDValue Tmp = getValue(I.getOperand(1));
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
Tmp.getValueType(), Tmp));
return;
}
} else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
- if (I.getNumOperands() == 2 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPointTy() &&
- I.getType() == I.getOperand(1)->getType() &&
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getOperand(1));
+ SDValue Tmp = getValue(I.getArgOperand(0));
setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
Tmp.getValueType(), Tmp));
return;
}
} else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
- if (I.getNumOperands() == 2 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPointTy() &&
- I.getType() == I.getOperand(1)->getType() &&
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getOperand(1));
+ SDValue Tmp = getValue(I.getArgOperand(0));
setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
Tmp.getValueType(), Tmp));
return;
}
} else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
- if (I.getNumOperands() == 2 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPointTy() &&
- I.getType() == I.getOperand(1)->getType() &&
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getOperand(1));
+ SDValue Tmp = getValue(I.getArgOperand(0));
setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
Tmp.getValueType(), Tmp));
return;
@@ -4733,14 +4880,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
}
- } else if (isa<InlineAsm>(I.getOperand(0))) {
- visitInlineAsm(&I);
- return;
}
-
+
SDValue Callee;
if (!RenameFn)
- Callee = getValue(I.getOperand(0));
+ Callee = getValue(I.getCalledValue());
else
Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
@@ -4749,210 +4893,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
LowerCallTo(&I, Callee, I.isTailCall());
}
-/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
-/// this value and returns the result as a ValueVT value. This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
-SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const {
- // Assemble the legal parts into the final values.
- SmallVector<SDValue, 4> Values(ValueVTs.size());
- SmallVector<SDValue, 8> Parts;
- for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
- // Copy the legal parts from the registers.
- EVT ValueVT = ValueVTs[Value];
- unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
- EVT RegisterVT = RegVTs[Value];
-
- Parts.resize(NumRegs);
- for (unsigned i = 0; i != NumRegs; ++i) {
- SDValue P;
- if (Flag == 0) {
- P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
- } else {
- P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
- *Flag = P.getValue(2);
- }
-
- Chain = P.getValue(1);
-
- // If the source register was virtual and if we know something about it,
- // add an assert node.
- if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
- RegisterVT.isInteger() && !RegisterVT.isVector()) {
- unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
- FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
- if (FLI.LiveOutRegInfo.size() > SlotNo) {
- FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo];
-
- unsigned RegSize = RegisterVT.getSizeInBits();
- unsigned NumSignBits = LOI.NumSignBits;
- unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
-
- // FIXME: We capture more information than the dag can represent. For
- // now, just use the tightest assertzext/assertsext possible.
- bool isSExt = true;
- EVT FromVT(MVT::Other);
- if (NumSignBits == RegSize)
- isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
- else if (NumZeroBits >= RegSize-1)
- isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
- else if (NumSignBits > RegSize-8)
- isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
- else if (NumZeroBits >= RegSize-8)
- isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
- else if (NumSignBits > RegSize-16)
- isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
- else if (NumZeroBits >= RegSize-16)
- isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
- else if (NumSignBits > RegSize-32)
- isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
- else if (NumZeroBits >= RegSize-32)
- isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
-
- if (FromVT != MVT::Other)
- P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
- RegisterVT, P, DAG.getValueType(FromVT));
- }
- }
-
- Parts[i] = P;
- }
-
- Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
- NumRegs, RegisterVT, ValueVT);
- Part += NumRegs;
- Parts.clear();
- }
-
- return DAG.getNode(ISD::MERGE_VALUES, dl,
- DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
- &Values[0], ValueVTs.size());
-}
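The getCopyFromRegs body deleted above (along with the rest of the TLI-pointer-based RegsForValue helpers; call sites elsewhere in this patch now pass FuncInfo to a getCopyFromRegs overload) contains a "tightest assert" ladder worth spelling out: from the known sign and zero bits of a virtual register it picks the narrowest type the value could have been extended from. A plain-C++ restatement of just that ladder, using a hypothetical helper name:

#include <cassert>
#include <string>

// Returns "sext N", "zext N", or "none"; RegSize is the register width in bits.
static std::string pickAssert(unsigned RegSize, unsigned NumSignBits,
                              unsigned NumZeroBits) {
  if (NumSignBits == RegSize)      return "sext 1";
  if (NumZeroBits >= RegSize - 1)  return "zext 1";
  if (NumSignBits > RegSize - 8)   return "sext 8";
  if (NumZeroBits >= RegSize - 8)  return "zext 8";
  if (NumSignBits > RegSize - 16)  return "sext 16";
  if (NumZeroBits >= RegSize - 16) return "zext 16";
  if (NumSignBits > RegSize - 32)  return "sext 32";
  if (NumZeroBits >= RegSize - 32) return "zext 32";
  return "none";
}

int main() {
  // A bool-like value in a 32-bit register: all 32 bits are sign bits.
  assert(pickAssert(32, 32, 0) == "sext 1");
  // A zero-extended i8 in a 32-bit register: the top 24 bits are known zero.
  assert(pickAssert(32, 1, 24) == "zext 8");
  // Nothing known about the upper half of a 64-bit register: no assert.
  assert(pickAssert(64, 1, 0) == "none");
  return 0;
}

The removed code then wraps the copy in an AssertSext or AssertZext node of the chosen type so later combines can rely on the known extension.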
-
-/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
-/// specified value into the registers specified by this object. This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
-void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const {
- // Get the list of the values's legal parts.
- unsigned NumRegs = Regs.size();
- SmallVector<SDValue, 8> Parts(NumRegs);
- for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT ValueVT = ValueVTs[Value];
- unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
- EVT RegisterVT = RegVTs[Value];
-
- getCopyToParts(DAG, dl,
- Val.getValue(Val.getResNo() + Value),
- &Parts[Part], NumParts, RegisterVT);
- Part += NumParts;
- }
-
- // Copy the parts into the registers.
- SmallVector<SDValue, 8> Chains(NumRegs);
- for (unsigned i = 0; i != NumRegs; ++i) {
- SDValue Part;
- if (Flag == 0) {
- Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
- } else {
- Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
- *Flag = Part.getValue(1);
- }
-
- Chains[i] = Part.getValue(0);
- }
-
- if (NumRegs == 1 || Flag)
- // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
- // flagged to it. That is the CopyToReg nodes and the user are considered
- // a single scheduling unit. If we create a TokenFactor and return it as
- // chain, then the TokenFactor is both a predecessor (operand) of the
- // user as well as a successor (the TF operands are flagged to the user).
- // c1, f1 = CopyToReg
- // c2, f2 = CopyToReg
- // c3 = TokenFactor c1, c2
- // ...
- // = op c3, ..., f2
- Chain = Chains[NumRegs-1];
- else
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
-}
-
-/// AddInlineAsmOperands - Add this value to the specified inlineasm node
-/// operand list. This adds the code marker and includes the number of
-/// values added into it.
-void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
- unsigned MatchingIdx,
- SelectionDAG &DAG,
- std::vector<SDValue> &Ops) const {
- unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
- if (HasMatching)
- Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
- SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
- Ops.push_back(Res);
-
- for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
- unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
- EVT RegisterVT = RegVTs[Value];
- for (unsigned i = 0; i != NumRegs; ++i) {
- assert(Reg < Regs.size() && "Mismatch in # registers expected");
- Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
- }
- }
-}
-
-/// isAllocatableRegister - If the specified register is safe to allocate,
-/// i.e. it isn't a stack pointer or some other special register, return the
-/// register class for the register. Otherwise, return null.
-static const TargetRegisterClass *
-isAllocatableRegister(unsigned Reg, MachineFunction &MF,
- const TargetLowering &TLI,
- const TargetRegisterInfo *TRI) {
- EVT FoundVT = MVT::Other;
- const TargetRegisterClass *FoundRC = 0;
- for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
- E = TRI->regclass_end(); RCI != E; ++RCI) {
- EVT ThisVT = MVT::Other;
-
- const TargetRegisterClass *RC = *RCI;
- // If none of the value types for this register class are valid, we
- // can't use it. For example, 64-bit reg classes on 32-bit targets.
- for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
- I != E; ++I) {
- if (TLI.isTypeLegal(*I)) {
- // If we have already found this register in a different register class,
- // choose the one with the largest VT specified. For example, on
- // PowerPC, we favor f64 register classes over f32.
- if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
- ThisVT = *I;
- break;
- }
- }
- }
-
- if (ThisVT == MVT::Other) continue;
-
- // NOTE: This isn't ideal. In particular, this might allocate the
- // frame pointer in functions that need it (due to them not being taken
- // out of allocation, because a variable sized allocation hasn't been seen
- // yet). This is a slight code pessimization, but should still work.
- for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
- E = RC->allocation_order_end(MF); I != E; ++I)
- if (*I == Reg) {
- // We found a matching register class. Keep looking at others in case
- // we find one with larger registers that this physreg is also in.
- FoundRC = RC;
- FoundVT = ThisVT;
- break;
- }
- }
- return FoundRC;
-}
-
-
namespace llvm {
+
/// AsmOperandInfo - This contains information for each constraint that we are
/// lowering.
class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
@@ -5041,8 +4983,56 @@ private:
Regs.insert(*Aliases);
}
};
+
} // end llvm namespace.
+/// isAllocatableRegister - If the specified register is safe to allocate,
+/// i.e. it isn't a stack pointer or some other special register, return the
+/// register class for the register. Otherwise, return null.
+static const TargetRegisterClass *
+isAllocatableRegister(unsigned Reg, MachineFunction &MF,
+ const TargetLowering &TLI,
+ const TargetRegisterInfo *TRI) {
+ EVT FoundVT = MVT::Other;
+ const TargetRegisterClass *FoundRC = 0;
+ for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
+ E = TRI->regclass_end(); RCI != E; ++RCI) {
+ EVT ThisVT = MVT::Other;
+
+ const TargetRegisterClass *RC = *RCI;
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (TLI.isTypeLegal(*I)) {
+ // If we have already found this register in a different register class,
+ // choose the one with the largest VT specified. For example, on
+ // PowerPC, we favor f64 register classes over f32.
+ if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
+ ThisVT = *I;
+ break;
+ }
+ }
+ }
+
+ if (ThisVT == MVT::Other) continue;
+
+ // NOTE: This isn't ideal. In particular, this might allocate the
+ // frame pointer in functions that need it (due to them not being taken
+ // out of allocation, because a variable sized allocation hasn't been seen
+ // yet). This is a slight code pessimization, but should still work.
+ for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+ E = RC->allocation_order_end(MF); I != E; ++I)
+ if (*I == Reg) {
+ // We found a matching register class. Keep looking at others in case
+ // we find one with larger registers that this physreg is also in.
+ FoundRC = RC;
+ FoundVT = ThisVT;
+ break;
+ }
+ }
+ return FoundRC;
+}
/// GetRegistersForValue - Assign registers (virtual or physical) for the
/// specified operand. We prefer to assign virtual registers, to allow the
@@ -5154,7 +5144,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
}
}
- OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
return;
@@ -5172,7 +5162,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
for (; NumRegs; --NumRegs)
Regs.push_back(RegInfo.createVirtualRegister(RC));
- OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
return;
}
@@ -5215,7 +5205,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
for (unsigned i = RegStart; i != RegEnd; ++i)
Regs.push_back(RegClassRegs[i]);
- OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(),
+ OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(),
OpInfo.ConstraintVT);
OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
return;
@@ -5332,7 +5322,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG);
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
// If this is a memory input, and if the operand is not indirect, do what we
// need to to provide an address for the memory input.
@@ -5406,6 +5396,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
+ // Remember the AlignStack bit as operand 3.
+ AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0,
+ MVT::i1));
+
// Loop over all of the inputs, copying the operand values into the
// appropriate registers and processing the output regs.
RegsForValue RetValRegs;
@@ -5497,7 +5491,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
RegsForValue MatchedRegs;
- MatchedRegs.TLI = &TLI;
MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
MatchedRegs.RegVTs.push_back(RegVT);
@@ -5535,7 +5528,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
- hasMemory, Ops, DAG);
+ Ops, DAG);
if (Ops.empty())
report_fatal_error("Invalid operand for inline asm constraint '" +
Twine(OpInfo.ConstraintCode) + "'!");
@@ -5570,7 +5563,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Copy the input into the appropriate registers.
if (OpInfo.AssignedRegs.Regs.empty() ||
- !OpInfo.AssignedRegs.areValueTypesLegal())
+ !OpInfo.AssignedRegs.areValueTypesLegal(TLI))
report_fatal_error("Couldn't allocate input reg for constraint '" +
Twine(OpInfo.ConstraintCode) + "'!");
@@ -5595,7 +5588,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Finish up input operands. Set the input chain and add the flag last.
- AsmNodeOperands[0] = Chain;
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
@@ -5606,7 +5599,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this asm returns a register value, copy the result from that register
// and set it as the value of the call.
if (!RetValRegs.Regs.empty()) {
- SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+ SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
Chain, &Flag);
// FIXME: Why don't we do this for inline asms with MRVs?
@@ -5646,7 +5639,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
const Value *Ptr = IndirectStoresToEmit[i].second;
- SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+ SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
Chain, &Flag);
StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
}
@@ -5672,14 +5665,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
MVT::Other, getRoot(),
- getValue(I.getOperand(1)),
- DAG.getSrcValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
}
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
+ const TargetData &TD = *TLI.getTargetData();
SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
getRoot(), getValue(I.getOperand(0)),
- DAG.getSrcValue(I.getOperand(0)));
+ DAG.getSrcValue(I.getOperand(0)),
+ TD.getABITypeAlignment(I.getType()));
setValue(&I, V);
DAG.setRoot(V.getValue(1));
}
@@ -5687,17 +5682,17 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
MVT::Other, getRoot(),
- getValue(I.getOperand(1)),
- DAG.getSrcValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
}
void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
MVT::Other, getRoot(),
- getValue(I.getOperand(1)),
- getValue(I.getOperand(2)),
- DAG.getSrcValue(I.getOperand(1)),
- DAG.getSrcValue(I.getOperand(2))));
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getSrcValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(1))));
}
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
@@ -5715,6 +5710,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
DebugLoc dl) const {
// Handle all of the outgoing arguments.
SmallVector<ISD::OutputArg, 32> Outs;
+ SmallVector<SDValue, 32> OutVals;
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
@@ -5768,13 +5764,15 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
- ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs);
+ ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
+ i < NumFixedArgs);
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0)
MyFlags.Flags.setOrigAlign(1);
Outs.push_back(MyFlags);
+ OutVals.push_back(Parts[j]);
}
}
}
@@ -5803,7 +5801,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
SmallVector<SDValue, 4> InVals;
Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
- Outs, Ins, dl, DAG, InVals);
+ Outs, OutVals, Ins, dl, DAG, InVals);
// Verify that the target's LowerCall behaved as expected.
assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
@@ -5876,7 +5874,7 @@ SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
void
SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
- SDValue Op = getValue(V);
+ SDValue Op = getNonRegisterValue(V);
assert((Op.getOpcode() != ISD::CopyFromReg ||
cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
"Copy from a reg to the same reg!");
@@ -5894,21 +5892,16 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// If this is the entry block, emit arguments.
const Function &F = *LLVMBB->getParent();
SelectionDAG &DAG = SDB->DAG;
- SDValue OldRoot = DAG.getRoot();
DebugLoc dl = SDB->getCurDebugLoc();
const TargetData *TD = TLI.getTargetData();
SmallVector<ISD::InputArg, 16> Ins;
// Check whether the function can return without sret-demotion.
- SmallVector<EVT, 4> OutVTs;
- SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
- getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
- OutVTs, OutsFlags, TLI);
- FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
-
- FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(),
- OutVTs, OutsFlags, DAG);
- if (!FLI.CanLowerReturn) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ if (!FuncInfo->CanLowerReturn) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
@@ -6002,7 +5995,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// Set up the argument values.
unsigned i = 0;
Idx = 1;
- if (!FLI.CanLowerReturn) {
+ if (!FuncInfo->CanLowerReturn) {
// Create a virtual register for the sret pointer, and put in a copy
// from the sret argument into it.
SmallVector<EVT, 1> ValueVTs;
@@ -6016,7 +6009,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
- FLI.DemoteRegister = SRetReg;
+ FuncInfo->DemoteRegister = SRetReg;
NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
SRetReg, ArgValue);
DAG.setRoot(NewRoot);
@@ -6032,6 +6025,12 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, I->getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
+
+ // If this argument is unused then remember its value. It is used to generate
+ // debugging information.
+ if (I->use_empty() && NumValues)
+ SDB->setUnusedArgValue(I, InVals[i]);
+
for (unsigned Value = 0; Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
@@ -6112,17 +6111,20 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
- RegOut = FuncInfo.CreateRegForValue(C);
+ RegOut = FuncInfo.CreateRegs(C->getType());
CopyValueToVirtualRegister(C, RegOut);
}
Reg = RegOut;
} else {
- Reg = FuncInfo.ValueMap[PHIOp];
- if (Reg == 0) {
+ DenseMap<const Value *, unsigned>::iterator I =
+ FuncInfo.ValueMap.find(PHIOp);
+ if (I != FuncInfo.ValueMap.end())
+ Reg = I->second;
+ else {
assert(isa<AllocaInst>(PHIOp) &&
FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
"Didn't codegen value into a register!??");
- Reg = FuncInfo.CreateRegForValue(PHIOp);
+ Reg = FuncInfo.CreateRegs(PHIOp->getType());
CopyValueToVirtualRegister(PHIOp, Reg);
}
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 3fcd4b9dc437..46733d6db124 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -88,6 +88,10 @@ class SelectionDAGBuilder {
DebugLoc CurDebugLoc;
DenseMap<const Value*, SDValue> NodeMap;
+
+ /// UnusedArgNodeMap - Maps argument values for unused arguments. This is used
+ /// to preserve debug information for incoming arguments.
+ DenseMap<const Value*, SDValue> UnusedArgNodeMap;
public:
/// PendingLoads - Loads are not emitted to the program immediately. We bunch
@@ -342,6 +346,8 @@ public:
void visit(unsigned Opcode, const User &I);
SDValue getValue(const Value *V);
+ SDValue getNonRegisterValue(const Value *V);
+ SDValue getValueImpl(const Value *V);
void setValue(const Value *V, SDValue NewN) {
SDValue &N = NodeMap[V];
@@ -349,6 +355,12 @@ public:
N = NewN;
}
+ void setUnusedArgValue(const Value *V, SDValue NewN) {
+ SDValue &N = UnusedArgNodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
std::set<unsigned> &OutputRegs,
std::set<unsigned> &InputRegs);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 65b8d4f65919..08ba5482f7d2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -14,7 +14,7 @@
#define DEBUG_TYPE "isel"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
-#include "FunctionLoweringInfo.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DebugInfo.h"
@@ -171,7 +171,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) :
MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
- CurDAG(new SelectionDAG(tm, *FuncInfo)),
+ CurDAG(new SelectionDAG(tm)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
OptLevel(OL),
@@ -244,7 +244,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
CurDAG->init(*MF);
- FuncInfo->set(Fn, *MF, EnableFastISel);
+ FuncInfo->set(Fn, *MF);
SDB->init(GFI, *AA);
SelectAllBasicBlocks(Fn);
@@ -300,7 +300,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
for (MachineBasicBlock::const_iterator
II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
- if (II->isInlineAsm() || (TID.isCall() && !TID.isReturn())) {
+
+ // Operand 1 of an inline asm instruction indicates whether the asm
+ // needs a stack frame or not.
+ if ((II->isInlineAsm() && II->getOperand(1).getImm()) ||
+ (TID.isCall() && !TID.isReturn())) {
MFI->setHasCalls(true);
goto done;
}
@@ -312,6 +316,26 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Determine if there is a call to setjmp in the machine function.
MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn));
+ // Replace forward-declared registers with the registers containing
+ // the desired value.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ for (DenseMap<unsigned, unsigned>::iterator
+ I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
+ I != E; ++I) {
+ unsigned From = I->first;
+ unsigned To = I->second;
+ // If To is also scheduled to be replaced, find what its ultimate
+ // replacement is.
+ for (;;) {
+ DenseMap<unsigned, unsigned>::iterator J =
+ FuncInfo->RegFixups.find(To);
+ if (J == E) break;
+ To = J->second;
+ }
+ // Replace it.
+ MRI.replaceRegWith(From, To);
+ }
+
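// Illustration of the fixup chase above (virtual register numbers are
// hypothetical): with RegFixups = { %reg1027 -> %reg1031, %reg1031 -> %reg1040 },
// the inner loop resolves %reg1027's ultimate replacement to %reg1040 before
// the single MRI.replaceRegWith(%reg1027, %reg1040) call.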
// Release function-specific state. SDB and CurDAG are already cleared
// at this point.
FuncInfo->clear();
@@ -319,10 +343,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
return true;
}
-MachineBasicBlock *
-SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB,
- const BasicBlock *LLVMBB,
- BasicBlock::const_iterator Begin,
+void
+SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
BasicBlock::const_iterator End,
bool &HadTailCall) {
// Lower all of the non-terminator instructions. If a call is emitted
@@ -337,7 +359,7 @@ SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB,
SDB->clear();
// Final step, emit the lowered DAG as machine code.
- return CodeGenAndEmitDAG(BB);
+ CodeGenAndEmitDAG();
}
namespace {
@@ -372,102 +394,6 @@ public:
};
}
-/// TrivialTruncElim - Eliminate some trivial nops that can result from
-/// ShrinkDemandedOps: (trunc (ext n)) -> n.
-static bool TrivialTruncElim(SDValue Op,
- TargetLowering::TargetLoweringOpt &TLO) {
- SDValue N0 = Op.getOperand(0);
- EVT VT = Op.getValueType();
- if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
- N0.getOpcode() == ISD::SIGN_EXTEND ||
- N0.getOpcode() == ISD::ANY_EXTEND) &&
- N0.getOperand(0).getValueType() == VT) {
- return TLO.CombineTo(Op, N0.getOperand(0));
- }
- return false;
-}
-
-/// ShrinkDemandedOps - A late transformation pass that shrink expressions
-/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts
-/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
-void SelectionDAGISel::ShrinkDemandedOps() {
- SmallVector<SDNode*, 128> Worklist;
- SmallPtrSet<SDNode*, 128> InWorklist;
-
- // Add all the dag nodes to the worklist.
- Worklist.reserve(CurDAG->allnodes_size());
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ++I) {
- Worklist.push_back(I);
- InWorklist.insert(I);
- }
-
- TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true);
- while (!Worklist.empty()) {
- SDNode *N = Worklist.pop_back_val();
- InWorklist.erase(N);
-
- if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
- // Deleting this node may make its operands dead, add them to the worklist
- // if they aren't already there.
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- if (InWorklist.insert(N->getOperand(i).getNode()))
- Worklist.push_back(N->getOperand(i).getNode());
-
- CurDAG->DeleteNode(N);
- continue;
- }
-
- // Run ShrinkDemandedOp on scalar binary operations.
- if (N->getNumValues() != 1 ||
- !N->getValueType(0).isSimple() || !N->getValueType(0).isInteger())
- continue;
-
- unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
- APInt Demanded = APInt::getAllOnesValue(BitWidth);
- APInt KnownZero, KnownOne;
- if (!TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
- KnownZero, KnownOne, TLO) &&
- (N->getOpcode() != ISD::TRUNCATE ||
- !TrivialTruncElim(SDValue(N, 0), TLO)))
- continue;
-
- // Revisit the node.
- assert(!InWorklist.count(N) && "Already in worklist");
- Worklist.push_back(N);
- InWorklist.insert(N);
-
- // Replace the old value with the new one.
- DEBUG(errs() << "\nShrinkDemandedOps replacing ";
- TLO.Old.getNode()->dump(CurDAG);
- errs() << "\nWith: ";
- TLO.New.getNode()->dump(CurDAG);
- errs() << '\n');
-
- if (InWorklist.insert(TLO.New.getNode()))
- Worklist.push_back(TLO.New.getNode());
-
- SDOPsWorkListRemover DeadNodes(Worklist, InWorklist);
- CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
-
- if (!TLO.Old.getNode()->use_empty()) continue;
-
- for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
- i != e; ++i) {
- SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode();
- if (OpNode->hasOneUse()) {
- // Add OpNode to the end of the list to revisit.
- DeadNodes.RemoveFromWorklist(OpNode);
- Worklist.push_back(OpNode);
- InWorklist.insert(OpNode);
- }
- }
-
- DeadNodes.RemoveFromWorklist(TLO.Old.getNode());
- CurDAG->DeleteNode(TLO.Old.getNode());
- }
-}
-
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
SmallPtrSet<SDNode*, 128> VisitedNodes;
SmallVector<SDNode*, 128> Worklist;
@@ -522,7 +448,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
} while (!Worklist.empty());
}
-MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) {
+void SelectionDAGISel::CodeGenAndEmitDAG() {
std::string GroupName;
if (TimePassesIsEnabled)
GroupName = "Instruction Selection and Scheduling";
@@ -531,23 +457,19 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) {
ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
ViewSUnitDAGs)
BlockName = MF->getFunction()->getNameStr() + ":" +
- BB->getBasicBlock()->getNameStr();
+ FuncInfo->MBB->getBasicBlock()->getNameStr();
- DEBUG(dbgs() << "Initial selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Initial selection DAG:\n"; CurDAG->dump());
if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
// Run the DAG combiner in pre-legalize mode.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("DAG Combining 1", GroupName);
- CurDAG->Combine(Unrestricted, *AA, OptLevel);
- } else {
+ {
+ NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled);
CurDAG->Combine(Unrestricted, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized lowered selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Optimized lowered selection DAG:\n"; CurDAG->dump());
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
@@ -555,44 +477,36 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) {
BlockName);
bool Changed;
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Type Legalization", GroupName);
- Changed = CurDAG->LegalizeTypes();
- } else {
+ {
+ NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled);
Changed = CurDAG->LegalizeTypes();
}
- DEBUG(dbgs() << "Type-legalized selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Type-legalized selection DAG:\n"; CurDAG->dump());
if (Changed) {
if (ViewDAGCombineLT)
CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
// Run the DAG combiner in post-type-legalize mode.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("DAG Combining after legalize types", GroupName);
- CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
- } else {
+ {
+ NamedRegionTimer T("DAG Combining after legalize types", GroupName,
+ TimePassesIsEnabled);
CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n";
+ CurDAG->dump());
}
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Vector Legalization", GroupName);
- Changed = CurDAG->LegalizeVectors();
- } else {
+ {
+ NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled);
Changed = CurDAG->LegalizeVectors();
}
if (Changed) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Type Legalization 2", GroupName);
- CurDAG->LegalizeTypes();
- } else {
+ {
+ NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled);
CurDAG->LegalizeTypes();
}
@@ -600,95 +514,79 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) {
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
// Run the DAG combiner in post-type-legalize mode.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("DAG Combining after legalize vectors", GroupName);
- CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
- } else {
+ {
+ NamedRegionTimer T("DAG Combining after legalize vectors", GroupName,
+ TimePassesIsEnabled);
CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n";
+ CurDAG->dump());
}
if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("DAG Legalization", GroupName);
- CurDAG->Legalize(OptLevel);
- } else {
+ {
+ NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled);
CurDAG->Legalize(OptLevel);
}
- DEBUG(dbgs() << "Legalized selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Legalized selection DAG:\n"; CurDAG->dump());
if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
// Run the DAG combiner in post-legalize mode.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("DAG Combining 2", GroupName);
- CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
- } else {
+ {
+ NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled);
CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized legalized selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump());
- if (OptLevel != CodeGenOpt::None) {
- ShrinkDemandedOps();
+ if (OptLevel != CodeGenOpt::None)
ComputeLiveOutVRegInfo();
- }
if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
// Third, instruction select all of the operations to machine code, adding the
// code to the MachineBasicBlock.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Instruction Selection", GroupName);
- DoInstructionSelection();
- } else {
+ {
+ NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled);
DoInstructionSelection();
}
- DEBUG(dbgs() << "Selected selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Selected selection DAG:\n"; CurDAG->dump());
if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
// Schedule machine code.
ScheduleDAGSDNodes *Scheduler = CreateScheduler();
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Instruction Scheduling", GroupName);
- Scheduler->Run(CurDAG, BB, BB->end());
- } else {
- Scheduler->Run(CurDAG, BB, BB->end());
+ {
+ NamedRegionTimer T("Instruction Scheduling", GroupName,
+ TimePassesIsEnabled);
+ Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt);
}
if (ViewSUnitDAGs) Scheduler->viewGraph();
// Emit machine code to BB. This can change 'BB' to the last block being
// inserted into.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Instruction Creation", GroupName);
- BB = Scheduler->EmitSchedule();
- } else {
- BB = Scheduler->EmitSchedule();
+ {
+ NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
+
+ FuncInfo->MBB = Scheduler->EmitSchedule();
+ FuncInfo->InsertPt = Scheduler->InsertPos;
}
// Free the scheduler state.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName);
- delete Scheduler;
- } else {
+ {
+ NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
+ TimePassesIsEnabled);
delete Scheduler;
}
// Free the SelectionDAG state, now that we're finished with it.
CurDAG->clear();
-
- return BB;
}
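The repeated if (TimePassesIsEnabled) branches disappear throughout this function
because NamedRegionTimer now takes the enable flag as its third constructor
argument; each phase is wrapped in an unconditional scoped block and the timer is
inert when timing is off. The pattern, as used in the hunks above:

  {
    NamedRegionTimer T("Some Phase", GroupName, TimePassesIsEnabled);
    // ... the phase's work; time is recorded only when the flag is set ...
  } // T's destructor stops the region here.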
void SelectionDAGISel::DoInstructionSelection() {
@@ -750,21 +648,22 @@ void SelectionDAGISel::DoInstructionSelection() {
/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
/// do other setup for EH landing-pad blocks.
-void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) {
+void SelectionDAGISel::PrepareEHLandingPad() {
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
- MCSymbol *Label = MF->getMMI().addLandingPad(BB);
+ MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB);
const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
- BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label);
+ BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
+ .addSym(Label);
// Mark exception register as live in.
unsigned Reg = TLI.getExceptionAddressRegister();
- if (Reg) BB->addLiveIn(Reg);
+ if (Reg) FuncInfo->MBB->addLiveIn(Reg);
// Mark exception selector register as live in.
Reg = TLI.getExceptionSelectorRegister();
- if (Reg) BB->addLiveIn(Reg);
+ if (Reg) FuncInfo->MBB->addLiveIn(Reg);
// FIXME: Hack around an exception handling flaw (PR1508): the personality
// function and list of typeids logically belong to the invoke (or, if you
@@ -777,7 +676,7 @@ void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) {
// in exceptions not being caught because no typeids are associated with
// the invoke. This may not be the only way things can go wrong, but it
// is the only way we try to work around for the moment.
- const BasicBlock *LLVMBB = BB->getBasicBlock();
+ const BasicBlock *LLVMBB = FuncInfo->MBB->getBasicBlock();
const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
if (Br && Br->isUnconditional()) { // Critical edge?
@@ -796,83 +695,100 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
if (EnableFastISel)
- FastIS = TLI.createFastISel(*MF, FuncInfo->ValueMap, FuncInfo->MBBMap,
- FuncInfo->StaticAllocaMap,
- FuncInfo->PHINodesToUpdate
-#ifndef NDEBUG
- , FuncInfo->CatchInfoLost
-#endif
- );
+ FastIS = TLI.createFastISel(*FuncInfo);
// Iterate over all basic blocks in the function.
for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
const BasicBlock *LLVMBB = &*I;
- MachineBasicBlock *BB = FuncInfo->MBBMap[LLVMBB];
+ FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
+ FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
BasicBlock::const_iterator const End = LLVMBB->end();
- BasicBlock::const_iterator BI = Begin;
+ BasicBlock::const_iterator BI = End;
+ FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
+
+ // Setup an EH landing-pad block.
+ if (FuncInfo->MBB->isLandingPad())
+ PrepareEHLandingPad();
+
// Lower any arguments needed in this block if this is the entry block.
if (LLVMBB == &Fn.getEntryBlock())
LowerArguments(LLVMBB);
- // Setup an EH landing-pad block.
- if (BB->isLandingPad())
- PrepareEHLandingPad(BB);
-
// Before doing SelectionDAG ISel, see if FastISel has been requested.
if (FastIS) {
+ FastIS->startNewBlock();
+
// Emit code for any incoming arguments. This must happen before
// beginning FastISel on the entry block.
if (LLVMBB == &Fn.getEntryBlock()) {
CurDAG->setRoot(SDB->getControlRoot());
SDB->clear();
- BB = CodeGenAndEmitDAG(BB);
+ CodeGenAndEmitDAG();
+
+ // If we inserted any instructions at the beginning, make a note of
+ // where they are, so we can be sure to emit subsequent instructions
+ // after them.
+ if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
+ FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt));
+ else
+ FastIS->setLastLocalValue(0);
}
- FastIS->startNewBlock(BB);
+
// Do FastISel on as many instructions as possible.
- for (; BI != End; ++BI) {
+ for (; BI != Begin; --BI) {
+ const Instruction *Inst = llvm::prior(BI);
+
+ // If we no longer require this instruction, skip it.
+ if (!Inst->mayWriteToMemory() &&
+ !isa<TerminatorInst>(Inst) &&
+ !isa<DbgInfoIntrinsic>(Inst) &&
+ !FuncInfo->isExportedInst(Inst))
+ continue;
+
+ // Bottom-up: reset the insert pos at the top, after any local-value
+ // instructions.
+ FastIS->recomputeInsertPt();
+
// Try to select the instruction with FastISel.
- if (FastIS->SelectInstruction(BI))
+ if (FastIS->SelectInstruction(Inst))
continue;
// Then handle certain instructions as single-LLVM-Instruction blocks.
- if (isa<CallInst>(BI)) {
+ if (isa<CallInst>(Inst)) {
++NumFastIselFailures;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel missed call: ";
- BI->dump();
+ Inst->dump();
}
- if (!BI->getType()->isVoidTy() && !BI->use_empty()) {
- unsigned &R = FuncInfo->ValueMap[BI];
+ if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
+ unsigned &R = FuncInfo->ValueMap[Inst];
if (!R)
- R = FuncInfo->CreateRegForValue(BI);
+ R = FuncInfo->CreateRegs(Inst->getType());
}
bool HadTailCall = false;
- BB = SelectBasicBlock(BB, LLVMBB, BI, llvm::next(BI), HadTailCall);
+ SelectBasicBlock(Inst, BI, HadTailCall);
// If the call was emitted as a tail call, we're done with the block.
if (HadTailCall) {
- BI = End;
+ --BI;
break;
}
- // If the instruction was codegen'd with multiple blocks,
- // inform the FastISel object where to resume inserting.
- FastIS->setCurrentBlock(BB);
continue;
}
// Otherwise, give up on FastISel for the rest of the block.
// For now, be a little lenient about non-branch terminators.
- if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) {
+ if (!isa<TerminatorInst>(Inst) || isa<BranchInst>(Inst)) {
++NumFastIselFailures;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel miss: ";
- BI->dump();
+ Inst->dump();
}
if (EnableFastISelAbort)
// The "fast" selector couldn't handle something and bailed.
@@ -881,17 +797,17 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
break;
}
+
+ FastIS->recomputeInsertPt();
}
// Run SelectionDAG instruction selection on the remainder of the block
// not handled by FastISel. If FastISel is not run, this is the entire
// block.
- if (BI != End) {
- bool HadTailCall;
- BB = SelectBasicBlock(BB, LLVMBB, BI, End, HadTailCall);
- }
+ bool HadTailCall;
+ SelectBasicBlock(Begin, BI, HadTailCall);
- FinishBasicBlock(BB);
+ FinishBasicBlock();
FuncInfo->PHINodesToUpdate.clear();
}
@@ -899,11 +815,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
void
-SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
+SelectionDAGISel::FinishBasicBlock() {
DEBUG(dbgs() << "Total amount of phi nodes to update: "
- << FuncInfo->PHINodesToUpdate.size() << "\n");
- DEBUG(for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
+ << FuncInfo->PHINodesToUpdate.size() << "\n";
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
dbgs() << "Node " << i << " : ("
<< FuncInfo->PHINodesToUpdate[i].first
<< ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
@@ -917,11 +833,11 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
- if (!BB->isSuccessor(PHI->getParent()))
+ if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
continue;
PHI->addOperand(
MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
- PHI->addOperand(MachineOperand::CreateMBB(BB));
+ PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
}
return;
}
@@ -930,33 +846,35 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
// Lower header first, if it wasn't already lowered
if (!SDB->BitTestCases[i].Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
- BB = SDB->BitTestCases[i].Parent;
+ FuncInfo->MBB = SDB->BitTestCases[i].Parent;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
- SDB->visitBitTestHeader(SDB->BitTestCases[i], BB);
+ SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
- BB = CodeGenAndEmitDAG(BB);
+ CodeGenAndEmitDAG();
}
for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
// Set the current basic block to the mbb we wish to insert the code into
- BB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
if (j+1 != ej)
SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB,
SDB->BitTestCases[i].Reg,
SDB->BitTestCases[i].Cases[j],
- BB);
+ FuncInfo->MBB);
else
SDB->visitBitTestCase(SDB->BitTestCases[i].Default,
SDB->BitTestCases[i].Reg,
SDB->BitTestCases[i].Cases[j],
- BB);
+ FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
- BB = CodeGenAndEmitDAG(BB);
+ CodeGenAndEmitDAG();
}
// Update PHI Nodes
@@ -1001,22 +919,24 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
// Lower header first, if it wasn't already lowered
if (!SDB->JTCases[i].first.Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
- BB = SDB->JTCases[i].first.HeaderBB;
+ FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first,
- BB);
+ FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
- BB = CodeGenAndEmitDAG(BB);
+ CodeGenAndEmitDAG();
}
// Set the current basic block to the mbb we wish to insert the code into
- BB = SDB->JTCases[i].second.MBB;
+ FuncInfo->MBB = SDB->JTCases[i].second.MBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
SDB->visitJumpTable(SDB->JTCases[i].second);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
- BB = CodeGenAndEmitDAG(BB);
+ CodeGenAndEmitDAG();
// Update PHI Nodes
for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
@@ -1034,11 +954,11 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
(MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
}
// JT BB. Just iterate over successors here
- if (BB->isSuccessor(PHIBB)) {
+ if (FuncInfo->MBB->isSuccessor(PHIBB)) {
PHI->addOperand
(MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
false));
- PHI->addOperand(MachineOperand::CreateMBB(BB));
+ PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
}
}
}
@@ -1050,10 +970,10 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
- if (BB->isSuccessor(PHI->getParent())) {
+ if (FuncInfo->MBB->isSuccessor(PHI->getParent())) {
PHI->addOperand(
MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
- PHI->addOperand(MachineOperand::CreateMBB(BB));
+ PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
}
}
@@ -1061,7 +981,8 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
// additional DAGs necessary.
for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
// Set the current basic block to the mbb we wish to insert the code into
- MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB;
+ MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
// Determine the unique successors.
SmallVector<MachineBasicBlock *, 2> Succs;
@@ -1071,21 +992,24 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
// Emit the code. Note that this could result in ThisBB being split, so
// we need to check for updates.
- SDB->visitSwitchCase(SDB->SwitchCases[i], BB);
+ SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
- ThisBB = CodeGenAndEmitDAG(BB);
+ CodeGenAndEmitDAG();
+ ThisBB = FuncInfo->MBB;
// Handle any PHI nodes in successors of this chunk, as if we were coming
// from the original BB before switch expansion. Note that PHI nodes can
// occur multiple times in PHINodesToUpdate. We have to be very careful to
// handle them the right number of times.
for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
- BB = Succs[i];
- // BB may have been removed from the CFG if a branch was constant folded.
- if (ThisBB->isSuccessor(BB)) {
- for (MachineBasicBlock::iterator Phi = BB->begin();
- Phi != BB->end() && Phi->isPHI();
+ FuncInfo->MBB = Succs[i];
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // FuncInfo->MBB may have been removed from the CFG if a branch was
+ // constant folded.
+ if (ThisBB->isSuccessor(FuncInfo->MBB)) {
+ for (MachineBasicBlock::iterator Phi = FuncInfo->MBB->begin();
+ Phi != FuncInfo->MBB->end() && Phi->isPHI();
++Phi) {
// This value for this PHI node is recorded in PHINodesToUpdate.
for (unsigned pn = 0; ; ++pn) {
@@ -1205,6 +1129,7 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
+ Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3
unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
if (InOps[e-1].getValueType() == MVT::Flag)
@@ -1701,7 +1626,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
SDValue(Res, ResNumResults-1));
if ((EmitNodeInfo & OPFL_FlagOutput) != 0)
- --ResNumResults;
+ --ResNumResults;
// Move the chain reference if needed.
if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 3786bd197b85..6cae804422ce 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -278,7 +278,7 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
FlaggedNodes.push_back(N);
while (!FlaggedNodes.empty()) {
O << DOTGraphTraits<SelectionDAG*>
- ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
+ ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
FlaggedNodes.pop_back();
if (!FlaggedNodes.empty())
O << "\n ";
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 44a80d3362d1..4f3866956cac 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -20,6 +20,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/GlobalVariable.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -261,6 +262,38 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::MEMMOVE] = "memmove";
Names[RTLIB::MEMSET] = "memset";
Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+ Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
+ Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
+ Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
+ Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+ Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
+ Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
+ Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
+ Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
}
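The new RTLIB entries follow the GCC __sync builtin naming, where the trailing
digit is the access size in bytes. As a reference point, a sketch of one of the
corresponding library prototypes (GCC ABI convention, not something defined in
this tree):

  // Atomically replace *ptr with newval if *ptr == oldval; returns the
  // previous contents of *ptr.
  extern "C" unsigned int __sync_val_compare_and_swap_4(unsigned int *ptr,
                                                        unsigned int oldval,
                                                        unsigned int newval);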
/// InitLibcallCallingConvs - Set default libcall CallingConvs.
@@ -546,9 +579,9 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
SchedPreferenceInfo = Sched::Latency;
JumpBufSize = 0;
JumpBufAlignment = 0;
- IfCvtBlockSizeLimit = 2;
- IfCvtDupBlockSizeLimit = 0;
PrefLoopAlignment = 0;
+ MinStackArgumentAlignment = 1;
+ ShouldFoldAtomicFences = false;
InitLibcallNames(LibcallRoutineNames);
InitCmpLibcallCCs(CmpLibcallCCs);
@@ -578,9 +611,9 @@ bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
- unsigned &NumIntermediates,
- EVT &RegisterVT,
- TargetLowering* TLI) {
+ unsigned &NumIntermediates,
+ EVT &RegisterVT,
+ TargetLowering *TLI) {
// Figure out the right, legal destination reg to copy into.
unsigned NumElts = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType();
@@ -610,16 +643,12 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
EVT DestVT = TLI->getRegisterType(NewVT);
RegisterVT = DestVT;
- if (EVT(DestVT).bitsLT(NewVT)) {
- // Value is expanded, e.g. i64 -> i16.
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
- } else {
- // Otherwise, promotion or legal types use the same number of registers as
- // the vector decimated to the appropriate level.
- return NumVectorRegs;
- }
- return 1;
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
}
/// computeRegisterProperties - Once all of the register classes are added,
@@ -705,39 +734,39 @@ void TargetLowering::computeRegisterProperties() {
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType)i;
- if (!isTypeLegal(VT)) {
- MVT IntermediateVT;
- EVT RegisterVT;
- unsigned NumIntermediates;
- NumRegistersForVT[i] =
- getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
- RegisterVT, this);
- RegisterTypeForVT[i] = RegisterVT;
-
- // Determine if there is a legal wider type.
- bool IsLegalWiderType = false;
- EVT EltVT = VT.getVectorElementType();
- unsigned NElts = VT.getVectorNumElements();
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- EVT SVT = (MVT::SimpleValueType)nVT;
- if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
- SVT.getVectorNumElements() > NElts && NElts != 1) {
- TransformToType[i] = SVT;
- ValueTypeActions.setTypeAction(VT, Promote);
- IsLegalWiderType = true;
- break;
- }
+ if (isTypeLegal(VT)) continue;
+
+ MVT IntermediateVT;
+ EVT RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] =
+ getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
+ RegisterVT, this);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ // Determine if there is a legal wider type.
+ bool IsLegalWiderType = false;
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ EVT SVT = (MVT::SimpleValueType)nVT;
+ if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts && NElts != 1) {
+ TransformToType[i] = SVT;
+ ValueTypeActions.setTypeAction(VT, Promote);
+ IsLegalWiderType = true;
+ break;
}
- if (!IsLegalWiderType) {
- EVT NVT = VT.getPow2VectorType();
- if (NVT == VT) {
- // Type is already a power of 2. The default action is to split.
- TransformToType[i] = MVT::Other;
- ValueTypeActions.setTypeAction(VT, Expand);
- } else {
- TransformToType[i] = NVT;
- ValueTypeActions.setTypeAction(VT, Promote);
- }
+ }
+ if (!IsLegalWiderType) {
+ EVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ ValueTypeActions.setTypeAction(VT, Expand);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, Promote);
}
}
}
@@ -811,6 +840,65 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
return 1;
}
+/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// type of the given function. This does not require a DAG or a return value,
+/// and is suitable for use before any DAGs for the function are constructed.
+/// TODO: Move this out of TargetLowering.cpp.
+void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr,
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const TargetLowering &TLI,
+ SmallVectorImpl<uint64_t> *Offsets) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, ReturnType, ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+ unsigned Offset = 0;
+
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (attr & Attribute::SExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (attr & Attribute::ZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+ unsigned PartSize = TLI.getTargetData()->getTypeAllocSize(
+ PartVT.getTypeForEVT(ReturnType->getContext()));
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr & Attribute::InReg)
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (attr & Attribute::SExt)
+ Flags.setSExt();
+ else if (attr & Attribute::ZExt)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true));
+ if (Offsets) {
+ Offsets->push_back(Offset);
+ Offset += PartSize;
+ }
+ }
+ }
+}
+
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
@@ -1042,7 +1130,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
return true;
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// Output known-1 bits are only known if set in both the LHS & RHS.
@@ -1076,7 +1164,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.ShrinkDemandedConstant(Op, NewMask))
return true;
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -1101,7 +1189,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if ((KnownZero2 & NewMask) == NewMask)
return TLO.CombineTo(Op, Op.getOperand(1));
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// If all of the unknown bits are known to be zero on one side or the other
@@ -1498,13 +1586,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::AssertZext: {
- EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- APInt InMask = APInt::getLowBitsSet(BitWidth,
- VT.getSizeInBits());
- if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask,
+ // Demand all the bits of the input that are demanded in the output.
+ // The low bits are obvious; the high bits are demanded because we're
+ // asserting that they're zero here.
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
KnownZero |= ~InMask & NewMask;
break;
}
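// Worked illustration of the AssertZext handling above (masks hypothetical):
// asserting zero-extension from i8 on an i32 value gives InMask = 0xFF. Every
// demanded bit of the operand stays demanded, since the high bits are exactly
// what the assert is about, and afterwards any demanded bit outside InMask
// (e.g. bit 31 when NewMask = 0x800000FF) is added to KnownZero.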
@@ -1544,7 +1636,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne2, TLO, Depth+1))
return true;
// See if the operation should be performed at a smaller bit width.
- if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
}
// FALL THROUGH
@@ -2346,7 +2438,6 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
/// vector. If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
switch (ConstraintLetter) {
@@ -2384,7 +2475,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
if (ConstraintLetter != 'n') {
int64_t Offs = GA->getOffset();
if (C) Offs += C->getZExtValue();
- Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ C->getDebugLoc(),
Op.getValueType(), Offs));
return;
}
@@ -2507,18 +2599,18 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
/// 'm' over 'r', for example.
///
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
- bool hasMemory, const TargetLowering &TLI,
+ const TargetLowering &TLI,
SDValue Op, SelectionDAG *DAG) {
assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
unsigned BestIdx = 0;
TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
int BestGenerality = -1;
-
+
// Loop over the options, keeping track of the most general one.
for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
TargetLowering::ConstraintType CType =
TLI.getConstraintType(OpInfo.Codes[i]);
-
+
// If this is an 'other' constraint, see if the operand is valid for it.
// For example, on X86 we might have an 'rI' constraint. If the operand
// is an integer in the range [0..31] we want to use I (saving a load
@@ -2527,7 +2619,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
assert(OpInfo.Codes[i].size() == 1 &&
"Unhandled multi-letter 'other' constraint");
std::vector<SDValue> ResultOps;
- TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory,
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0],
ResultOps, *DAG);
if (!ResultOps.empty()) {
BestType = CType;
@@ -2536,6 +2628,11 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
}
}
+ // Things with matching constraints can only be registers, per gcc
+ // documentation. This mainly affects "g" constraints.
+ if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
+ continue;
+
// This constraint letter is more general than the previous one, use it.
int Generality = getConstraintGenerality(CType);
if (Generality > BestGenerality) {
@@ -2554,7 +2651,6 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
SDValue Op,
- bool hasMemory,
SelectionDAG *DAG) const {
assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
@@ -2563,7 +2659,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
OpInfo.ConstraintCode = OpInfo.Codes[0];
OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
} else {
- ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG);
+ ChooseConstraint(OpInfo, *this, Op, DAG);
}
// 'X' matches anything.
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 5240bef5a5ff..6ab0cb03c065 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;
@@ -158,7 +159,8 @@ namespace {
// Create a new invoke instruction.
Args.clear();
- Args.append(CI->op_begin() + 1, CI->op_end());
+ CallSite CS(CI);
+ Args.append(CS.arg_begin(), CS.arg_end());
InvokeInst *II = InvokeInst::Create(CI->getCalledValue(),
NewBB, CleanupBB,
@@ -194,7 +196,7 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) {
unsigned NumMeta = 0;
SmallVector<Constant*,16> Metadata;
for (unsigned I = 0; I != Roots.size(); ++I) {
- Constant *C = cast<Constant>(Roots[I].first->getOperand(2));
+ Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
if (!C->isNullValue())
NumMeta = I + 1;
Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
@@ -322,16 +324,16 @@ void ShadowStackGC::CollectRoots(Function &F) {
assert(Roots.empty() && "Not cleaned up?");
- SmallVector<std::pair<CallInst*,AllocaInst*>,16> MetaRoots;
+ SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
if (Function *F = CI->getCalledFunction())
if (F->getIntrinsicID() == Intrinsic::gcroot) {
- std::pair<CallInst*,AllocaInst*> Pair = std::make_pair(
- CI, cast<AllocaInst>(CI->getOperand(1)->stripPointerCasts()));
- if (IsNullValue(CI->getOperand(2)))
+ std::pair<CallInst*, AllocaInst*> Pair = std::make_pair(
+ CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ if (IsNullValue(CI->getArgOperand(1)))
Roots.push_back(Pair);
else
MetaRoots.push_back(Pair);
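// Background for the getArgOperand() switch above: the intrinsic is declared as
//   declare void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
// so argument 0 is the stack slot being registered and argument 1 is the
// metadata constant that GetFrameMap() and CollectRoots() read.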
diff --git a/lib/CodeGen/SimpleHazardRecognizer.h b/lib/CodeGen/SimpleHazardRecognizer.h
deleted file mode 100644
index f69feaf9e570..000000000000
--- a/lib/CodeGen/SimpleHazardRecognizer.h
+++ /dev/null
@@ -1,89 +0,0 @@
-//=- llvm/CodeGen/SimpleHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SimpleHazardRecognizer class, which
-// implements hazard-avoidance heuristics for scheduling, based on the
-// scheduling itineraries specified for the target.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H
-#define LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H
-
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-namespace llvm {
- /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
- /// a coarse classification and attempts to avoid that instructions of
- /// a given class aren't grouped too densely together.
- class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
- /// Class - A simple classification for SUnits.
- enum Class {
- Other, Load, Store
- };
-
- /// Window - The Class values of the most recently issued
- /// instructions.
- Class Window[8];
-
- /// getClass - Classify the given SUnit.
- Class getClass(const SUnit *SU) {
- const MachineInstr *MI = SU->getInstr();
- const TargetInstrDesc &TID = MI->getDesc();
- if (TID.mayLoad())
- return Load;
- if (TID.mayStore())
- return Store;
- return Other;
- }
-
- /// Step - Rotate the existing entries in Window and insert the
- /// given class value in position as the most recent.
- void Step(Class C) {
- std::copy(Window+1, array_endof(Window), Window);
- Window[array_lengthof(Window)-1] = C;
- }
-
- public:
- SimpleHazardRecognizer() : Window() {
- Reset();
- }
-
- virtual HazardType getHazardType(SUnit *SU) {
- Class C = getClass(SU);
- if (C == Other)
- return NoHazard;
- unsigned Score = 0;
- for (unsigned i = 0; i != array_lengthof(Window); ++i)
- if (Window[i] == C)
- Score += i + 1;
- if (Score > array_lengthof(Window) * 2)
- return Hazard;
- return NoHazard;
- }
-
- virtual void Reset() {
- for (unsigned i = 0; i != array_lengthof(Window); ++i)
- Window[i] = Other;
- }
-
- virtual void EmitInstruction(SUnit *SU) {
- Step(getClass(SU));
- }
-
- virtual void AdvanceCycle() {
- Step(Other);
- }
- };
-}
-
-#endif
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index ed3c243ff3e4..e69d3e4fa78a 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -99,15 +99,23 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
///
/// This returns true if an interval was modified.
///
-bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
- LiveInterval &IntB,
+bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP,
MachineInstr *CopyMI) {
+ // Bail if there is no dst interval - can happen when merging physical subreg
+ // operations.
+ if (!li_->hasInterval(CP.getDstReg()))
+ return false;
+
+ LiveInterval &IntA =
+ li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
- assert(BLR != IntB.end() && "Live range not found!");
+ if (BLR == IntB.end()) return false;
VNInfo *BValNo = BLR->valno;
// Get the location that B is defined at. Two options: either this value has
@@ -119,7 +127,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
// AValNo is the value number in A that defines the copy, A3 in the example.
SlotIndex CopyUseIdx = CopyIdx.getUseIndex();
LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
- assert(ALR != IntA.end() && "Live range not found!");
+ // The live range might not exist after fun with physreg coalescing.
+ if (ALR == IntA.end()) return false;
VNInfo *AValNo = ALR->valno;
// If it's re-defined by an early clobber somewhere in the live range, then
// it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
@@ -145,26 +154,21 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
// If AValNo is defined as a copy from IntB, we can potentially process this.
// Get the instruction that defines this value number.
- unsigned SrcReg = li_->getVNInfoSourceReg(AValNo);
- if (!SrcReg) return false; // Not defined by a copy.
-
- // If the value number is not defined by a copy instruction, ignore it.
-
- // If the source register comes from an interval other than IntB, we can't
- // handle this.
- if (SrcReg != IntB.reg) return false;
+ if (!CP.isCoalescable(AValNo->getCopy()))
+ return false;
// Get the LiveRange in IntB that this value number starts with.
LiveInterval::iterator ValLR =
IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
- assert(ValLR != IntB.end() && "Live range not found!");
+ if (ValLR == IntB.end())
+ return false;
// Make sure that the end of the live range is inside the same block as
// CopyMI.
MachineInstr *ValLREndInst =
li_->getInstructionFromIndex(ValLR->end.getPrevSlot());
- if (!ValLREndInst ||
- ValLREndInst->getParent() != CopyMI->getParent()) return false;
+ if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
+ return false;
// Okay, we now know that ValLR ends in the same block that the CopyMI
// live-range starts. If there are no intervening live ranges between them in
@@ -207,6 +211,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
// physreg has sub-registers, update their live intervals as well.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
LiveInterval &SRLI = li_->getInterval(*SR);
SRLI.addRange(LiveRange(FillerStart, FillerEnd,
SRLI.getNextValue(FillerStart, 0, true,
@@ -216,7 +222,6 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
// Okay, merge "B1" into the same value number as "B0".
if (BValNo != ValLR->valno) {
- IntB.addKills(ValLR->valno, BValNo->kills);
IntB.MergeValueNumberInto(BValNo, ValLR->valno);
}
DEBUG({
@@ -230,13 +235,12 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
if (UIdx != -1) {
ValLREndInst->getOperand(UIdx).setIsKill(false);
- ValLR->valno->removeKill(FillerStart);
}
// If the copy instruction was killing the destination register before the
// merge, find the last use and trim the live range. That will also add the
// isKill marker.
- if (ALR->valno->isKill(CopyIdx))
+ if (ALR->end == CopyIdx)
TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR);
++numExtends;
@@ -304,23 +308,31 @@ TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) {
///
/// This returns true if an interval was modified.
///
-bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
- LiveInterval &IntB,
+bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
MachineInstr *CopyMI) {
- SlotIndex CopyIdx =
- li_->getInstructionIndex(CopyMI).getDefIndex();
-
// FIXME: For now, only eliminate the copy by commuting its def when the
// source register is a virtual register. We want to guard against cases
  // where the copy is a back edge copy and commuting the def lengthens the
// live interval of the source register to the entire loop.
- if (TargetRegisterInfo::isPhysicalRegister(IntA.reg))
+ if (CP.isPhys() && CP.isFlipped())
+ return false;
+
+ // Bail if there is no dst interval.
+ if (!li_->hasInterval(CP.getDstReg()))
return false;
+ SlotIndex CopyIdx =
+ li_->getInstructionIndex(CopyMI).getDefIndex();
+
+ LiveInterval &IntA =
+ li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
- assert(BLR != IntB.end() && "Live range not found!");
+ if (BLR == IntB.end()) return false;
VNInfo *BValNo = BLR->valno;
// Get the location that B is defined at. Two options: either this value has
@@ -342,6 +354,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
AValNo->isUnused() || AValNo->hasPHIKill())
return false;
MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
+ if (!DefMI)
+ return false;
const TargetInstrDesc &TID = DefMI->getDesc();
if (!TID.isCommutable())
return false;
@@ -380,7 +394,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
// clobbers from the superreg.
if (BHasSubRegs)
for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
- if (HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0))
+ if (li_->hasInterval(*SR) &&
+ HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0))
return false;
// If some of the uses of IntA.reg is already coalesced away, return false.
@@ -413,7 +428,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
bool BHasPHIKill = BValNo->hasPHIKill();
SmallVector<VNInfo*, 4> BDeadValNos;
- VNInfo::KillSet BKills;
std::map<SlotIndex, SlotIndex> BExtend;
// If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
@@ -424,8 +438,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
// C = A<kill>
// ...
// = B
- //
- // then do not add kills of A to the newly created B interval.
bool Extended = BLR->end > ALR->end && ALR->end != ALR->start;
if (Extended)
BExtend[ALR->end] = BLR->end;
@@ -448,34 +460,38 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end() || ULR->valno != AValNo)
continue;
- UseMO.setReg(NewReg);
+ if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ UseMO.substPhysReg(NewReg, *tri_);
+ else
+ UseMO.setReg(NewReg);
if (UseMI == CopyMI)
continue;
if (UseMO.isKill()) {
if (Extended)
UseMO.setIsKill(false);
- else
- BKills.push_back(UseIdx.getDefIndex());
}
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ if (UseMI->isCopy()) {
+ if (UseMI->getOperand(0).getReg() != IntB.reg ||
+ UseMI->getOperand(0).getSubReg())
+ continue;
+ } else if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){
+ if (DstReg != IntB.reg || DstSubIdx)
+ continue;
+ } else
continue;
- if (DstReg == IntB.reg && DstSubIdx == 0) {
- // This copy will become a noop. If it's defining a new val#,
- // remove that val# as well. However this live range is being
- // extended to the end of the existing live range defined by the copy.
- SlotIndex DefIdx = UseIdx.getDefIndex();
- const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
- BHasPHIKill |= DLR->valno->hasPHIKill();
- assert(DLR->valno->def == DefIdx);
- BDeadValNos.push_back(DLR->valno);
- BExtend[DLR->start] = DLR->end;
- JoinedCopies.insert(UseMI);
- // If this is a kill but it's going to be removed, the last use
- // of the same val# is the new kill.
- if (UseMO.isKill())
- BKills.pop_back();
- }
+ // This copy will become a noop. If it's defining a new val#,
+ // remove that val# as well. However this live range is being
+ // extended to the end of the existing live range defined by the copy.
+ SlotIndex DefIdx = UseIdx.getDefIndex();
+ const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
+ if (!DLR)
+ continue;
+ BHasPHIKill |= DLR->valno->hasPHIKill();
+ assert(DLR->valno->def == DefIdx);
+ BDeadValNos.push_back(DLR->valno);
+ BExtend[DLR->start] = DLR->end;
+ JoinedCopies.insert(UseMI);
}
// We need to insert a new liverange: [ALR.start, LastUse). It may be we can
@@ -490,24 +506,21 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
VNInfo *DeadVNI = BDeadValNos[i];
if (BHasSubRegs) {
for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
LiveInterval &SRLI = li_->getInterval(*SR);
- const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def);
- SRLI.removeValNo(SRLR->valno);
+ if (const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def))
+ SRLI.removeValNo(SRLR->valno);
}
}
IntB.removeValNo(BDeadValNos[i]);
}
// Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
- // is updated. Kills are also updated.
+ // is updated.
VNInfo *ValNo = BValNo;
ValNo->def = AValNo->def;
ValNo->setCopy(0);
- for (unsigned j = 0, ee = ValNo->kills.size(); j != ee; ++j) {
- if (ValNo->kills[j] != BLR->end)
- BKills.push_back(ValNo->kills[j]);
- }
- ValNo->kills.clear();
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
@@ -517,18 +530,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
if (EI != BExtend.end())
End = EI->second;
IntB.addRange(LiveRange(AI->start, End, ValNo));
-
- // If the IntB live range is assigned to a physical register, and if that
- // physreg has sub-registers, update their live intervals as well.
- if (BHasSubRegs) {
- for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
- LiveInterval &SRLI = li_->getInterval(*SR);
- SRLI.MergeInClobberRange(*li_, AI->start, End,
- li_->getVNInfoAllocator());
- }
- }
}
- IntB.addKills(ValNo, BKills);
ValNo->setHasPHIKill(BHasPHIKill);
DEBUG({
@@ -621,7 +623,11 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
// of last use.
LastUse->setIsKill();
removeRange(li, LastUseIdx.getDefIndex(), LR->end, li_, tri_);
- LR->valno->addKill(LastUseIdx.getDefIndex());
+ if (LastUseMI->isCopy()) {
+ MachineOperand &DefMO = LastUseMI->getOperand(0);
+ if (DefMO.getReg() == li.reg && !DefMO.getSubReg())
+ DefMO.setIsDead();
+ }
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
DstReg == li.reg && DstSubIdx == 0) {
@@ -663,6 +669,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
ValNo->isUnused() || ValNo->hasPHIKill())
return false;
MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+ assert(DefMI && "Defining instruction disappeared");
const TargetInstrDesc &TID = DefMI->getDesc();
if (!TID.isAsCheapAsAMove())
return false;
@@ -701,33 +708,20 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
return false;
}
- SlotIndex DefIdx = CopyIdx.getDefIndex();
- const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx);
- DLR->valno->setCopy(0);
- // Don't forget to update sub-register intervals.
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
- for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) {
- if (!li_->hasInterval(*SR))
- continue;
- const LiveRange *DLR =
- li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
- if (DLR && DLR->valno->getCopy() == CopyMI)
- DLR->valno->setCopy(0);
- }
- }
+ RemoveCopyFlag(DstReg, CopyMI);
// If copy kills the source register, find the last use and propagate
// kill.
bool checkForDeadDef = false;
MachineBasicBlock *MBB = CopyMI->getParent();
- if (SrcLR->valno->isKill(DefIdx))
+ if (SrcLR->end == CopyIdx.getDefIndex())
if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) {
checkForDeadDef = true;
}
MachineBasicBlock::iterator MII =
llvm::next(MachineBasicBlock::iterator(CopyMI));
- tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_);
+ tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_);
MachineInstr *NewMI = prior(MII);
if (checkForDeadDef) {
@@ -747,24 +741,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
MachineOperand &MO = CopyMI->getOperand(i);
if (MO.isReg() && MO.isImplicit())
NewMI->addOperand(MO);
- if (MO.isDef() && li_->hasInterval(MO.getReg())) {
- unsigned Reg = MO.getReg();
- const LiveRange *DLR =
- li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
- if (DLR && DLR->valno->getCopy() == CopyMI)
- DLR->valno->setCopy(0);
- // Handle subregs as well
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (const unsigned* SR = tri_->getSubRegisters(Reg); *SR; ++SR) {
- if (!li_->hasInterval(*SR))
- continue;
- const LiveRange *DLR =
- li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
- if (DLR && DLR->valno->getCopy() == CopyMI)
- DLR->valno->setCopy(0);
- }
- }
- }
+ if (MO.isDef())
+ RemoveCopyFlag(MO.getReg(), CopyMI);
}
TransferImplicitOps(CopyMI, NewMI);
@@ -783,84 +761,72 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
/// being updated is not zero, make sure to set it to the correct physical
/// subregister.
void
-SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
- unsigned SubIdx) {
- bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
- if (DstIsPhys && SubIdx) {
- // Figure out the real physical register we are updating with.
- DstReg = tri_->getSubReg(DstReg, SubIdx);
- SubIdx = 0;
- }
-
- // Copy the register use-list before traversing it. We may be adding operands
- // and invalidating pointers.
- SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist;
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg),
- E = mri_->reg_end(); I != E; ++I)
- reglist.push_back(std::make_pair(&*I, I.getOperandNo()));
-
- for (unsigned N=0; N != reglist.size(); ++N) {
- MachineInstr *UseMI = reglist[N].first;
- MachineOperand &O = UseMI->getOperand(reglist[N].second);
- unsigned OldSubIdx = O.getSubReg();
+SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) {
+ bool DstIsPhys = CP.isPhys();
+ unsigned SrcReg = CP.getSrcReg();
+ unsigned DstReg = CP.getDstReg();
+ unsigned SubIdx = CP.getSubIdx();
+
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ // A PhysReg copy that won't be coalesced can perhaps be rematerialized
+ // instead.
if (DstIsPhys) {
- unsigned UseDstReg = DstReg;
- if (OldSubIdx)
- UseDstReg = tri_->getSubReg(DstReg, OldSubIdx);
-
unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
CopySrcSubIdx, CopyDstSubIdx) &&
- CopySrcSubIdx == 0 &&
- CopyDstSubIdx == 0 &&
- CopySrcReg != CopyDstReg &&
- CopySrcReg == SrcReg && CopyDstReg != UseDstReg) {
- // If the use is a copy and it won't be coalesced away, and its source
- // is defined by a trivial computation, try to rematerialize it instead.
- if (!JoinedCopies.count(UseMI) &&
- ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,
- CopyDstSubIdx, UseMI))
- continue;
- }
+ CopySrcSubIdx == 0 && CopyDstSubIdx == 0 &&
+ CopySrcReg != CopyDstReg && CopySrcReg == SrcReg &&
+ CopyDstReg != DstReg && !JoinedCopies.count(UseMI) &&
+ ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, 0,
+ UseMI))
+ continue;
- O.setReg(UseDstReg);
- O.setSubReg(0);
- if (OldSubIdx) {
- // Def and kill of subregister of a virtual register actually defs and
- // kills the whole register. Add imp-defs and imp-kills as needed.
- if (O.isDef()) {
- if(O.isDead())
- UseMI->addRegisterDead(DstReg, tri_, true);
- else
- UseMI->addRegisterDefined(DstReg, tri_);
- } else if (!O.isUndef() &&
- (O.isKill() ||
- UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0))))
- UseMI->addRegisterKilled(DstReg, tri_, true);
- }
+ if (UseMI->isCopy() &&
+ !UseMI->getOperand(1).getSubReg() &&
+ !UseMI->getOperand(0).getSubReg() &&
+ UseMI->getOperand(1).getReg() == SrcReg &&
+ UseMI->getOperand(0).getReg() != SrcReg &&
+ UseMI->getOperand(0).getReg() != DstReg &&
+ !JoinedCopies.count(UseMI) &&
+ ReMaterializeTrivialDef(li_->getInterval(SrcReg),
+ UseMI->getOperand(0).getReg(), 0, UseMI))
+ continue;
+ }
- DEBUG({
- dbgs() << "\t\tupdated: ";
- if (!UseMI->isDebugValue())
- dbgs() << li_->getInstructionIndex(UseMI) << "\t";
- dbgs() << *UseMI;
- });
- continue;
+ SmallVector<unsigned,8> Ops;
+ bool Reads, Writes;
+ tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+ bool Kills = false, Deads = false;
+
+ // Replace SrcReg with DstReg in all UseMI operands.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(Ops[i]);
+ Kills |= MO.isKill();
+ Deads |= MO.isDead();
+
+ if (DstIsPhys)
+ MO.substPhysReg(DstReg, *tri_);
+ else
+ MO.substVirtReg(DstReg, SubIdx, *tri_);
}
- // Sub-register indexes goes from small to large. e.g.
- // RAX: 1 -> AL, 2 -> AX, 3 -> EAX
- // EAX: 1 -> AL, 2 -> AX
-  // So RAX's sub-register 2 is AX, RAX's sub-register 3 is EAX, whose
- // sub-register 2 is also AX.
- //
- // FIXME: Properly compose subreg indices for all targets.
- //
- if (SubIdx && OldSubIdx && SubIdx != OldSubIdx)
- ;
- else if (SubIdx)
- O.setSubReg(SubIdx);
- O.setReg(DstReg);
+ // This instruction is a copy that will be removed.
+ if (JoinedCopies.count(UseMI))
+ continue;
+
+ if (SubIdx) {
+ // If UseMI was a simple SrcReg def, make sure we didn't turn it into a
+ // read-modify-write of DstReg.
+ if (Deads)
+ UseMI->addRegisterDead(DstReg, tri_);
+ else if (!Reads && Writes)
+ UseMI->addRegisterDefined(DstReg, tri_);
+
+ // Kill flags apply to the whole physical register.
+ if (DstIsPhys && Kills)
+ UseMI->addRegisterKilled(DstReg, tri_);
+ }
DEBUG({
dbgs() << "\t\tupdated: ";
@@ -869,15 +835,15 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
dbgs() << *UseMI;
});
+
// After updating the operand, check if the machine instruction has
// become a copy. If so, update its val# information.
- if (JoinedCopies.count(UseMI))
+ const TargetInstrDesc &TID = UseMI->getDesc();
+ if (DstIsPhys || TID.getNumDefs() != 1 || TID.getNumOperands() <= 2)
continue;
- const TargetInstrDesc &TID = UseMI->getDesc();
unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
- if (TID.getNumDefs() == 1 && TID.getNumOperands() > 2 &&
- tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
+ if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
CopySrcSubIdx, CopyDstSubIdx) &&
CopySrcReg != CopyDstReg &&
(TargetRegisterInfo::isVirtualRegister(CopyDstReg) ||
@@ -945,6 +911,27 @@ bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li,
return removeIntervalIfEmpty(li, li_, tri_);
}
+void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg,
+ const MachineInstr *CopyMI) {
+ SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+ if (li_->hasInterval(DstReg)) {
+ LiveInterval &LI = li_->getInterval(DstReg);
+ if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
+ if (LR->valno->getCopy() == CopyMI)
+ LR->valno->setCopy(0);
+ }
+ if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return;
+ for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) {
+ if (!li_->hasInterval(*AS))
+ continue;
+ LiveInterval &LI = li_->getInterval(*AS);
+ if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
+ if (LR->valno->getCopy() == CopyMI)
+ LR->valno->setCopy(0);
+ }
+}
+
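
As a quick orientation for the helper above — a minimal caller sketch, assuming the same CopyMI/DstReg names used by ReMaterializeTrivialDef and mirroring its two call sites elsewhere in this patch rather than adding anything new:

  // After CopyMI has been replaced by a rematerialized def, detach it from
  // every value number that still records it as the defining copy.
  RemoveCopyFlag(DstReg, CopyMI);
  for (unsigned i = 0, e = CopyMI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = CopyMI->getOperand(i);
    if (MO.isReg() && MO.isDef())
      RemoveCopyFlag(MO.getReg(), CopyMI);
  }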
/// PropagateDeadness - Propagate the dead marker to the instruction which
/// defines the val#.
static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI,
@@ -978,8 +965,8 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
// Live-in to the function but dead. Remove it from entry live-in set.
if (mf_->begin()->isLiveIn(li.reg))
mf_->begin()->removeLiveIn(li.reg);
- const LiveRange *LR = li.getLiveRangeContaining(CopyIdx);
- removeRange(li, LR->start, LR->end, li_, tri_);
+ if (const LiveRange *LR = li.getLiveRangeContaining(CopyIdx))
+ removeRange(li, LR->start, LR->end, li_, tri_);
return removeIntervalIfEmpty(li, li_, tri_);
}
@@ -1017,147 +1004,12 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
// val#, then propagate the dead marker.
PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_);
++numDeadValNo;
-
- if (LR->valno->isKill(RemoveEnd))
- LR->valno->removeKill(RemoveEnd);
}
removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
return removeIntervalIfEmpty(li, li_, tri_);
}
-/// CanCoalesceWithImpDef - Returns true if the specified copy instruction
-/// from an implicit def to another register can be coalesced away.
-bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI,
- LiveInterval &li,
- LiveInterval &ImpLi) const{
- if (!CopyMI->killsRegister(ImpLi.reg))
- return false;
- // Make sure this is the only use.
- for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(ImpLi.reg),
- UE = mri_->use_end(); UI != UE;) {
- MachineInstr *UseMI = &*UI;
- ++UI;
- if (CopyMI == UseMI || JoinedCopies.count(UseMI))
- continue;
- return false;
- }
- return true;
-}
-
-
-/// isWinToJoinVRWithSrcPhysReg - Return true if it's worth while to join a
-/// a virtual destination register with physical source register.
-bool
-SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
- MachineBasicBlock *CopyMBB,
- LiveInterval &DstInt,
- LiveInterval &SrcInt) {
-  // If the virtual register live interval is long but it has low use density,
- // do not join them, instead mark the physical register as its allocation
- // preference.
- const TargetRegisterClass *RC = mri_->getRegClass(DstInt.reg);
- unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
- unsigned Length = li_->getApproximateInstructionCount(DstInt);
- if (Length > Threshold &&
- std::distance(mri_->use_nodbg_begin(DstInt.reg),
- mri_->use_nodbg_end()) * Threshold < Length)
- return false;
-
- // If the virtual register live interval extends into a loop, turn down
- // aggressiveness.
- SlotIndex CopyIdx =
- li_->getInstructionIndex(CopyMI).getDefIndex();
- const MachineLoop *L = loopInfo->getLoopFor(CopyMBB);
- if (!L) {
- // Let's see if the virtual register live interval extends into the loop.
- LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx);
- assert(DLR != DstInt.end() && "Live range not found!");
- DLR = DstInt.FindLiveRangeContaining(DLR->end.getNextSlot());
- if (DLR != DstInt.end()) {
- CopyMBB = li_->getMBBFromIndex(DLR->start);
- L = loopInfo->getLoopFor(CopyMBB);
- }
- }
-
- if (!L || Length <= Threshold)
- return true;
-
- SlotIndex UseIdx = CopyIdx.getUseIndex();
- LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
- MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
- if (loopInfo->getLoopFor(SMBB) != L) {
- if (!loopInfo->isLoopHeader(CopyMBB))
- return false;
- // If vr's live interval extends pass the loop header, do not join.
- for (MachineBasicBlock::succ_iterator SI = CopyMBB->succ_begin(),
- SE = CopyMBB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock *SuccMBB = *SI;
- if (SuccMBB == CopyMBB)
- continue;
- if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB),
- li_->getMBBEndIdx(SuccMBB)))
- return false;
- }
- }
- return true;
-}
-
-/// isWinToJoinVRWithDstPhysReg - Return true if it's worth while to join a
-/// copy from a virtual source register to a physical destination register.
-bool
-SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
- MachineBasicBlock *CopyMBB,
- LiveInterval &DstInt,
- LiveInterval &SrcInt) {
- // If the virtual register live interval is long but it has low use density,
- // do not join them, instead mark the physical register as its allocation
- // preference.
- const TargetRegisterClass *RC = mri_->getRegClass(SrcInt.reg);
- unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
- unsigned Length = li_->getApproximateInstructionCount(SrcInt);
- if (Length > Threshold &&
- std::distance(mri_->use_nodbg_begin(SrcInt.reg),
- mri_->use_nodbg_end()) * Threshold < Length)
- return false;
-
- if (SrcInt.empty())
- // Must be implicit_def.
- return false;
-
- // If the virtual register live interval is defined or cross a loop, turn
- // down aggressiveness.
- SlotIndex CopyIdx =
- li_->getInstructionIndex(CopyMI).getDefIndex();
- SlotIndex UseIdx = CopyIdx.getUseIndex();
- LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
- assert(SLR != SrcInt.end() && "Live range not found!");
- SLR = SrcInt.FindLiveRangeContaining(SLR->start.getPrevSlot());
- if (SLR == SrcInt.end())
- return true;
- MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
- const MachineLoop *L = loopInfo->getLoopFor(SMBB);
-
- if (!L || Length <= Threshold)
- return true;
-
- if (loopInfo->getLoopFor(CopyMBB) != L) {
- if (SMBB != L->getLoopLatch())
- return false;
- // If vr's live interval is extended from before the loop latch, do not
- // join.
- for (MachineBasicBlock::pred_iterator PI = SMBB->pred_begin(),
- PE = SMBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *PredMBB = *PI;
- if (PredMBB == SMBB)
- continue;
- if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB),
- li_->getMBBEndIdx(PredMBB)))
- return false;
- }
- }
- return true;
-}
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
/// two virtual registers from different register classes.
@@ -1203,157 +1055,6 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg,
return true;
}
-/// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
-/// register with a physical register, check if any of the virtual register
-/// operand is a sub-register use or def. If so, make sure it won't result
-/// in an illegal extract_subreg or insert_subreg instruction. e.g.
-/// vr1024 = extract_subreg vr1025, 1
-/// ...
-/// vr1024 = mov8rr AH
-/// If vr1024 is coalesced with AH, the extract_subreg is now illegal since
-/// AH does not have a super-reg whose sub-register 1 is AH.
-bool
-SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
- unsigned VirtReg,
- unsigned PhysReg) {
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg),
- E = mri_->reg_end(); I != E; ++I) {
- MachineOperand &O = I.getOperand();
- if (O.isDebug())
- continue;
- MachineInstr *MI = &*I;
- if (MI == CopyMI || JoinedCopies.count(MI))
- continue;
- unsigned SubIdx = O.getSubReg();
- if (SubIdx && !tri_->getSubReg(PhysReg, SubIdx))
- return true;
- if (MI->isExtractSubreg()) {
- SubIdx = MI->getOperand(2).getImm();
- if (O.isUse() && !tri_->getSubReg(PhysReg, SubIdx))
- return true;
- if (O.isDef()) {
- unsigned SrcReg = MI->getOperand(1).getReg();
- const TargetRegisterClass *RC =
- TargetRegisterInfo::isPhysicalRegister(SrcReg)
- ? tri_->getPhysicalRegisterRegClass(SrcReg)
- : mri_->getRegClass(SrcReg);
- if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
- return true;
- }
- }
- if (MI->isInsertSubreg() || MI->isSubregToReg()) {
- SubIdx = MI->getOperand(3).getImm();
- if (VirtReg == MI->getOperand(0).getReg()) {
- if (!tri_->getSubReg(PhysReg, SubIdx))
- return true;
- } else {
- unsigned DstReg = MI->getOperand(0).getReg();
- const TargetRegisterClass *RC =
- TargetRegisterInfo::isPhysicalRegister(DstReg)
- ? tri_->getPhysicalRegisterRegClass(DstReg)
- : mri_->getRegClass(DstReg);
- if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
- return true;
- }
- }
- }
- return false;
-}
-
-
-/// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
-/// an extract_subreg where dst is a physical register, e.g.
-/// cl = EXTRACT_SUBREG reg1024, 1
-bool
-SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
- unsigned SrcReg, unsigned SubIdx,
- unsigned &RealDstReg) {
- const TargetRegisterClass *RC = mri_->getRegClass(SrcReg);
- RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
- if (!RealDstReg) {
- DEBUG(dbgs() << "\tIncompatible source regclass: "
- << "none of the super-registers of " << tri_->getName(DstReg)
- << " are in " << RC->getName() << ".\n");
- return false;
- }
-
- LiveInterval &RHS = li_->getInterval(SrcReg);
- // For this type of EXTRACT_SUBREG, conservatively
- // check if the live interval of the source register interfere with the
- // actual super physical register we are trying to coalesce with.
- if (li_->hasInterval(RealDstReg) &&
- RHS.overlaps(li_->getInterval(RealDstReg))) {
- DEBUG({
- dbgs() << "\t\tInterfere with register ";
- li_->getInterval(RealDstReg).print(dbgs(), tri_);
- });
- return false; // Not coalescable
- }
- for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
- // Do not check DstReg or its sub-register. JoinIntervals() will take care
- // of that.
- if (*SR != DstReg &&
- !tri_->isSubRegister(DstReg, *SR) &&
- li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
- DEBUG({
- dbgs() << "\t\tInterfere with sub-register ";
- li_->getInterval(*SR).print(dbgs(), tri_);
- });
- return false; // Not coalescable
- }
- return true;
-}
-
-/// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
-/// an insert_subreg where src is a physical register, e.g.
-/// reg1024 = INSERT_SUBREG reg1024, c1, 0
-bool
-SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
- unsigned SrcReg, unsigned SubIdx,
- unsigned &RealSrcReg) {
- const TargetRegisterClass *RC = mri_->getRegClass(DstReg);
- RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC);
- if (!RealSrcReg) {
- DEBUG(dbgs() << "\tIncompatible destination regclass: "
- << "none of the super-registers of " << tri_->getName(SrcReg)
- << " are in " << RC->getName() << ".\n");
- return false;
- }
-
- LiveInterval &LHS = li_->getInterval(DstReg);
- if (li_->hasInterval(RealSrcReg) &&
- LHS.overlaps(li_->getInterval(RealSrcReg))) {
- DEBUG({
- dbgs() << "\t\tInterfere with register ";
- li_->getInterval(RealSrcReg).print(dbgs(), tri_);
- });
- return false; // Not coalescable
- }
- for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
- // Do not check SrcReg or its sub-register. JoinIntervals() will take care
- // of that.
- if (*SR != SrcReg &&
- !tri_->isSubRegister(SrcReg, *SR) &&
- li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
- DEBUG({
- dbgs() << "\t\tInterfere with sub-register ";
- li_->getInterval(*SR).print(dbgs(), tri_);
- });
- return false; // Not coalescable
- }
- return true;
-}
-
-/// getRegAllocPreference - Return register allocation preference register.
-///
-static unsigned getRegAllocPreference(unsigned Reg, MachineFunction &MF,
- MachineRegisterInfo *MRI,
- const TargetRegisterInfo *TRI) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return 0;
- std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
- return TRI->ResolveRegAllocHint(Hint.first, Hint.second, MF);
-}
/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns true
@@ -1369,354 +1070,97 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
- unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0;
- bool isExtSubReg = CopyMI->isExtractSubreg();
- bool isInsSubReg = CopyMI->isInsertSubreg();
- bool isSubRegToReg = CopyMI->isSubregToReg();
- unsigned SubIdx = 0;
- if (isExtSubReg) {
- DstReg = CopyMI->getOperand(0).getReg();
- DstSubIdx = CopyMI->getOperand(0).getSubReg();
- SrcReg = CopyMI->getOperand(1).getReg();
- SrcSubIdx = CopyMI->getOperand(2).getImm();
- } else if (isInsSubReg || isSubRegToReg) {
- DstReg = CopyMI->getOperand(0).getReg();
- DstSubIdx = CopyMI->getOperand(3).getImm();
- SrcReg = CopyMI->getOperand(2).getReg();
- SrcSubIdx = CopyMI->getOperand(2).getSubReg();
- if (SrcSubIdx && SrcSubIdx != DstSubIdx) {
- // r1025 = INSERT_SUBREG r1025, r1024<2>, 2 Then r1024 has already been
- // coalesced to a larger register so the subreg indices cancel out.
- DEBUG(dbgs() << "\tSource of insert_subreg or subreg_to_reg is already "
- "coalesced to another register.\n");
- return false; // Not coalescable.
- }
- } else if (tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- if (SrcSubIdx && DstSubIdx && SrcSubIdx != DstSubIdx) {
- // e.g. %reg16404:1<def> = MOV8rr %reg16412:2<kill>
- Again = true;
- return false; // Not coalescable.
- }
- } else {
- llvm_unreachable("Unrecognized copy instruction!");
+ CoalescerPair CP(*tii_, *tri_);
+ if (!CP.setRegisters(CopyMI)) {
+ DEBUG(dbgs() << "\tNot coalescable.\n");
+ return false;
}
// If they are already joined we continue.
- if (SrcReg == DstReg) {
+ if (CP.getSrcReg() == CP.getDstReg()) {
DEBUG(dbgs() << "\tCopy already coalesced.\n");
return false; // Not coalescable.
}
- bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
- bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
-
- // If they are both physical registers, we cannot join them.
- if (SrcIsPhys && DstIsPhys) {
- DEBUG(dbgs() << "\tCan not coalesce physregs.\n");
- return false; // Not coalescable.
- }
-
- // We only join virtual registers with allocatable physical registers.
- if (SrcIsPhys && !allocatableRegs_[SrcReg]) {
- DEBUG(dbgs() << "\tSrc reg is unallocatable physreg.\n");
- return false; // Not coalescable.
- }
- if (DstIsPhys && !allocatableRegs_[DstReg]) {
- DEBUG(dbgs() << "\tDst reg is unallocatable physreg.\n");
- return false; // Not coalescable.
- }
-
- // We cannot handle dual subreg indices and mismatched classes at the same
- // time.
- if (SrcSubIdx && DstSubIdx && differingRegisterClasses(SrcReg, DstReg)) {
- DEBUG(dbgs() << "\tCannot handle subreg indices and mismatched classes.\n");
- return false;
- }
+ DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg());
- // Check that a physical source register is compatible with dst regclass
- if (SrcIsPhys) {
- unsigned SrcSubReg = SrcSubIdx ?
- tri_->getSubReg(SrcReg, SrcSubIdx) : SrcReg;
- const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
- const TargetRegisterClass *DstSubRC = DstRC;
- if (DstSubIdx)
- DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx);
- assert(DstSubRC && "Illegal subregister index");
- if (!DstSubRC->contains(SrcSubReg)) {
- DEBUG(dbgs() << "\tIncompatible destination regclass: "
- << "none of the super-registers of "
- << tri_->getName(SrcSubReg) << " are in "
- << DstSubRC->getName() << ".\n");
- return false; // Not coalescable.
- }
- }
-
- // Check that a physical dst register is compatible with source regclass
- if (DstIsPhys) {
- unsigned DstSubReg = DstSubIdx ?
- tri_->getSubReg(DstReg, DstSubIdx) : DstReg;
- const TargetRegisterClass *SrcRC = mri_->getRegClass(SrcReg);
- const TargetRegisterClass *SrcSubRC = SrcRC;
- if (SrcSubIdx)
- SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx);
- assert(SrcSubRC && "Illegal subregister index");
- if (!SrcSubRC->contains(DstSubReg)) {
- DEBUG(dbgs() << "\tIncompatible source regclass: "
- << "none of the super-registers of "
- << tri_->getName(DstSubReg) << " are in "
- << SrcSubRC->getName() << ".\n");
- (void)DstSubReg;
- return false; // Not coalescable.
+ // Enforce policies.
+ if (CP.isPhys()) {
+ DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n");
+ // Only coalesce to allocatable physreg.
+ if (!allocatableRegs_[CP.getDstReg()]) {
+ DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
+ return false; // Not coalescable.
}
- }
-
- // Should be non-null only when coalescing to a sub-register class.
- bool CrossRC = false;
- const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
- const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg);
- const TargetRegisterClass *NewRC = NULL;
- unsigned RealDstReg = 0;
- unsigned RealSrcReg = 0;
- if (isExtSubReg || isInsSubReg || isSubRegToReg) {
- SubIdx = CopyMI->getOperand(isExtSubReg ? 2 : 3).getImm();
- if (SrcIsPhys && isExtSubReg) {
- // r1024 = EXTRACT_SUBREG EAX, 0 then r1024 is really going to be
- // coalesced with AX.
- unsigned DstSubIdx = CopyMI->getOperand(0).getSubReg();
- if (DstSubIdx) {
- // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been
- // coalesced to a larger register so the subreg indices cancel out.
- if (DstSubIdx != SubIdx) {
- DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
- return false; // Not coalescable.
- }
- } else
- SrcReg = tri_->getSubReg(SrcReg, SubIdx);
- SubIdx = 0;
- } else if (DstIsPhys && (isInsSubReg || isSubRegToReg)) {
- // EAX = INSERT_SUBREG EAX, r1024, 0
- unsigned SrcSubIdx = CopyMI->getOperand(2).getSubReg();
- if (SrcSubIdx) {
- // EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been
- // coalesced to a larger register so the subreg indices cancel out.
- if (SrcSubIdx != SubIdx) {
- DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
- return false; // Not coalescable.
- }
- } else
- DstReg = tri_->getSubReg(DstReg, SubIdx);
- SubIdx = 0;
- } else if ((DstIsPhys && isExtSubReg) ||
- (SrcIsPhys && (isInsSubReg || isSubRegToReg))) {
- if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) {
- DEBUG(dbgs() << "\tSrc of extract_subreg already coalesced with reg"
- << " of a super-class.\n");
- return false; // Not coalescable.
- }
-
- // FIXME: The following checks are somewhat conservative. Perhaps a better
- // way to implement this is to treat this as coalescing a vr with the
- // super physical register.
- if (isExtSubReg) {
- if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg))
- return false; // Not coalescable
- } else {
- if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
- return false; // Not coalescable
- }
- SubIdx = 0;
- } else {
- unsigned OldSubIdx = isExtSubReg ? CopyMI->getOperand(0).getSubReg()
- : CopyMI->getOperand(2).getSubReg();
- if (OldSubIdx) {
- if (OldSubIdx == SubIdx && !differingRegisterClasses(SrcReg, DstReg))
- // r1024<2> = EXTRACT_SUBREG r1025, 2. Then r1024 has already been
- // coalesced to a larger register so the subreg indices cancel out.
- // Also check if the other larger register is of the same register
- // class as the would be resulting register.
- SubIdx = 0;
- else {
- DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
- return false; // Not coalescable.
- }
- }
- if (SubIdx) {
- if (!DstIsPhys && !SrcIsPhys) {
- if (isInsSubReg || isSubRegToReg) {
- NewRC = tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx);
- } else // extract_subreg {
- NewRC = tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx);
- }
- if (!NewRC) {
- DEBUG(dbgs() << "\t Conflicting sub-register indices.\n");
- return false; // Not coalescable
- }
+ } else {
+ DEBUG({
+ dbgs() << " with reg%" << CP.getDstReg();
+ if (CP.getSubIdx())
+ dbgs() << ":" << tri_->getSubRegIndexName(CP.getSubIdx());
+ dbgs() << " to " << CP.getNewRC()->getName() << "\n";
+ });
- if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
- DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: "
- << SrcRC->getName() << "/"
- << DstRC->getName() << " -> "
- << NewRC->getName() << ".\n");
- Again = true; // May be possible to coalesce later.
- return false;
- }
- }
- }
- } else if (differingRegisterClasses(SrcReg, DstReg)) {
- if (DisableCrossClassJoin)
- return false;
- CrossRC = true;
-
- // FIXME: What if the result of a EXTRACT_SUBREG is then coalesced
- // with another? If it's the resulting destination register, then
- // the subidx must be propagated to uses (but only those defined
- // by the EXTRACT_SUBREG). If it's being coalesced into another
- // register, it should be safe because register is assumed to have
- // the register class of the super-register.
-
- // Process moves where one of the registers have a sub-register index.
- MachineOperand *DstMO = CopyMI->findRegisterDefOperand(DstReg);
- MachineOperand *SrcMO = CopyMI->findRegisterUseOperand(SrcReg);
- SubIdx = DstMO->getSubReg();
- if (SubIdx) {
- if (SrcMO->getSubReg())
- // FIXME: can we handle this?
+ // Avoid constraining virtual register regclass too much.
+ if (CP.isCrossClass()) {
+ if (DisableCrossClassJoin) {
+ DEBUG(dbgs() << "\tCross-class joins disabled.\n");
return false;
- // This is not an insert_subreg but it looks like one.
- // e.g. %reg1024:4 = MOV32rr %EAX
- isInsSubReg = true;
- if (SrcIsPhys) {
- if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
- return false; // Not coalescable
- SubIdx = 0;
- }
- } else {
- SubIdx = SrcMO->getSubReg();
- if (SubIdx) {
- // This is not a extract_subreg but it looks like one.
- // e.g. %cl = MOV16rr %reg1024:1
- isExtSubReg = true;
- if (DstIsPhys) {
- if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx,RealDstReg))
- return false; // Not coalescable
- SubIdx = 0;
- }
- }
- }
-
- // Now determine the register class of the joined register.
- if (!SrcIsPhys && !DstIsPhys) {
- if (isExtSubReg) {
- NewRC =
- SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC;
- } else if (isInsSubReg) {
- NewRC =
- SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC;
- } else {
- NewRC = getCommonSubClass(SrcRC, DstRC);
- }
-
- if (!NewRC) {
- DEBUG(dbgs() << "\tDisjoint regclasses: "
- << SrcRC->getName() << ", "
- << DstRC->getName() << ".\n");
- return false; // Not coalescable.
}
-
- // If we are joining two virtual registers and the resulting register
- // class is more restrictive (fewer register, smaller size). Check if it's
- // worth doing the merge.
- if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
+ if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(),
+ mri_->getRegClass(CP.getSrcReg()),
+ mri_->getRegClass(CP.getDstReg()),
+ CP.getNewRC())) {
DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: "
- << SrcRC->getName() << "/"
- << DstRC->getName() << " -> "
- << NewRC->getName() << ".\n");
- // Allow the coalescer to try again in case either side gets coalesced to
- // a physical register that's compatible with the other side. e.g.
- // r1024 = MOV32to32_ r1025
- // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
+ << CP.getNewRC()->getName() << ".\n");
Again = true; // May be possible to coalesce later.
return false;
}
}
- }
-
- // Will it create illegal extract_subreg / insert_subreg?
- if (SrcIsPhys && HasIncompatibleSubRegDefUse(CopyMI, DstReg, SrcReg))
- return false;
- if (DstIsPhys && HasIncompatibleSubRegDefUse(CopyMI, SrcReg, DstReg))
- return false;
-
- LiveInterval &SrcInt = li_->getInterval(SrcReg);
- LiveInterval &DstInt = li_->getInterval(DstReg);
- assert(SrcInt.reg == SrcReg && DstInt.reg == DstReg &&
- "Register mapping is horribly broken!");
- DEBUG({
- dbgs() << "\t\tInspecting ";
- if (SrcRC) dbgs() << SrcRC->getName() << ": ";
- SrcInt.print(dbgs(), tri_);
- dbgs() << "\n\t\t and ";
- if (DstRC) dbgs() << DstRC->getName() << ": ";
- DstInt.print(dbgs(), tri_);
- dbgs() << "\n";
- });
+ // When possible, let DstReg be the larger interval.
+ if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() >
+ li_->getInterval(CP.getDstReg()).ranges.size())
+ CP.flip();
+ }
+
+ // We need to be careful about coalescing a source physical register with a
+ // virtual register. Once the coalescing is done, it cannot be broken and
+ // these are not spillable! If the destination interval uses are far away,
+ // think twice about coalescing them!
+ // FIXME: Why are we skipping this test for partial copies?
+ // CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
+ if (!CP.isPartial() && CP.isPhys()) {
+ LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg());
+
+ // Don't join with physregs that have a ridiculous number of live
+ // ranges. The data structure performance is really bad when that
+ // happens.
+ if (li_->hasInterval(CP.getDstReg()) &&
+ li_->getInterval(CP.getDstReg()).ranges.size() > 1000) {
+ mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg());
+ ++numAborts;
+ DEBUG(dbgs()
+ << "\tPhysical register live interval too complicated, abort!\n");
+ return false;
+ }
- // Save a copy of the virtual register live interval. We'll manually
- // merge this into the "real" physical register live interval this is
- // coalesced with.
- OwningPtr<LiveInterval> SavedLI;
- if (RealDstReg)
- SavedLI.reset(li_->dupInterval(&SrcInt));
- else if (RealSrcReg)
- SavedLI.reset(li_->dupInterval(&DstInt));
-
- if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) {
- // Check if it is necessary to propagate "isDead" property.
- MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false);
- bool isDead = mopd->isDead();
-
- // We need to be careful about coalescing a source physical register with a
- // virtual register. Once the coalescing is done, it cannot be broken and
- // these are not spillable! If the destination interval uses are far away,
- // think twice about coalescing them!
- if (!isDead && (SrcIsPhys || DstIsPhys)) {
- // If the virtual register live interval is long but it has low use
- // density, do not join them, instead mark the physical register as its
- // allocation preference.
- LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
- LiveInterval &JoinPInt = SrcIsPhys ? SrcInt : DstInt;
- unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg;
- unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg;
-
- // Don't join with physregs that have a ridiculous number of live
- // ranges. The data structure performance is really bad when that
- // happens.
- if (JoinPInt.ranges.size() > 1000) {
- mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
- ++numAborts;
- DEBUG(dbgs()
- << "\tPhysical register live interval too complicated, abort!\n");
- return false;
- }
+ const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
+ unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+ unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
+ if (Length > Threshold &&
+ std::distance(mri_->use_nodbg_begin(CP.getSrcReg()),
+ mri_->use_nodbg_end()) * Threshold < Length) {
+ // Before giving up coalescing, if definition of source is defined by
+ // trivial computation, try rematerializing it.
+ if (!CP.isFlipped() &&
+ ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI))
+ return true;
- const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg);
- unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
- unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
- if (Length > Threshold &&
- std::distance(mri_->use_nodbg_begin(JoinVReg),
- mri_->use_nodbg_end()) * Threshold < Length) {
- // Before giving up coalescing, if definition of source is defined by
- // trivial computation, try rematerializing it.
- if (ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI))
- return true;
-
- mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
- ++numAborts;
- DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
- Again = true; // May be possible to coalesce later.
- return false;
- }
+ mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg());
+ ++numAborts;
+ DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
}
}
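
The physreg policy in the hunk above reduces to a use-density test: Threshold is twice the number of allocatable registers in the virtual register's class, and the join is abandoned when the interval is long relative to how often the register is used. A self-contained sketch of just that arithmetic (the function and its plain-integer inputs are illustrative stand-ins, not part of the patch):

  #include <cstdio>

  // NumAllocatable: allocatable registers in the vreg's class
  // Length:         approximate instruction count of its live interval
  // NumUses:        non-debug uses of the vreg
  static bool tooSparseToCoalesce(unsigned NumAllocatable, unsigned Length,
                                  unsigned NumUses) {
    unsigned Threshold = NumAllocatable * 2;
    return Length > Threshold && NumUses * Threshold < Length;
  }

  int main() {
    std::printf("%d\n", tooSparseToCoalesce(8, 100, 3)); // 1: abort, just set a hint
    std::printf("%d\n", tooSparseToCoalesce(8, 12, 3));  // 0: fine to coalesce
    return 0;
  }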
@@ -1724,32 +1168,24 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// Otherwise, if one of the intervals being joined is a physreg, this method
// always canonicalizes DstInt to be it. The output "SrcInt" will not have
// been modified, so we can use this information below to update aliases.
- bool Swapped = false;
- // If SrcInt is implicitly defined, it's safe to coalesce.
- if (SrcInt.empty()) {
- if (!CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) {
- // Only coalesce an empty interval (defined by implicit_def) with
- // another interval which has a valno defined by the CopyMI and the CopyMI
- // is a kill of the implicit def.
- DEBUG(dbgs() << "\tNot profitable!\n");
- return false;
- }
- } else if (!JoinIntervals(DstInt, SrcInt, Swapped)) {
+ if (!JoinIntervals(CP)) {
// Coalescing failed.
// If definition of source is defined by trivial computation, try
// rematerializing it.
- if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
- ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI))
+ if (!CP.isFlipped() &&
+ ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()),
+ CP.getDstReg(), 0, CopyMI))
return true;
// If we can eliminate the copy without merging the live ranges, do so now.
- if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
- (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) ||
- RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) {
- JoinedCopies.insert(CopyMI);
- DEBUG(dbgs() << "\tTrivial!\n");
- return true;
+ if (!CP.isPartial()) {
+ if (AdjustCopiesBackFrom(CP, CopyMI) ||
+ RemoveCopyByCommutingDef(CP, CopyMI)) {
+ JoinedCopies.insert(CopyMI);
+ DEBUG(dbgs() << "\tTrivial!\n");
+ return true;
+ }
}
// Otherwise, we are unable to join the intervals.
@@ -1758,86 +1194,32 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
return false;
}
- LiveInterval *ResSrcInt = &SrcInt;
- LiveInterval *ResDstInt = &DstInt;
- if (Swapped) {
- std::swap(SrcReg, DstReg);
- std::swap(ResSrcInt, ResDstInt);
- }
- assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- "LiveInterval::join didn't work right!");
-
- // If we're about to merge live ranges into a physical register live interval,
- // we have to update any aliased register's live ranges to indicate that they
- // have clobbered values for this range.
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
- // If this is a extract_subreg where dst is a physical register, e.g.
- // cl = EXTRACT_SUBREG reg1024, 1
- // then create and update the actual physical register allocated to RHS.
- if (RealDstReg || RealSrcReg) {
- LiveInterval &RealInt =
- li_->getOrCreateInterval(RealDstReg ? RealDstReg : RealSrcReg);
- for (LiveInterval::const_vni_iterator I = SavedLI->vni_begin(),
- E = SavedLI->vni_end(); I != E; ++I) {
- const VNInfo *ValNo = *I;
- VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->getCopy(),
- false, // updated at *
- li_->getVNInfoAllocator());
- NewValNo->setFlags(ValNo->getFlags()); // * updated here.
- RealInt.addKills(NewValNo, ValNo->kills);
- RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo);
- }
- RealInt.weight += SavedLI->weight;
- DstReg = RealDstReg ? RealDstReg : RealSrcReg;
- }
-
- // Update the liveintervals of sub-registers.
- for (const unsigned *AS = tri_->getSubRegisters(DstReg); *AS; ++AS)
- li_->getOrCreateInterval(*AS).MergeInClobberRanges(*li_, *ResSrcInt,
- li_->getVNInfoAllocator());
- }
-
- // If this is a EXTRACT_SUBREG, make sure the result of coalescing is the
- // larger super-register.
- if ((isExtSubReg || isInsSubReg || isSubRegToReg) &&
- !SrcIsPhys && !DstIsPhys) {
- if ((isExtSubReg && !Swapped) ||
- ((isInsSubReg || isSubRegToReg) && Swapped)) {
- ResSrcInt->Copy(*ResDstInt, mri_, li_->getVNInfoAllocator());
- std::swap(SrcReg, DstReg);
- std::swap(ResSrcInt, ResDstInt);
- }
- }
-
// Coalescing to a virtual register that is of a sub-register class of the
// other. Make sure the resulting register is set to the right register class.
- if (CrossRC)
+ if (CP.isCrossClass()) {
++numCrossRCs;
-
- // This may happen even if it's cross-rc coalescing. e.g.
- // %reg1026<def> = SUBREG_TO_REG 0, %reg1037<kill>, 4
- // reg1026 -> GR64, reg1037 -> GR32_ABCD. The resulting register will have to
- // be allocate a register from GR64_ABCD.
- if (NewRC)
- mri_->setRegClass(DstReg, NewRC);
+ mri_->setRegClass(CP.getDstReg(), CP.getNewRC());
+ }
// Remember to delete the copy instruction.
JoinedCopies.insert(CopyMI);
- UpdateRegDefsUses(SrcReg, DstReg, SubIdx);
+ UpdateRegDefsUses(CP);
// If we have extended the live range of a physical register, make sure we
// update live-in lists as well.
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
- const LiveInterval &VRegInterval = li_->getInterval(SrcReg);
+ if (CP.isPhys()) {
SmallVector<MachineBasicBlock*, 16> BlockSeq;
- for (LiveInterval::const_iterator I = VRegInterval.begin(),
- E = VRegInterval.end(); I != E; ++I ) {
+ // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
+ // ranges for this, and they are preserved.
+ LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg());
+ for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
+ I != E; ++I ) {
li_->findLiveInMBBs(I->start, I->end, BlockSeq);
for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
MachineBasicBlock &block = *BlockSeq[idx];
- if (!block.isLiveIn(DstReg))
- block.addLiveIn(DstReg);
+ if (!block.isLiveIn(CP.getDstReg()))
+ block.addLiveIn(CP.getDstReg());
}
BlockSeq.clear();
}
@@ -1845,32 +1227,17 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
  // SrcReg is guaranteed to be the register whose live interval is
// being merged.
- li_->removeInterval(SrcReg);
+ li_->removeInterval(CP.getSrcReg());
// Update regalloc hint.
- tri_->UpdateRegAllocHint(SrcReg, DstReg, *mf_);
-
- // Manually deleted the live interval copy.
- if (SavedLI) {
- SavedLI->clear();
- SavedLI.reset();
- }
-
- // If resulting interval has a preference that no longer fits because of subreg
- // coalescing, just clear the preference.
- unsigned Preference = getRegAllocPreference(ResDstInt->reg, *mf_, mri_, tri_);
- if (Preference && (isExtSubReg || isInsSubReg || isSubRegToReg) &&
- TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) {
- const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg);
- if (!RC->contains(Preference))
- mri_->setRegAllocationHint(ResDstInt->reg, 0, 0);
- }
+ tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_);
DEBUG({
- dbgs() << "\t\tJoined. Result = ";
- ResDstInt->print(dbgs(), tri_);
- dbgs() << "\n";
- });
+ LiveInterval &DstInt = li_->getInterval(CP.getDstReg());
+ dbgs() << "\tJoined. Result = ";
+ DstInt.print(dbgs(), tri_);
+ dbgs() << "\n";
+ });
++numJoins;
return true;
@@ -1927,263 +1294,53 @@ static unsigned ComputeUltimateVN(VNInfo *VNI,
return ThisValNoAssignments[VN] = UltimateVN;
}
-static bool InVector(VNInfo *Val, const SmallVector<VNInfo*, 8> &V) {
- return std::find(V.begin(), V.end(), Val) != V.end();
-}
-
-static bool isValNoDefMove(const MachineInstr *MI, unsigned DR, unsigned SR,
- const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI) {
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
- ;
- else if (MI->isExtractSubreg()) {
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
- } else if (MI->isSubregToReg() ||
- MI->isInsertSubreg()) {
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(2).getReg();
- } else
- return false;
- return (SrcReg == SR || TRI->isSuperRegister(SR, SrcReg)) &&
- (DstReg == DR || TRI->isSuperRegister(DR, DstReg));
-}
-
-/// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
-/// the specified live interval is defined by a copy from the specified
-/// register.
-bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li,
- LiveRange *LR,
- unsigned Reg) {
- unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno);
- if (SrcReg == Reg)
- return true;
- // FIXME: Do isPHIDef and isDefAccurate both need to be tested?
- if ((LR->valno->isPHIDef() || !LR->valno->isDefAccurate()) &&
- TargetRegisterInfo::isPhysicalRegister(li.reg) &&
- *tri_->getSuperRegisters(li.reg)) {
- // It's a sub-register live interval, we may not have precise information.
- // Re-compute it.
- MachineInstr *DefMI = li_->getInstructionFromIndex(LR->start);
- if (DefMI && isValNoDefMove(DefMI, li.reg, Reg, tii_, tri_)) {
- // Cache computed info.
- LR->valno->def = LR->start;
- LR->valno->setCopy(DefMI);
- return true;
- }
- }
- return false;
-}
-
-
-/// ValueLiveAt - Return true if the LiveRange pointed to by the given
-/// iterator, or any subsequent range with the same value number,
-/// is live at the given point.
-bool SimpleRegisterCoalescing::ValueLiveAt(LiveInterval::iterator LRItr,
- LiveInterval::iterator LREnd,
- SlotIndex defPoint) const {
- for (const VNInfo *valno = LRItr->valno;
- (LRItr != LREnd) && (LRItr->valno == valno); ++LRItr) {
- if (LRItr->contains(defPoint))
- return true;
- }
-
- return false;
-}
-
-
-/// SimpleJoin - Attempt to joint the specified interval into this one. The
-/// caller of this method must guarantee that the RHS only contains a single
-/// value number and that the RHS is not defined by a copy from this
-/// interval. This returns false if the intervals are not joinable, or it
-/// joins them and returns true.
-bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
- assert(RHS.containsOneValue());
-
- // Some number (potentially more than one) value numbers in the current
- // interval may be defined as copies from the RHS. Scan the overlapping
- // portions of the LHS and RHS, keeping track of this and looking for
- // overlapping live ranges that are NOT defined as copies. If these exist, we
- // cannot coalesce.
-
- LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end();
- LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end();
-
- if (LHSIt->start < RHSIt->start) {
- LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start);
- if (LHSIt != LHS.begin()) --LHSIt;
- } else if (RHSIt->start < LHSIt->start) {
- RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start);
- if (RHSIt != RHS.begin()) --RHSIt;
- }
-
- SmallVector<VNInfo*, 8> EliminatedLHSVals;
-
- while (1) {
- // Determine if these live intervals overlap.
- bool Overlaps = false;
- if (LHSIt->start <= RHSIt->start)
- Overlaps = LHSIt->end > RHSIt->start;
- else
- Overlaps = RHSIt->end > LHSIt->start;
-
- // If the live intervals overlap, there are two interesting cases: if the
- // LHS interval is defined by a copy from the RHS, it's ok and we record
- // that the LHS value # is the same as the RHS. If it's not, then we cannot
- // coalesce these live ranges and we bail out.
- if (Overlaps) {
- // If we haven't already recorded that this value # is safe, check it.
- if (!InVector(LHSIt->valno, EliminatedLHSVals)) {
- // If it's re-defined by an early clobber somewhere in the live range,
- // then conservatively abort coalescing.
- if (LHSIt->valno->hasRedefByEC())
- return false;
- // Copy from the RHS?
- if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg))
- return false; // Nope, bail out.
-
- if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def))
- // Here is an interesting situation:
- // BB1:
- // vr1025 = copy vr1024
- // ..
- // BB2:
- // vr1024 = op
- // = vr1025
- // Even though vr1025 is copied from vr1024, it's not safe to
- // coalesce them since the live range of vr1025 intersects the
- // def of vr1024. This happens because vr1025 is assigned the
- // value of the previous iteration of vr1024.
+/// JoinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false.
+bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) {
+ LiveInterval &RHS = li_->getInterval(CP.getSrcReg());
+ DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+ // If a live interval is a physical register, check for interference with any
+ // aliases. The interference check implemented here is a bit more conservative
+  // than the full interference check below. We allow overlapping live ranges
+ // only when one is a copy of the other.
+ if (CP.isPhys()) {
+ for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){
+ if (!li_->hasInterval(*AS))
+ continue;
+ const LiveInterval &LHS = li_->getInterval(*AS);
+ LiveInterval::const_iterator LI = LHS.begin();
+ for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
+ RI != RE; ++RI) {
+ LI = std::lower_bound(LI, LHS.end(), RI->start);
+ // Does LHS have an overlapping live range starting before RI?
+ if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
+ (RI->start != RI->valno->def ||
+ !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) {
+ DEBUG({
+ dbgs() << "\t\tInterference from alias: ";
+ LHS.print(dbgs(), tri_);
+ dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
+ });
return false;
- EliminatedLHSVals.push_back(LHSIt->valno);
- }
-
- // We know this entire LHS live range is okay, so skip it now.
- if (++LHSIt == LHSEnd) break;
- continue;
- }
+ }
- if (LHSIt->end < RHSIt->end) {
- if (++LHSIt == LHSEnd) break;
- } else {
- // One interesting case to check here. It's possible that we have
- // something like "X3 = Y" which defines a new value number in the LHS,
- // and is the last use of this liverange of the RHS. In this case, we
- // want to notice this copy (so that it gets coalesced away) even though
- // the live ranges don't actually overlap.
- if (LHSIt->start == RHSIt->end) {
- if (InVector(LHSIt->valno, EliminatedLHSVals)) {
- // We already know that this value number is going to be merged in
- // if coalescing succeeds. Just skip the liverange.
- if (++LHSIt == LHSEnd) break;
- } else {
- // If it's re-defined by an early clobber somewhere in the live range,
- // then conservatively abort coalescing.
- if (LHSIt->valno->hasRedefByEC())
+ // Check that LHS ranges beginning in this range are copies.
+ for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
+ if (LI->start != LI->valno->def ||
+ !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) {
+ DEBUG({
+ dbgs() << "\t\tInterference from alias: ";
+ LHS.print(dbgs(), tri_);
+ dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
+ });
return false;
- // Otherwise, if this is a copy from the RHS, mark it as being merged
- // in.
- if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) {
- if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def))
- // Here is an interesting situation:
- // BB1:
- // vr1025 = copy vr1024
- // ..
- // BB2:
- // vr1024 = op
- // = vr1025
- // Even though vr1025 is copied from vr1024, it's not safe to
- // coalesced them since live range of vr1025 intersects the
- // def of vr1024. This happens because vr1025 is assigned the
- // value of the previous iteration of vr1024.
- return false;
- EliminatedLHSVals.push_back(LHSIt->valno);
-
- // We know this entire LHS live range is okay, so skip it now.
- if (++LHSIt == LHSEnd) break;
}
}
}
-
- if (++RHSIt == RHSEnd) break;
- }
- }
-
- // If we got here, we know that the coalescing will be successful and that
- // the value numbers in EliminatedLHSVals will all be merged together. Since
- // the most common case is that EliminatedLHSVals has a single number, we
- // optimize for it: if there is more than one value, we merge them all into
- // the lowest numbered one, then handle the interval as if we were merging
- // with one value number.
- VNInfo *LHSValNo = NULL;
- if (EliminatedLHSVals.size() > 1) {
- // Loop through all the equal value numbers merging them into the smallest
- // one.
- VNInfo *Smallest = EliminatedLHSVals[0];
- for (unsigned i = 1, e = EliminatedLHSVals.size(); i != e; ++i) {
- if (EliminatedLHSVals[i]->id < Smallest->id) {
- // Merge the current notion of the smallest into the smaller one.
- LHS.MergeValueNumberInto(Smallest, EliminatedLHSVals[i]);
- Smallest = EliminatedLHSVals[i];
- } else {
- // Merge into the smallest.
- LHS.MergeValueNumberInto(EliminatedLHSVals[i], Smallest);
- }
}
- LHSValNo = Smallest;
- } else if (EliminatedLHSVals.empty()) {
- if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) &&
- *tri_->getSuperRegisters(LHS.reg))
- // Imprecise sub-register information. Can't handle it.
- return false;
- llvm_unreachable("No copies from the RHS?");
- } else {
- LHSValNo = EliminatedLHSVals[0];
- }
-
- // Okay, now that there is a single LHS value number that we're merging the
- // RHS into, update the value number info for the LHS to indicate that the
- // value number is defined where the RHS value number was.
- const VNInfo *VNI = RHS.getValNumInfo(0);
- LHSValNo->def = VNI->def;
- LHSValNo->setCopy(VNI->getCopy());
-
- // Okay, the final step is to loop over the RHS live intervals, adding them to
- // the LHS.
- if (VNI->hasPHIKill())
- LHSValNo->setHasPHIKill(true);
- LHS.addKills(LHSValNo, VNI->kills);
- LHS.MergeRangesInAsValue(RHS, LHSValNo);
-
- LHS.ComputeJoinedWeight(RHS);
-
- // Update regalloc hint if both are virtual registers.
- if (TargetRegisterInfo::isVirtualRegister(LHS.reg) &&
- TargetRegisterInfo::isVirtualRegister(RHS.reg)) {
- std::pair<unsigned, unsigned> RHSPref = mri_->getRegAllocationHint(RHS.reg);
- std::pair<unsigned, unsigned> LHSPref = mri_->getRegAllocationHint(LHS.reg);
- if (RHSPref != LHSPref)
- mri_->setRegAllocationHint(LHS.reg, RHSPref.first, RHSPref.second);
}
- // Update the liveintervals of sub-registers.
- if (TargetRegisterInfo::isPhysicalRegister(LHS.reg))
- for (const unsigned *AS = tri_->getSubRegisters(LHS.reg); *AS; ++AS)
- li_->getOrCreateInterval(*AS).MergeInClobberRanges(*li_, LHS,
- li_->getVNInfoAllocator());
-
- return true;
-}
-
-/// JoinIntervals - Attempt to join these two intervals. On failure, this
-/// returns false. Otherwise, if one of the intervals being joined is a
-/// physreg, this method always canonicalizes LHS to be it. The output
-/// "RHS" will not have been modified, so we can use this information
-/// below to update aliases.
-bool
-SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
- bool &Swapped) {
// Compute the final value assignment, assuming that the live ranges can be
// coalesced.
SmallVector<int, 16> LHSValNoAssignments;
@@ -2192,203 +1349,87 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
SmallVector<VNInfo*, 16> NewVNInfo;
- // If a live interval is a physical register, conservatively check if any
- // of its sub-registers is overlapping the live interval of the virtual
- // register. If so, do not coalesce.
- if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) &&
- *tri_->getSubRegisters(LHS.reg)) {
- // If it's coalescing a virtual register to a physical register, estimate
- // its live interval length. This is the *cost* of scanning an entire live
- // interval. If the cost is low, we'll do an exhaustive check instead.
-
- // If this is something like this:
- // BB1:
- // v1024 = op
- // ...
- // BB2:
- // ...
- // RAX = v1024
- //
- // That is, the live interval of v1024 crosses a bb. Then we can't rely on
- // less conservative check. It's possible a sub-register is defined before
- // v1024 (or live in) and live out of BB1.
- if (RHS.containsOneValue() &&
- li_->intervalIsInOneMBB(RHS) &&
- li_->getApproximateInstructionCount(RHS) <= 10) {
- // Perform a more exhaustive check for some common cases.
- if (li_->conflictsWithSubPhysRegRef(RHS, LHS.reg, true, JoinedCopies))
- return false;
- } else {
- for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR)
- if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
- DEBUG({
- dbgs() << "\tInterfere with sub-register ";
- li_->getInterval(*SR).print(dbgs(), tri_);
- });
- return false;
- }
- }
- } else if (TargetRegisterInfo::isPhysicalRegister(RHS.reg) &&
- *tri_->getSubRegisters(RHS.reg)) {
- if (LHS.containsOneValue() &&
- li_->getApproximateInstructionCount(LHS) <= 10) {
- // Perform a more exhaustive check for some common cases.
- if (li_->conflictsWithSubPhysRegRef(LHS, RHS.reg, false, JoinedCopies))
- return false;
- } else {
- for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR)
- if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
- DEBUG({
- dbgs() << "\tInterfere with sub-register ";
- li_->getInterval(*SR).print(dbgs(), tri_);
- });
- return false;
- }
- }
- }
+ LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg());
+ DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; });
- // Compute ultimate value numbers for the LHS and RHS values.
- if (RHS.containsOneValue()) {
- // Copies from a liveinterval with a single value are simple to handle and
- // very common, handle the special case here. This is important, because
- // often RHS is small and LHS is large (e.g. a physreg).
-
- // Find out if the RHS is defined as a copy from some value in the LHS.
- int RHSVal0DefinedFromLHS = -1;
- int RHSValID = -1;
- VNInfo *RHSValNoInfo = NULL;
- VNInfo *RHSValNoInfo0 = RHS.getValNumInfo(0);
- unsigned RHSSrcReg = li_->getVNInfoSourceReg(RHSValNoInfo0);
- if (RHSSrcReg == 0 || RHSSrcReg != LHS.reg) {
- // If RHS is not defined as a copy from the LHS, we can use simpler and
- // faster checks to see if the live ranges are coalescable. This joiner
- // can't swap the LHS/RHS intervals though.
- if (!TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
- return SimpleJoin(LHS, RHS);
- } else {
- RHSValNoInfo = RHSValNoInfo0;
- }
- } else {
- // It was defined as a copy from the LHS, find out what value # it is.
- RHSValNoInfo =
- LHS.getLiveRangeContaining(RHSValNoInfo0->def.getPrevSlot())->valno;
- RHSValID = RHSValNoInfo->id;
- RHSVal0DefinedFromLHS = RHSValID;
- }
+ // Loop over the value numbers of the LHS, seeing if any are defined from
+ // the RHS.
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
+ continue;
- LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
- RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
- NewVNInfo.resize(LHS.getNumValNums(), NULL);
-
- // Okay, *all* of the values in LHS that are defined as a copy from RHS
- // should now get updated.
- for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (unsigned LHSSrcReg = li_->getVNInfoSourceReg(VNI)) {
- if (LHSSrcReg != RHS.reg) {
- // If this is not a copy from the RHS, its value number will be
- // unmodified by the coalescing.
- NewVNInfo[VN] = VNI;
- LHSValNoAssignments[VN] = VN;
- } else if (RHSValID == -1) {
- // Otherwise, it is a copy from the RHS, and we don't already have a
- // value# for it. Keep the current value number, but remember it.
- LHSValNoAssignments[VN] = RHSValID = VN;
- NewVNInfo[VN] = RHSValNoInfo;
- LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0;
- } else {
- // Otherwise, use the specified value #.
- LHSValNoAssignments[VN] = RHSValID;
- if (VN == (unsigned)RHSValID) { // Else this val# is dead.
- NewVNInfo[VN] = RHSValNoInfo;
- LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0;
- }
- }
- } else {
- NewVNInfo[VN] = VNI;
- LHSValNoAssignments[VN] = VN;
- }
- }
+ // Never join with a register that has EarlyClobber redefs.
+ if (VNI->hasRedefByEC())
+ return false;
- assert(RHSValID != -1 && "Didn't find value #?");
- RHSValNoAssignments[0] = RHSValID;
- if (RHSVal0DefinedFromLHS != -1) {
- // This path doesn't go through ComputeUltimateVN so just set
- // it to anything.
- RHSValsDefinedFromLHS[RHSValNoInfo0] = (VNInfo*)1;
- }
- } else {
- // Loop over the value numbers of the LHS, seeing if any are defined from
- // the RHS.
- for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
- continue;
+ // DstReg is known to be a register in the LHS interval. If the src is
+ // from the RHS interval, we can use its value #.
+ if (!CP.isCoalescable(VNI->getCopy()))
+ continue;
- // DstReg is known to be a register in the LHS interval. If the src is
- // from the RHS interval, we can use its value #.
- if (li_->getVNInfoSourceReg(VNI) != RHS.reg)
- continue;
+ // Figure out the value # from the RHS.
+ LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ // The copy could be to an aliased physreg.
+ if (!lr) continue;
+ LHSValsDefinedFromRHS[VNI] = lr->valno;
+ }
- // Figure out the value # from the RHS.
- LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
- assert(lr && "Cannot find live range");
- LHSValsDefinedFromRHS[VNI] = lr->valno;
- }
+ // Loop over the value numbers of the RHS, seeing if any are defined from
+ // the LHS.
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
+ continue;
- // Loop over the value numbers of the RHS, seeing if any are defined from
- // the LHS.
- for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
- continue;
+ // Never join with a register that has EarlyClobber redefs.
+ if (VNI->hasRedefByEC())
+ return false;
- // DstReg is known to be a register in the RHS interval. If the src is
- // from the LHS interval, we can use its value #.
- if (li_->getVNInfoSourceReg(VNI) != LHS.reg)
- continue;
+ // DstReg is known to be a register in the RHS interval. If the src is
+ // from the LHS interval, we can use its value #.
+ if (!CP.isCoalescable(VNI->getCopy()))
+ continue;
- // Figure out the value # from the LHS.
- LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
- assert(lr && "Cannot find live range");
- RHSValsDefinedFromLHS[VNI] = lr->valno;
- }
+ // Figure out the value # from the LHS.
+ LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ // The copy could be to an aliased physreg.
+ if (!lr) continue;
+ RHSValsDefinedFromLHS[VNI] = lr->valno;
+ }
- LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
- RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
- NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+ LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+ RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+ NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
- for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
- continue;
- ComputeUltimateVN(VNI, NewVNInfo,
- LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
- LHSValNoAssignments, RHSValNoAssignments);
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ continue;
+ ComputeUltimateVN(VNI, NewVNInfo,
+ LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
+ LHSValNoAssignments, RHSValNoAssignments);
+ }
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ continue;
+ // If this value number isn't a copy from the LHS, it's a new number.
+ if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
+ NewVNInfo.push_back(VNI);
+ RHSValNoAssignments[VN] = NewVNInfo.size()-1;
+ continue;
}
- for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
- continue;
- // If this value number isn't a copy from the LHS, it's a new number.
- if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
- NewVNInfo.push_back(VNI);
- RHSValNoAssignments[VN] = NewVNInfo.size()-1;
- continue;
- }
- ComputeUltimateVN(VNI, NewVNInfo,
- RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
- RHSValNoAssignments, LHSValNoAssignments);
- }
+ ComputeUltimateVN(VNI, NewVNInfo,
+ RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
+ RHSValNoAssignments, LHSValNoAssignments);
}
// Armed with the mappings of LHS/RHS values to ultimate values, walk the
@@ -2399,15 +1440,17 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
LiveInterval::const_iterator JE = RHS.end();
// Skip ahead until the first place of potential sharing.
- if (I->start < J->start) {
- I = std::upper_bound(I, IE, J->start);
- if (I != LHS.begin()) --I;
- } else if (J->start < I->start) {
- J = std::upper_bound(J, JE, I->start);
- if (J != RHS.begin()) --J;
+ if (I != IE && J != JE) {
+ if (I->start < J->start) {
+ I = std::upper_bound(I, IE, J->start);
+ if (I != LHS.begin()) --I;
+ } else if (J->start < I->start) {
+ J = std::upper_bound(J, JE, I->start);
+ if (J != RHS.begin()) --J;
+ }
}
- while (1) {
+ while (I != IE && J != JE) {
// Determine if these two live ranges overlap.
bool Overlaps;
if (I->start < J->start) {
@@ -2429,13 +1472,10 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
return false;
}
- if (I->end < J->end) {
+ if (I->end < J->end)
++I;
- if (I == IE) break;
- } else {
+ else
++J;
- if (J == JE) break;
- }
}
// Update kill info. Some live ranges are extended due to copy coalescing.
@@ -2443,10 +1483,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
VNInfo *VNI = I->first;
unsigned LHSValID = LHSValNoAssignments[VNI->id];
- NewVNInfo[LHSValID]->removeKill(VNI->def);
if (VNI->hasPHIKill())
NewVNInfo[LHSValID]->setHasPHIKill(true);
- RHS.addKills(NewVNInfo[LHSValID], VNI->kills);
}
// Update kill info. Some live ranges are extended due to copy coalescing.
@@ -2454,25 +1492,19 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
VNInfo *VNI = I->first;
unsigned RHSValID = RHSValNoAssignments[VNI->id];
- NewVNInfo[RHSValID]->removeKill(VNI->def);
if (VNI->hasPHIKill())
NewVNInfo[RHSValID]->setHasPHIKill(true);
- LHS.addKills(NewVNInfo[RHSValID], VNI->kills);
}
+ if (LHSValNoAssignments.empty())
+ LHSValNoAssignments.push_back(-1);
+ if (RHSValNoAssignments.empty())
+ RHSValNoAssignments.push_back(-1);
+
// If we get here, we know that we can coalesce the live ranges. Ask the
// intervals to coalesce themselves now.
- if ((RHS.ranges.size() > LHS.ranges.size() &&
- TargetRegisterInfo::isVirtualRegister(LHS.reg)) ||
- TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
- RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo,
- mri_);
- Swapped = true;
- } else {
- LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
- mri_);
- Swapped = false;
- }
+ LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+ mri_);
return true;
}
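
The alias-interference check added at the top of the new JoinIntervals tolerates an overlap between a physreg alias and the register being joined only when the overlapping range begins at a def that is the copy being coalesced. The following is a minimal standalone sketch of that rule; it collapses the two directional checks in the hunk above into one symmetric test, and the Range struct with its fields is invented for illustration rather than taken from LLVM's LiveInterval/LiveRange.

#include <cstdio>
#include <vector>

// Simplified stand-in for a live range: [start, end), the index of its
// defining instruction, and whether that def is the copy being coalesced.
struct Range { int start, end, def; bool defIsCoalescableCopy; };

// Overlap between the alias (LHS) and the register being joined (RHS) is
// acceptable only if the overlapping range begins at a coalescable copy.
static bool aliasInterferes(const std::vector<Range> &LHS,
                            const std::vector<Range> &RHS) {
  for (const Range &RI : RHS)
    for (const Range &LI : LHS) {
      bool overlaps = LI.start < RI.end && RI.start < LI.end;
      bool lhsIsCopy = LI.start == LI.def && LI.defIsCoalescableCopy;
      bool rhsIsCopy = RI.start == RI.def && RI.defIsCoalescableCopy;
      if (overlaps && !lhsIsCopy && !rhsIsCopy)
        return true;                     // real interference: not a copy
    }
  return false;
}

int main() {
  std::vector<Range> Alias = {{4, 8, 4, true}};   // defined by the copy itself
  std::vector<Range> Reg   = {{0, 8, 0, false}};
  std::printf(aliasInterferes(Alias, Reg) ? "interference\n" : "joinable\n");
}
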
@@ -2513,15 +1545,10 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
 // If this isn't a copy or an extract_subreg, we can't join intervals.
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
bool isInsUndef = false;
- if (Inst->isExtractSubreg()) {
+ if (Inst->isCopy()) {
DstReg = Inst->getOperand(0).getReg();
SrcReg = Inst->getOperand(1).getReg();
- } else if (Inst->isInsertSubreg()) {
- DstReg = Inst->getOperand(0).getReg();
- SrcReg = Inst->getOperand(2).getReg();
- if (Inst->getOperand(1).isUndef())
- isInsUndef = true;
- } else if (Inst->isInsertSubreg() || Inst->isSubregToReg()) {
+ } else if (Inst->isSubregToReg()) {
DstReg = Inst->getOperand(0).getReg();
SrcReg = Inst->getOperand(2).getReg();
} else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
@@ -2650,6 +1677,8 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
E = mri_->use_nodbg_end(); I != E; ++I) {
MachineOperand &Use = I.getOperand();
MachineInstr *UseMI = Use.getParent();
+ if (UseMI->isIdentityCopy())
+ continue;
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
SrcReg == DstReg && SrcSubIdx == DstSubIdx)
@@ -2680,7 +1709,8 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
// Ignore identity copies.
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (!(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ if (!MI->isIdentityCopy() &&
+ !(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
SrcReg == DstReg && SrcSubIdx == DstSubIdx))
for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
MachineOperand &Use = MI->getOperand(i);
@@ -2750,10 +1780,9 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
// Delete all coalesced copies.
bool DoDelete = true;
if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- assert((MI->isExtractSubreg() || MI->isInsertSubreg() ||
- MI->isSubregToReg()) && "Unrecognized copy instruction");
- DstReg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ assert(MI->isCopyLike() && "Unrecognized copy instruction");
+ SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
// Do not delete extract_subreg, insert_subreg of physical
// registers unless the definition is dead. e.g.
// %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
@@ -2762,7 +1791,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
DoDelete = false;
}
if (MI->allDefsAreDead()) {
- LiveInterval &li = li_->getInterval(DstReg);
+ LiveInterval &li = li_->getInterval(SrcReg);
if (!ShortenDeadCopySrcLiveRange(li, MI))
ShortenDeadCopyLiveRange(li, MI);
DoDelete = true;
@@ -2812,12 +1841,13 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
// If the move will be an identity move delete it
bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
- if (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx) {
+ if (MI->isIdentityCopy() ||
+ (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx)) {
if (li_->hasInterval(SrcReg)) {
LiveInterval &RegInt = li_->getInterval(SrcReg);
// If def of this move instruction is dead, remove its live range
- // from the dstination register's live interval.
- if (MI->registerDefIsDead(DstReg)) {
+ // from the destination register's live interval.
+ if (MI->allDefsAreDead()) {
if (!ShortenDeadCopySrcLiveRange(RegInt, MI))
ShortenDeadCopyLiveRange(RegInt, MI);
}
@@ -2832,17 +1862,13 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
// Check for now unnecessary kill flags.
if (li_->isNotInMIMap(MI)) continue;
- SlotIndex UseIdx = li_->getInstructionIndex(MI).getUseIndex();
+ SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isKill()) continue;
unsigned reg = MO.getReg();
if (!reg || !li_->hasInterval(reg)) continue;
- LiveInterval &LI = li_->getInterval(reg);
- const LiveRange *LR = LI.getLiveRangeContaining(UseIdx);
- if (!LR ||
- (!LR->valno->isKill(UseIdx.getDefIndex()) &&
- LR->valno->def != UseIdx.getDefIndex()))
+ if (!li_->getInterval(reg).killedAt(DefIdx))
MO.setIsKill(false);
}
}
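
The kill-flag cleanup in the last hunk replaces a manual live-range lookup with a single killedAt() query at the instruction's def slot. Below is a standalone sketch of that predicate over a simplified range list; the Range type and the half-open integer intervals are stand-ins for illustration, not LLVM's LiveInterval.

#include <cstdio>
#include <vector>

struct Range { int start, end; };            // half-open [start, end)

// Simplified stand-in for LiveInterval::killedAt(): the register dies at
// index Idx exactly when one of its live ranges ends there.
static bool killedAt(const std::vector<Range> &Ranges, int Idx) {
  for (const Range &R : Ranges)
    if (R.end == Idx)
      return true;
  return false;
}

int main() {
  std::vector<Range> Reg = {{0, 20}};        // range was extended by coalescing
  // The old kill flag at 10 is now stale (no range ends there) and would be
  // cleared; the kill at the new end, 20, is still accurate.
  std::printf("killedAt(10)=%d  killedAt(20)=%d\n",
              killedAt(Reg, 10), killedAt(Reg, 20));
}
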
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index 1be04f32aa69..e154da60affa 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -105,21 +105,12 @@ namespace llvm {
/// possible to coalesce this interval, but it may be possible if other
/// things get coalesced, then it returns true by reference in 'Again'.
bool JoinCopy(CopyRec &TheCopy, bool &Again);
-
+
/// JoinIntervals - Attempt to join these two intervals. On failure, this
- /// returns false. Otherwise, if one of the intervals being joined is a
- /// physreg, this method always canonicalizes DestInt to be it. The output
- /// "SrcInt" will not have been modified, so we can use this information
- /// below to update aliases.
- bool JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, bool &Swapped);
-
- /// SimpleJoin - Attempt to join the specified interval into this one. The
- /// caller of this method must guarantee that the RHS only contains a single
- /// value number and that the RHS is not defined by a copy from this
- /// interval. This returns false if the intervals are not joinable, or it
- /// joins them and returns true.
- bool SimpleJoin(LiveInterval &LHS, LiveInterval &RHS);
-
+ /// returns false. The output "SrcInt" will not have been modified, so we can
+ /// use this information below to update aliases.
+ bool JoinIntervals(CoalescerPair &CP);
+
/// Return true if the two specified registers belong to different register
/// classes. The registers may be either phys or virt regs.
bool differingRegisterClasses(unsigned RegA, unsigned RegB) const;
@@ -128,8 +119,7 @@ namespace llvm {
/// the source value number is defined by a copy from the destination reg
/// see if we can merge these two destination reg valno# into a single
/// value number, eliminating a copy.
- bool AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB,
- MachineInstr *CopyMI);
+ bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
/// HasOtherReachingDefs - Return true if there are definitions of IntB
/// other than BValNo val# that can reach uses of AValno val# of IntA.
@@ -140,8 +130,7 @@ namespace llvm {
/// If the source value number is defined by a commutable instruction and
/// its other operand is coalesced to the copy dest register, see if we
/// can transform the copy into a noop by commuting the definition.
- bool RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval &IntB,
- MachineInstr *CopyMI);
+ bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
/// TrimLiveIntervalToLastUse - If there is a last use in the same basic
 /// block as the copy instruction, trim the live interval to the last use
@@ -155,28 +144,6 @@ namespace llvm {
bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
unsigned DstSubIdx, MachineInstr *CopyMI);
- /// CanCoalesceWithImpDef - Returns true if the specified copy instruction
- /// from an implicit def to another register can be coalesced away.
- bool CanCoalesceWithImpDef(MachineInstr *CopyMI,
- LiveInterval &li, LiveInterval &ImpLi) const;
-
- /// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an
- /// implicit_def and it is being removed. Turn all copies from this value#
- /// into implicit_defs.
- void TurnCopiesFromValNoToImpDefs(LiveInterval &li, VNInfo *VNI);
-
- /// isWinToJoinVRWithSrcPhysReg - Return true if it's worth while to join a
- /// a virtual destination register with physical source register.
- bool isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
- MachineBasicBlock *CopyMBB,
- LiveInterval &DstInt, LiveInterval &SrcInt);
-
- /// isWinToJoinVRWithDstPhysReg - Return true if it's worth while to join a
- /// copy from a virtual source register to a physical destination register.
- bool isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
- MachineBasicBlock *CopyMBB,
- LiveInterval &DstInt, LiveInterval &SrcInt);
-
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
/// two virtual registers from different register classes.
bool isWinToJoinCrossClass(unsigned SrcReg,
@@ -185,43 +152,12 @@ namespace llvm {
const TargetRegisterClass *DstRC,
const TargetRegisterClass *NewRC);
- /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
- /// register with a physical register, check if any of the virtual register
- /// operand is a sub-register use or def. If so, make sure it won't result
- /// in an illegal extract_subreg or insert_subreg instruction.
- bool HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
- unsigned VirtReg, unsigned PhysReg);
-
- /// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
- /// an extract_subreg where dst is a physical register, e.g.
- /// cl = EXTRACT_SUBREG reg1024, 1
- bool CanJoinExtractSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
- unsigned SubIdx, unsigned &RealDstReg);
-
- /// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
- /// an insert_subreg where src is a physical register, e.g.
- /// reg1024 = INSERT_SUBREG reg1024, c1, 0
- bool CanJoinInsertSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
- unsigned SubIdx, unsigned &RealDstReg);
-
- /// ValueLiveAt - Return true if the LiveRange pointed to by the given
- /// iterator, or any subsequent range with the same value number,
- /// is live at the given point.
- bool ValueLiveAt(LiveInterval::iterator LRItr, LiveInterval::iterator LREnd,
- SlotIndex defPoint) const;
-
- /// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
- /// the specified live interval is defined by a copy from the specified
- /// register.
- bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR,
- unsigned Reg);
-
/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero. If DstReg is a
/// physical register and the existing subregister number of the def / use
/// being updated is not zero, make sure to set it to the correct physical
/// subregister.
- void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+ void UpdateRegDefsUses(const CoalescerPair &CP);
/// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy.
/// Return true if live interval is removed.
@@ -238,6 +174,10 @@ namespace llvm {
/// it as well.
bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
+ /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the
+ /// VNInfo copy flag for DstReg and all aliases.
+ void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI);
+
/// lastRegisterUse - Returns the last use of the specific register between
/// cycles Start and End or NULL if there are no uses.
MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End,
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 059e8d6c19aa..e90869d600dd 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -46,6 +46,8 @@ namespace {
Constant *UnregisterFn;
Constant *BuiltinSetjmpFn;
Constant *FrameAddrFn;
+ Constant *StackAddrFn;
+ Constant *StackRestoreFn;
Constant *LSDAAddrFn;
Value *PersonalityFn;
Constant *SelectorFn;
@@ -69,7 +71,7 @@ namespace {
void insertCallSiteStore(Instruction *I, int Number, Value *CallSite);
void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite,
SwitchInst *CatchSwitch);
- void splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
+ void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
bool insertSjLjEHSupport(Function &F);
};
} // end anonymous namespace
@@ -107,6 +109,8 @@ bool SjLjEHPass::doInitialization(Module &M) {
PointerType::getUnqual(FunctionContextTy),
(Type *)0);
FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+ StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
@@ -175,8 +179,10 @@ static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
/// we spill into a stack location, guaranteeing that there is nothing live
/// across the unwind edge. This process also splits all critical edges
 /// coming out of invokes.
+/// FIXME: Move this function to a common utility file (Local.cpp?) so
+/// both SjLj and LowerInvoke can use it.
void SjLjEHPass::
-splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
+splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
// First step, split all critical edges from invoke instructions.
for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
InvokeInst *II = Invokes[i];
@@ -198,16 +204,33 @@ splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
++AfterAllocaInsertPt;
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI) {
- // This is always a no-op cast because we're casting AI to AI->getType() so
- // src and destination types are identical. BitCast is the only possibility.
- CastInst *NC = new BitCastInst(
- AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
- AI->replaceAllUsesWith(NC);
- // Normally its is forbidden to replace a CastInst's operand because it
- // could cause the opcode to reflect an illegal conversion. However, we're
- // replacing it here with the same value it was constructed with to simply
- // make NC its user.
- NC->setOperand(0, AI);
+ const Type *Ty = AI->getType();
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+ // Set the operand of the instructions back to the AllocaInst.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+ // so src and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC = new BitCastInst(
+ AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+ AI->replaceAllUsesWith(NC);
+ // Set the operand of the cast instruction back to the AllocaInst.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However,
+ // we're replacing it here with the same value it was constructed with.
+ // We do this because the above replaceAllUsesWith() clobbered the
+ // operand, but we want this one to remain.
+ NC->setOperand(0, AI);
+ }
}
// Finally, scan the code looking for instructions with bad live ranges.
@@ -266,6 +289,9 @@ splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
}
// If we decided we need a spill, do it.
+ // FIXME: Spilling this way is overkill, as it forces all uses of
+ // the value to be reloaded from the stack slot, even those that aren't
+ // in the unwind blocks. We should be more selective.
if (NeedsSpill) {
++NumSpilled;
DemoteRegToStack(*Inst, true);
@@ -294,22 +320,34 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
// If we don't have any invokes or unwinds, there's nothing to do.
if (Unwinds.empty() && Invokes.empty()) return false;
- // Find the eh.selector.* and eh.exception calls. We'll use the first
- // eh.selector to determine the right personality function to use. For
- // SJLJ, we always use the same personality for the whole function,
- // not on a per-selector basis.
+ // Find the eh.selector.*, eh.exception and alloca calls.
+ //
+ // Remember any allocas() that aren't in the entry block, as the
+ // jmpbuf saved SP will need to be updated for them.
+ //
+ // We'll use the first eh.selector to determine the right personality
+ // function to use. For SJLJ, we always use the same personality for the
+ // whole function, not on a per-selector basis.
// FIXME: That's a bit ugly. Better way?
SmallVector<CallInst*,16> EH_Selectors;
SmallVector<CallInst*,16> EH_Exceptions;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ SmallVector<Instruction*,16> JmpbufUpdatePoints;
+ // Note: Skip the entry block since there's nothing there that interests
+ // us. eh.selector and eh.exception shouldn't ever be there, and we
+ // want to disregard any allocas that are there.
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
if (CallInst *CI = dyn_cast<CallInst>(I)) {
if (CI->getCalledFunction() == SelectorFn) {
- if (!PersonalityFn) PersonalityFn = CI->getOperand(2);
+ if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1);
EH_Selectors.push_back(CI);
} else if (CI->getCalledFunction() == ExceptionFn) {
EH_Exceptions.push_back(CI);
+ } else if (CI->getCalledFunction() == StackRestoreFn) {
+ JmpbufUpdatePoints.push_back(CI);
}
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ JmpbufUpdatePoints.push_back(AI);
}
}
}
@@ -329,7 +367,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
// we spill into a stack location, guaranteeing that there is nothing live
// across the unwind edge. This process also splits all critical edges
 // coming out of invokes.
- splitLiveRangesLiveAcrossInvokes(Invokes);
+ splitLiveRangesAcrossInvokes(Invokes);
BasicBlock *EntryBB = F.begin();
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
@@ -419,7 +457,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
// Populate the Function Context
// 1. LSDA address
// 2. Personality function address
- // 3. jmpbuf (save FP and call eh.sjlj.setjmp)
+ // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
// LSDA address
Idxs[0] = Zero;
@@ -440,31 +478,41 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
EntryBB->getTerminator());
- // Save the frame pointer.
+ // Save the frame pointer.
Idxs[1] = ConstantInt::get(Int32Ty, 5);
- Value *FieldPtr
+ Value *JBufPtr
= GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
"jbuf_gep",
EntryBB->getTerminator());
Idxs[1] = ConstantInt::get(Int32Ty, 0);
- Value *ElemPtr =
- GetElementPtrInst::Create(FieldPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+ Value *FramePtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
EntryBB->getTerminator());
Value *Val = CallInst::Create(FrameAddrFn,
ConstantInt::get(Int32Ty, 0),
"fp",
EntryBB->getTerminator());
- new StoreInst(Val, ElemPtr, true, EntryBB->getTerminator());
- // Call the setjmp instrinsic. It fills in the rest of the jmpbuf
+ new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+
+ // Save the stack pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *StackPtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
+ EntryBB->getTerminator());
+
+ Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
+ new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+
+ // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
Value *SetjmpArg =
- CastInst::Create(Instruction::BitCast, FieldPtr,
+ CastInst::Create(Instruction::BitCast, JBufPtr,
Type::getInt8PtrTy(F.getContext()), "",
EntryBB->getTerminator());
Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
"dispatch",
EntryBB->getTerminator());
- // check the return value of the setjmp. non-zero goes to dispatcher
+ // check the return value of the setjmp. non-zero goes to dispatcher.
Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
ICmpInst::ICMP_EQ, DispatchVal, Zero,
"notunwind");
@@ -509,6 +557,16 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
Unwinds[i]->eraseFromParent();
}
+ // Following any allocas not in the entry block, update the saved SP
+ // in the jmpbuf to the new value.
+ for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
+ Instruction *AI = JmpbufUpdatePoints[i];
+ Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+ StackAddr->insertAfter(AI);
+ Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+ StoreStackAddr->insertAfter(StackAddr);
+ }
+
// Finally, for any returns from this function, if this function contains an
// invoke, add a call to unregister the function context.
for (unsigned i = 0, e = Returns.size(); i != e; ++i)
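
The new JmpbufUpdatePoints loop exists because a dynamic alloca (or a stackrestore) outside the entry block moves the stack pointer after the jmpbuf has been filled in, so the SP the SJLJ unwinder would restore becomes stale. A rough source-level picture of where the pass re-saves SP follows; record_sp_in_jmpbuf() and may_throw() are invented stand-ins for the inserted stacksave/store and for an unwinding call, not real APIs.

#include <alloca.h>     // alloca() (glibc/BSD); only for this illustration
#include <cstddef>
#include <cstdio>

static void record_sp_in_jmpbuf() {           // stand-in: stacksave -> jbuf SP slot
  std::puts("save SP into jmpbuf");
}
static void may_throw() {}                    // stand-in for an invoke

static void demo(std::size_t n) {
  // Entry block: SjLjEHPrepare stores FP, SP and calls setjmp here.
  record_sp_in_jmpbuf();

  char *buf = static_cast<char *>(alloca(n)); // non-entry dynamic alloca
  record_sp_in_jmpbuf();                      // re-save SP right after it
  buf[0] = 0;

  may_throw();  // if this unwinds, the landing pad resumes with the new SP
}

int main() { demo(16); }
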
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 6110ef5d2f05..7a227cf02d57 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -213,9 +213,11 @@ void SlotIndexes::dump() const {
// Print a SlotIndex to a raw_ostream.
void SlotIndex::print(raw_ostream &os) const {
- os << getIndex();
+ os << entry().getIndex();
if (isPHI())
os << "*";
+ else
+ os << "LudS"[getSlot()];
}
// Dump a SlotIndex to stderr.
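
The new suffix in SlotIndex::print() indexes the string "LudS" with getSlot(), so every index now prints with a letter naming its per-instruction slot; assuming this revision's slot order of LOAD, USE, DEF, STORE, the letters are L, u, d and S. A standalone rendering of the same trick:

#include <cstdio>

enum Slot { LOAD, USE, DEF, STORE };          // assumed slot order

static char slotChar(int S) { return "LudS"[S]; }

int main() {
  // e.g. an index printed as "12d" refers to the def slot of instruction 12.
  for (int S = LOAD; S <= STORE; ++S)
    std::printf("slot %d -> %c\n", S, slotChar(S));
}
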
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index a7b2efe11825..56bcb2824ae8 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -14,18 +14,20 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <set>
using namespace llvm;
namespace {
- enum SpillerName { trivial, standard, splitting };
+ enum SpillerName { trivial, standard, splitting, inline_ };
}
static cl::opt<SpillerName>
@@ -35,6 +37,7 @@ spillerOpt("spiller",
cl::values(clEnumVal(trivial, "trivial spiller"),
clEnumVal(standard, "default spiller"),
clEnumVal(splitting, "splitting spiller"),
+ clEnumValN(inline_, "inline", "inline spiller"),
clEnumValEnd),
cl::init(standard));
@@ -53,8 +56,8 @@ protected:
const TargetInstrInfo *tii;
const TargetRegisterInfo *tri;
VirtRegMap *vrm;
-
- /// Construct a spiller base.
+
+ /// Construct a spiller base.
SpillerBase(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
: mf(mf), lis(lis), vrm(vrm)
{
@@ -67,7 +70,8 @@ protected:
/// Add spill ranges for every use/def of the live interval, inserting loads
/// immediately before each use, and stores after each def. No folding or
/// remat is attempted.
- std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) {
+ void trivialSpillEverywhere(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals) {
DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
assert(li->weight != HUGE_VALF &&
@@ -78,8 +82,6 @@ protected:
DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
- std::vector<LiveInterval*> added;
-
const TargetRegisterClass *trc = mri->getRegClass(li->reg);
unsigned ss = vrm->assignVirt2StackSlot(li->reg);
@@ -96,7 +98,7 @@ protected:
do {
++regItr;
} while (regItr != mri->reg_end() && (&*regItr == mi));
-
+
// Collect uses & defs for this instr.
SmallVector<unsigned, 2> indices;
bool hasUse = false;
@@ -116,7 +118,7 @@ protected:
vrm->assignVirt2StackSlot(newVReg, ss);
LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
newLI->weight = HUGE_VALF;
-
+
// Update the reg operands & kill flags.
for (unsigned i = 0; i < indices.size(); ++i) {
unsigned mopIdx = indices[i];
@@ -136,10 +138,10 @@ protected:
MachineInstr *loadInstr(prior(miItr));
SlotIndex loadIndex =
lis->InsertMachineInstrInMaps(loadInstr).getDefIndex();
+ vrm->addSpillSlotUse(ss, loadInstr);
SlotIndex endIndex = loadIndex.getNextIndex();
VNInfo *loadVNI =
newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator());
- loadVNI->addKill(endIndex);
newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
}
@@ -150,17 +152,15 @@ protected:
MachineInstr *storeInstr(llvm::next(miItr));
SlotIndex storeIndex =
lis->InsertMachineInstrInMaps(storeInstr).getDefIndex();
+ vrm->addSpillSlotUse(ss, storeInstr);
SlotIndex beginIndex = storeIndex.getPrevIndex();
VNInfo *storeVNI =
newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator());
- storeVNI->addKill(storeIndex);
newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
}
- added.push_back(newLI);
+ newIntervals.push_back(newLI);
}
-
- return added;
}
};
@@ -176,11 +176,12 @@ public:
TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
: SpillerBase(mf, lis, vrm) {}
- std::vector<LiveInterval*> spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex*) {
+ void spill(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &,
+ SlotIndex*) {
// Ignore spillIs - we don't use it.
- return trivialSpillEverywhere(li);
+ trivialSpillEverywhere(li, newIntervals);
}
};
@@ -200,10 +201,13 @@ public:
: lis(lis), loopInfo(loopInfo), vrm(vrm) {}
/// Falls back on LiveIntervals::addIntervalsForSpills.
- std::vector<LiveInterval*> spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex*) {
- return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
+ void spill(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex*) {
+ std::vector<LiveInterval*> added =
+ lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
+ newIntervals.insert(newIntervals.end(), added.begin(), added.end());
}
};
@@ -214,7 +218,7 @@ namespace {
/// When a call to spill is placed this spiller will first try to break the
/// interval up into its component values (one new interval per value).
/// If this fails, or if a call is placed to spill a previously split interval
-/// then the spiller falls back on the standard spilling mechanism.
+/// then the spiller falls back on the standard spilling mechanism.
class SplittingSpiller : public StandardSpiller {
public:
SplittingSpiller(MachineFunction *mf, LiveIntervals *lis,
@@ -226,22 +230,21 @@ public:
tri = mf->getTarget().getRegisterInfo();
}
- std::vector<LiveInterval*> spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestStart) {
-
- if (worthTryingToSplit(li)) {
- return tryVNISplit(li, earliestStart);
- }
- // else
- return StandardSpiller::spill(li, spillIs, earliestStart);
+ void spill(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex *earliestStart) {
+ if (worthTryingToSplit(li))
+ tryVNISplit(li, earliestStart);
+ else
+ StandardSpiller::spill(li, newIntervals, spillIs, earliestStart);
}
private:
MachineRegisterInfo *mri;
const TargetInstrInfo *tii;
- const TargetRegisterInfo *tri;
+ const TargetRegisterInfo *tri;
DenseSet<LiveInterval*> alreadySplit;
bool worthTryingToSplit(LiveInterval *li) const {
@@ -258,18 +261,18 @@ private:
SmallVector<VNInfo*, 4> vnis;
std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis));
-
+
for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(),
vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) {
VNInfo *vni = *vniItr;
-
- // Skip unused VNIs, or VNIs with no kills.
- if (vni->isUnused() || vni->kills.empty())
+
+ // Skip unused VNIs.
+ if (vni->isUnused())
continue;
DEBUG(dbgs() << " Extracted Val #" << vni->id << " as ");
LiveInterval *splitInterval = extractVNI(li, vni);
-
+
if (splitInterval != 0) {
DEBUG(dbgs() << *splitInterval << "\n");
added.push_back(splitInterval);
@@ -281,12 +284,12 @@ private:
} else {
DEBUG(dbgs() << "0\n");
}
- }
+ }
DEBUG(dbgs() << "Original LI: " << *li << "\n");
 // If the original interval still contains some live ranges
- // add it to added and alreadySplit.
+ // add it to added and alreadySplit.
if (!li->empty()) {
added.push_back(li);
alreadySplit.insert(li);
@@ -302,16 +305,15 @@ private:
/// Extract the given value number from the interval.
LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const {
assert(vni->isDefAccurate() || vni->isPHIDef());
- assert(!vni->kills.empty());
- // Create a new vreg and live interval, copy VNI kills & ranges over.
+ // Create a new vreg and live interval, copy VNI ranges over.
const TargetRegisterClass *trc = mri->getRegClass(li->reg);
unsigned newVReg = mri->createVirtualRegister(trc);
vrm->grow();
LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator());
- // Start by copying all live ranges in the VN to the new interval.
+ // Start by copying all live ranges in the VN to the new interval.
for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end();
rItr != rEnd; ++rItr) {
if (rItr->valno == vni) {
@@ -319,7 +321,7 @@ private:
}
}
- // Erase the old VNI & ranges.
+ // Erase the old VNI & ranges.
li->removeValNo(vni);
// Collect all current uses of the register belonging to the given VNI.
@@ -336,15 +338,13 @@ private:
 // Insert a copy at the start of the MBB. The range preceding the
// copy will be attached to the original LiveInterval.
MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def);
- tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc,
- DebugLoc());
- MachineInstr *copyMI = defMBB->begin();
- copyMI->addRegisterKilled(li->reg, tri);
+ MachineInstr *copyMI = BuildMI(*defMBB, defMBB->begin(), DebugLoc(),
+ tii->get(TargetOpcode::COPY), newVReg)
+ .addReg(li->reg, RegState::Kill);
SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB),
0, false, lis->getVNInfoAllocator());
phiDefVNI->setIsPHIDef(true);
- phiDefVNI->addKill(copyIdx.getDefIndex());
li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI));
LiveRange *oldPHIDefRange =
newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB));
@@ -367,8 +367,8 @@ private:
newVNI->setIsPHIDef(false); // not a PHI def anymore.
newVNI->setIsDefAccurate(true);
} else {
- // non-PHI def. Rename the def. If it's two-addr that means renaming the use
- // and inserting a new copy too.
+ // non-PHI def. Rename the def. If it's two-addr that means renaming the
+ // use and inserting a new copy too.
MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def);
// We'll rename this now, so we can remove it from uses.
uses.erase(defInst);
@@ -384,38 +384,26 @@ private:
twoAddrUseIsUndef = true;
}
}
-
+
SlotIndex defIdx = lis->getInstructionIndex(defInst);
newVNI->def = defIdx.getDefIndex();
if (isTwoAddr && !twoAddrUseIsUndef) {
MachineBasicBlock *defMBB = defInst->getParent();
- tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc,
- DebugLoc());
- MachineInstr *copyMI = prior(MachineBasicBlock::iterator(defInst));
+ MachineInstr *copyMI = BuildMI(*defMBB, defInst, DebugLoc(),
+ tii->get(TargetOpcode::COPY), newVReg)
+ .addReg(li->reg, RegState::Kill);
SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
- copyMI->addRegisterKilled(li->reg, tri);
LiveRange *origUseRange =
li->getLiveRangeContaining(newVNI->def.getUseIndex());
- VNInfo *origUseVNI = origUseRange->valno;
origUseRange->end = copyIdx.getDefIndex();
- bool updatedKills = false;
- for (unsigned k = 0; k < origUseVNI->kills.size(); ++k) {
- if (origUseVNI->kills[k] == defIdx.getDefIndex()) {
- origUseVNI->kills[k] = copyIdx.getDefIndex();
- updatedKills = true;
- break;
- }
- }
- assert(updatedKills && "Failed to update VNI kill list.");
VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI,
true, lis->getVNInfoAllocator());
- copyVNI->addKill(defIdx.getDefIndex());
LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI);
newLI->addRange(copyRange);
- }
+ }
}
-
+
for (std::set<MachineInstr*>::iterator
usesItr = uses.begin(), usesEnd = uses.end();
usesItr != usesEnd; ++usesItr) {
@@ -435,7 +423,7 @@ private:
// Check if this instr is two address.
unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg);
bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx);
-
+
// Rename uses (and defs for two-address instrs).
for (unsigned i = 0; i < useInst->getNumOperands(); ++i) {
MachineOperand &mo = useInst->getOperand(i);
@@ -451,10 +439,9 @@ private:
// reg.
MachineBasicBlock *useMBB = useInst->getParent();
MachineBasicBlock::iterator useItr(useInst);
- tii->copyRegToReg(*useMBB, llvm::next(useItr), li->reg, newVReg, trc, trc,
- DebugLoc());
- MachineInstr *copyMI = llvm::next(useItr);
- copyMI->addRegisterKilled(newVReg, tri);
+ MachineInstr *copyMI = BuildMI(*useMBB, llvm::next(useItr), DebugLoc(),
+ tii->get(TargetOpcode::COPY), newVReg)
+ .addReg(li->reg, RegState::Kill);
SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
// Change the old two-address defined range & vni to start at
@@ -470,56 +457,44 @@ private:
VNInfo *copyVNI =
newLI->getNextValue(useIdx.getDefIndex(), 0, true,
lis->getVNInfoAllocator());
- copyVNI->addKill(copyIdx.getDefIndex());
LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI);
newLI->addRange(copyRange);
}
}
-
- // Iterate over any PHI kills - we'll need to insert new copies for them.
- for (VNInfo::KillSet::iterator
- killItr = newVNI->kills.begin(), killEnd = newVNI->kills.end();
- killItr != killEnd; ++killItr) {
- SlotIndex killIdx(*killItr);
- if (killItr->isPHI()) {
- MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
- LiveRange *oldKillRange =
- newLI->getLiveRangeContaining(killIdx);
-
- assert(oldKillRange != 0 && "No kill range?");
-
- tii->copyRegToReg(*killMBB, killMBB->getFirstTerminator(),
- li->reg, newVReg, trc, trc,
- DebugLoc());
- MachineInstr *copyMI = prior(killMBB->getFirstTerminator());
- copyMI->addRegisterKilled(newVReg, tri);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
- // Save the current end. We may need it to add a new range if the
- // current range runs of the end of the MBB.
- SlotIndex newKillRangeEnd = oldKillRange->end;
- oldKillRange->end = copyIdx.getDefIndex();
+ // Iterate over any PHI kills - we'll need to insert new copies for them.
+ for (LiveInterval::iterator LRI = newLI->begin(), LRE = newLI->end();
+ LRI != LRE; ++LRI) {
+ if (LRI->valno != newVNI || LRI->end.isPHI())
+ continue;
+ SlotIndex killIdx = LRI->end;
+ MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
+ MachineInstr *copyMI = BuildMI(*killMBB, killMBB->getFirstTerminator(),
+ DebugLoc(), tii->get(TargetOpcode::COPY),
+ li->reg)
+ .addReg(newVReg, RegState::Kill);
+ SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
- if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) {
- assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) &&
- "PHI kill range doesn't reach kill-block end. Not sane.");
- newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB),
- newKillRangeEnd, newVNI));
- }
+ // Save the current end. We may need it to add a new range if the
+ // current range runs off the end of the MBB.
+ SlotIndex newKillRangeEnd = LRI->end;
+ LRI->end = copyIdx.getDefIndex();
- *killItr = oldKillRange->end;
- VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(),
- copyMI, true,
- lis->getVNInfoAllocator());
- newKillVNI->addKill(lis->getMBBTerminatorGap(killMBB));
- newKillVNI->setHasPHIKill(true);
- li->addRange(LiveRange(copyIdx.getDefIndex(),
- lis->getMBBEndIdx(killMBB),
- newKillVNI));
+ if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) {
+ assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) &&
+ "PHI kill range doesn't reach kill-block end. Not sane.");
+ newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB),
+ newKillRangeEnd, newVNI));
}
+ VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(),
+ copyMI, true,
+ lis->getVNInfoAllocator());
+ newKillVNI->setHasPHIKill(true);
+ li->addRange(LiveRange(copyIdx.getDefIndex(),
+ lis->getMBBEndIdx(killMBB),
+ newKillVNI));
}
-
newVNI->setHasPHIKill(false);
return newLI;
@@ -530,6 +505,13 @@ private:
} // end anonymous namespace
+namespace llvm {
+Spiller *createInlineSpiller(MachineFunction*,
+ LiveIntervals*,
+ const MachineLoopInfo*,
+ VirtRegMap*);
+}
+
llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
const MachineLoopInfo *loopInfo,
VirtRegMap *vrm) {
@@ -538,5 +520,6 @@ llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
case trivial: return new TrivialSpiller(mf, lis, vrm);
case standard: return new StandardSpiller(lis, loopInfo, vrm);
case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm);
+ case inline_: return createInlineSpiller(mf, lis, loopInfo, vrm);
}
}
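
The spiller hunks above only change how results are reported (new intervals are appended to a caller-provided vector instead of being returned) and how copies are emitted (BuildMI of a COPY instead of copyRegToReg); the trivial spilling strategy itself is unchanged: every instruction touching the spilled register gets a fresh register, a reload from the stack slot before it if it reads, and a store after it if it writes. A toy standalone model of that strategy, with the Instr type and the textual "instructions" invented for the sketch:

#include <cstdio>
#include <string>
#include <vector>

struct Instr { std::string text; bool reads, writes; };

// Toy "spill everywhere": rewrite each user of the spilled register to use a
// fresh register, bracketed by a reload and/or a store to the stack slot.
static std::vector<std::string> spillEverywhere(const std::vector<Instr> &Code,
                                                int NextReg) {
  std::vector<std::string> Out;
  for (const Instr &I : Code) {
    std::string R = "%r" + std::to_string(NextReg++);
    if (I.reads)
      Out.push_back(R + " = LOAD <slot>");
    Out.push_back(I.text + "   ; rewritten to use " + R);
    if (I.writes)
      Out.push_back("STORE " + R + ", <slot>");
  }
  return Out;
}

int main() {
  std::vector<Instr> Code = {{"%v = add %a, %b", false, true},
                             {"%c = mul %v, %v", true, false}};
  for (const std::string &Line : spillEverywhere(Code, 1024))
    std::printf("%s\n", Line.c_str());
}
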
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index dda52e871fea..450447b3933a 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -33,11 +33,19 @@ namespace llvm {
public:
virtual ~Spiller() = 0;
- /// Spill the given live range. The method used will depend on the Spiller
- /// implementation selected.
- virtual std::vector<LiveInterval*> spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestIndex = 0) = 0;
+ /// spill - Spill the given live interval. The method used will depend on
+ /// the Spiller implementation selected.
+ ///
+ /// @param li The live interval to be spilled.
+ /// @param spillIs A list of intervals that are about to be spilled,
+ /// and so cannot be used for remat etc.
+ /// @param newIntervals The newly created intervals will be appended here.
+ /// @param earliestIndex The earliest point for splitting. (OK, it's another
+ /// pointer to the allocator guts).
+ virtual void spill(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex *earliestIndex = 0) = 0;
};
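A minimal sketch of how an allocator might drive the revised interface: the spiller now appends any intervals it creates to a caller-supplied vector instead of returning them. The surrounding variables and the enqueueForAllocation hook are hypothetical placeholders, not part of this patch.

    std::vector<LiveInterval*> NewIntervals;
    SmallVector<LiveInterval*, 8> SpillIs;     // intervals already being spilled
    spiller->spill(LI, NewIntervals, SpillIs); // new intervals are appended here
    for (unsigned i = 0, e = NewIntervals.size(); i != e; ++i)
      enqueueForAllocation(NewIntervals[i]);   // hypothetical allocator hook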
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 8a6a727a1f97..ca5c28ce010c 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -136,7 +136,7 @@ bool StackProtector::RequiresStackProtector() const {
bool StackProtector::InsertStackProtectors() {
BasicBlock *FailBB = 0; // The basic block to jump to if check fails.
AllocaInst *AI = 0; // Place on stack that stores the stack guard.
- Constant *StackGuardVar = 0; // The stack guard variable.
+ Value *StackGuardVar = 0; // The stack guard variable.
for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
BasicBlock *BB = I++;
@@ -153,9 +153,17 @@ bool StackProtector::InsertStackProtectors() {
// StackGuard = load __stack_chk_guard
// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
//
- PointerType *PtrTy = PointerType::getUnqual(
- Type::getInt8Ty(RI->getContext()));
- StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+ const PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
+ unsigned AddressSpace, Offset;
+ if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
+ Constant *OffsetVal =
+ ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
+
+ StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
+ PointerType::get(PtrTy, AddressSpace));
+ } else {
+ StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+ }
BasicBlock &Entry = F->getEntryBlock();
Instruction *InsPt = &Entry.front();
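The new branch lets a target report a fixed stack-cookie location so StackProtector can build an inttoptr expression instead of loading __stack_chk_guard. A hedged sketch of such a hook; MyTargetLowering and the particular address space and offset are illustrative assumptions only.

    bool MyTargetLowering::getStackCookieLocation(unsigned &AddressSpace,
                                                  unsigned &Offset) const {
      AddressSpace = 256; // assumed segment-style address space holding the guard
      Offset = 0x28;      // assumed offset of the guard slot in that space
      return true;        // StackProtector then forms the inttoptr expression above
    }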
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 7f3b452f0a5a..eff3c33e3daa 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -508,8 +509,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
// Abort if the use is actually a sub-register def. We don't have enough
// information to figure out if it is really legal.
- if (MO.getSubReg() || MII->isExtractSubreg() ||
- MII->isInsertSubreg() || MII->isSubregToReg())
+ if (MO.getSubReg() || MII->isSubregToReg())
return false;
const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
@@ -571,7 +571,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
// Abort if the use is actually a sub-register use. We don't have enough
// information to figure out if it is really legal.
- if (MO.getSubReg() || MII->isExtractSubreg())
+ if (MO.getSubReg())
return false;
const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
@@ -610,8 +610,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
DEBUG(MI->dump());
++NumLoadElim;
} else {
- TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC,
- MI->getDebugLoc());
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DstReg).addReg(Reg);
++NumRegRepl;
}
@@ -627,8 +627,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
DEBUG(MI->dump());
++NumStoreElim;
} else {
- TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC,
- MI->getDebugLoc());
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(SrcReg);
++NumRegRepl;
}
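Both replacements above use the idiom that recurs throughout this patch: instead of asking the target to emit a copy through copyRegToReg, the pass builds a target-independent COPY pseudo-instruction that is lowered later. A generic sketch, with MBB, InsertPos, DL, DstReg and SrcReg standing in for whatever the calling pass has in scope:

    // Emit DstReg = COPY SrcReg at InsertPos; the target expands the pseudo later.
    BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), DstReg)
        .addReg(SrcReg);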
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 142398cc1642..59315cf67282 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterCoalescer.h"
@@ -695,9 +696,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
// Insert copy from curr.second to a temporary at
// the Phi defining curr.second
MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second);
- TII->copyRegToReg(*PI->getParent(), PI, t,
- curr.second, RC, RC, DebugLoc());
-
+ BuildMI(*PI->getParent(), PI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ t).addReg(curr.second);
DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t
<< "\n");
@@ -712,8 +712,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
}
// Insert copy from map[curr.first] to curr.second
- TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second,
- map[curr.first], RC, RC, DebugLoc());
+ BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::COPY), curr.second).addReg(map[curr.first]);
map[curr.first] = curr.second;
DEBUG(dbgs() << "Inserted copy from " << curr.first << " to "
<< curr.second << "\n");
@@ -761,8 +761,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
// Insert a copy from dest to a new temporary t at the end of b
unsigned t = MF->getRegInfo().createVirtualRegister(RC);
- TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t,
- curr.second, RC, RC, DebugLoc());
+ BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::COPY), t).addReg(curr.second);
map[curr.second] = t;
MachineBasicBlock::iterator TI = MBB->getFirstTerminator();
@@ -830,9 +830,6 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg());
VNInfo* FirstVN = *Int.vni_begin();
FirstVN->setHasPHIKill(false);
- if (I->getOperand(i).isKill())
- FirstVN->addKill(LI.getInstructionIndex(I).getUseIndex());
-
LiveRange LR (LI.getMBBStartIdx(I->getParent()),
LI.getInstructionIndex(I).getUseIndex().getNextSlot(),
FirstVN);
@@ -959,9 +956,8 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
} else {
// Insert a last-minute copy if a conflict was detected.
const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
- const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(I->first);
- TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(),
- I->first, SI->first, RC, RC, DebugLoc());
+ BuildMI(*SI->second, SI->second->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::COPY), I->first).addReg(SI->first);
LI.renumber();
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index f2e2a76f00eb..075db803bd23 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -559,11 +560,9 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
}
MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
- const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first);
- TII->copyRegToReg(*PredBB, Loc, CopyInfos[i].first,
- CopyInfos[i].second, RC,RC, DebugLoc());
- MachineInstr *CopyMI = prior(Loc);
- Copies.push_back(CopyMI);
+ Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first).addReg(CopyInfos[i].second));
}
NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
@@ -618,11 +617,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
}
MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
- const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first);
- TII->copyRegToReg(*PrevBB, Loc, CopyInfos[i].first,
- CopyInfos[i].second, RC, RC, DebugLoc());
- MachineInstr *CopyMI = prior(Loc);
- Copies.push_back(CopyMI);
+ Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first)
+ .addReg(CopyInfos[i].second));
}
} else {
// No PHIs to worry about, just splice the instructions over.
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 0ad6619ac4fd..cdacb98e0e88 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/SmallVector.h"
@@ -21,11 +22,34 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PostRAHazardRecognizer.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+/// ReplaceTailWithBranchTo - Delete the instruction Tail and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void
+TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+
+ // Remove all the old successors of MBB from the CFG.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_begin());
+
+ // Remove all the dead instructions from the end of MBB.
+ MBB->erase(Tail, MBB->end());
+
+  // If MBB isn't immediately before NewDest, insert a branch to it.
+ if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+ InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
+ Tail->getDebugLoc());
+ MBB->addSuccessor(NewDest);
+}
+
// commuteInstruction - The default implementation of this method just exchanges
// the two operands returned by findCommutedOpIndices.
MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
@@ -136,17 +160,9 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
unsigned DestReg,
unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const {
+ const TargetRegisterInfo &TRI) const {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MachineOperand &MO = MI->getOperand(0);
- if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
- MO.setReg(DestReg);
- MO.setSubReg(SubIdx);
- } else if (SubIdx) {
- MO.setReg(TRI->getSubReg(DestReg, SubIdx));
- } else {
- MO.setReg(DestReg);
- }
+ MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
MBB.insert(I, MI);
}
@@ -175,6 +191,47 @@ TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
return FnSize;
}
+// If the COPY instruction in MI can be folded to a stack operation, return
+// the register class to use.
+static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
+ unsigned FoldIdx) {
+ assert(MI->isCopy() && "MI must be a COPY instruction");
+ if (MI->getNumOperands() != 2)
+ return 0;
+  assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");
+
+ const MachineOperand &FoldOp = MI->getOperand(FoldIdx);
+ const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx);
+
+ if (FoldOp.getSubReg() || LiveOp.getSubReg())
+ return 0;
+
+ unsigned FoldReg = FoldOp.getReg();
+ unsigned LiveReg = LiveOp.getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
+ "Cannot fold physregs");
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
+
+ if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
+ return RC->contains(LiveOp.getReg()) ? RC : 0;
+
+ const TargetRegisterClass *LiveRC = MRI.getRegClass(LiveReg);
+ if (RC == LiveRC || RC->hasSubClass(LiveRC))
+ return RC;
+
+ // FIXME: Allow folding when register classes are memory compatible.
+ return 0;
+}
+
+bool TargetInstrInfoImpl::
+canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
+}
+
/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
/// slot into the specified machine instruction for the specified operand(s).
/// If this is possible, a new instruction is returned with the specified
@@ -182,10 +239,9 @@ TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
/// removing the old instruction and adding the new one in the instruction
/// stream.
MachineInstr*
-TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
- MachineInstr* MI,
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+ int FI) const {
unsigned Flags = 0;
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (MI->getOperand(Ops[i]).isDef())
@@ -193,34 +249,56 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
else
Flags |= MachineMemOperand::MOLoad;
+ MachineBasicBlock *MBB = MI->getParent();
+ assert(MBB && "foldMemoryOperand needs an inserted instruction");
+ MachineFunction &MF = *MBB->getParent();
+
// Ask the target to do the actual folding.
- MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
- if (!NewMI) return 0;
+ if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
+ // Add a memory operand, foldMemoryOperandImpl doesn't do that.
+ assert((!(Flags & MachineMemOperand::MOStore) ||
+ NewMI->getDesc().mayStore()) &&
+ "Folded a def to a non-store!");
+ assert((!(Flags & MachineMemOperand::MOLoad) ||
+ NewMI->getDesc().mayLoad()) &&
+ "Folded a use to a non-load!");
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ assert(MFI.getObjectOffset(FI) != -1);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
+ Flags, /*Offset=*/0,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ NewMI->addMemOperand(MF, MMO);
- assert((!(Flags & MachineMemOperand::MOStore) ||
- NewMI->getDesc().mayStore()) &&
- "Folded a def to a non-store!");
- assert((!(Flags & MachineMemOperand::MOLoad) ||
- NewMI->getDesc().mayLoad()) &&
- "Folded a use to a non-load!");
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
- assert(MFI.getObjectOffset(FrameIndex) != -1);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIndex),
- Flags, /*Offset=*/0,
- MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
- NewMI->addMemOperand(MF, MMO);
+ // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI.
+ return MBB->insert(MI, NewMI);
+ }
- return NewMI;
+ // Straight COPY may fold as load/store.
+ if (!MI->isCopy() || Ops.size() != 1)
+ return 0;
+
+ const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
+ if (!RC)
+ return 0;
+
+ const MachineOperand &MO = MI->getOperand(1-Ops[0]);
+ MachineBasicBlock::iterator Pos = MI;
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ if (Flags == MachineMemOperand::MOStore)
+ storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
+ else
+ loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI);
+ return --Pos;
}
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
/// stack slot.
MachineInstr*
-TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
- MachineInstr* MI,
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr* LoadMI) const {
assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!");
@@ -228,11 +306,15 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
#endif
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineFunction &MF = *MBB.getParent();
// Ask the target to do the actual folding.
MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
if (!NewMI) return 0;
+ NewMI = MBB.insert(MI, NewMI);
+
// Copy the memoperands from the load to the folded instruction.
NewMI->setMemRefs(LoadMI->memoperands_begin(),
LoadMI->memoperands_end());
@@ -240,11 +322,9 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
return NewMI;
}
-bool
-TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr *
- MI,
- AliasAnalysis *
- AA) const {
+bool TargetInstrInfo::
+isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
const MachineFunction &MF = *MI->getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetMachine &TM = MF.getTarget();
@@ -324,3 +404,31 @@ TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr *
// Everything checked out.
return true;
}
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const{
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+ if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+ return true;
+
+ return false;
+}
+
+// Default implementation of CreateTargetPostRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfoImpl::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
+ return (ScheduleHazardRecognizer *)new PostRAHazardRecognizer(II);
+}
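With the default hooks above, a spiller can turn a plain COPY into a stack access without target-specific folding support. A sketch of a hypothetical caller; CopyMI, FoldIdx and StackSlot are placeholders, and note that foldMemoryOperand now inserts the folded instruction itself.

    SmallVector<unsigned, 1> Ops;
    Ops.push_back(FoldIdx);                    // COPY operand to replace with memory
    if (TII->canFoldMemoryOperand(CopyMI, Ops))
      if (MachineInstr *FoldedMI =
              TII->foldMemoryOperand(CopyMI, Ops, StackSlot)) {
        (void)FoldedMI;            // already inserted into the block by the hook
        CopyMI->eraseFromParent(); // the original COPY is no longer needed
      }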
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 71ad3fb6f99f..a80cfc4b256f 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -825,32 +825,32 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
TargetLoweringObjectFile::Initialize(Ctx, TM);
TextSection =
getContext().getCOFFSection(".text",
- MCSectionCOFF::IMAGE_SCN_CNT_CODE |
- MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_CNT_CODE |
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getText());
DataSection =
getContext().getCOFFSection(".data",
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ |
- MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
SectionKind::getDataRel());
ReadOnlySection =
getContext().getCOFFSection(".rdata",
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
StaticCtorSection =
getContext().getCOFFSection(".ctors",
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ |
- MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
SectionKind::getDataRel());
StaticDtorSection =
getContext().getCOFFSection(".dtors",
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ |
- MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
SectionKind::getDataRel());
// FIXME: We're emitting LSDA info into a readonly section on COFF, even
@@ -859,76 +859,76 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
// adjusted or this should be a data section.
LSDASection =
getContext().getCOFFSection(".gcc_except_table",
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
EHFrameSection =
getContext().getCOFFSection(".eh_frame",
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ |
- MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
SectionKind::getDataRel());
// Debug info.
DwarfAbbrevSection =
getContext().getCOFFSection(".debug_abbrev",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfInfoSection =
getContext().getCOFFSection(".debug_info",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfLineSection =
getContext().getCOFFSection(".debug_line",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfFrameSection =
getContext().getCOFFSection(".debug_frame",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfPubNamesSection =
getContext().getCOFFSection(".debug_pubnames",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfPubTypesSection =
getContext().getCOFFSection(".debug_pubtypes",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfStrSection =
getContext().getCOFFSection(".debug_str",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfLocSection =
getContext().getCOFFSection(".debug_loc",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfARangesSection =
getContext().getCOFFSection(".debug_aranges",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfRangesSection =
getContext().getCOFFSection(".debug_ranges",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfMacroInfoSection =
getContext().getCOFFSection(".debug_macinfo",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DrectveSection =
getContext().getCOFFSection(".drectve",
- MCSectionCOFF::IMAGE_SCN_LNK_INFO,
+ COFF::IMAGE_SCN_LNK_INFO,
SectionKind::getMetadata());
}
@@ -936,27 +936,27 @@ static unsigned
getCOFFSectionFlags(SectionKind K) {
unsigned Flags = 0;
- if (!K.isMetadata())
+ if (K.isMetadata())
Flags |=
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE;
+ COFF::IMAGE_SCN_MEM_DISCARDABLE;
else if (K.isText())
Flags |=
- MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE |
- MCSectionCOFF::IMAGE_SCN_CNT_CODE;
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_CNT_CODE;
else if (K.isBSS ())
Flags |=
- MCSectionCOFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ |
- MCSectionCOFF::IMAGE_SCN_MEM_WRITE;
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
else if (K.isReadOnly())
Flags |=
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ;
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ;
else if (K.isWriteable())
Flags |=
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ |
- MCSectionCOFF::IMAGE_SCN_MEM_WRITE;
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
return Flags;
}
@@ -995,10 +995,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
unsigned Characteristics = getCOFFSectionFlags(Kind);
- Characteristics |= MCSectionCOFF::IMAGE_SCN_LNK_COMDAT;
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
return getContext().getCOFFSection(Name.str(), Characteristics,
- MCSectionCOFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind);
+ COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind);
}
if (Kind.isText())
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 3d10dc13d20b..564914373bb5 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -381,7 +382,7 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
DstReg = 0;
unsigned SrcSubIdx, DstSubIdx;
if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- if (MI.isExtractSubreg()) {
+ if (MI.isCopy()) {
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(1).getReg();
} else if (MI.isInsertSubreg()) {
@@ -897,6 +898,108 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
}
}
}
+
+ // If this is an instruction with a load folded into it, try unfolding
+ // the load, e.g. avoid this:
+ // movq %rdx, %rcx
+ // addq (%rax), %rcx
+ // in favor of this:
+ // movq (%rax), %rcx
+ // addq %rdx, %rcx
+  // because it's preferable to schedule a load rather than a register copy.
+ if (TID.mayLoad() && !regBKilled) {
+ // Determine if a load can be unfolded.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc != 0) {
+ const TargetInstrDesc &UnfoldTID = TII->get(NewOpc);
+ if (UnfoldTID.getNumDefs() == 1) {
+ MachineFunction &MF = *mbbi->getParent();
+
+ // Unfold the load.
+ DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi);
+ const TargetRegisterClass *RC =
+ UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI);
+ unsigned Reg = MRI->createVirtualRegister(RC);
+ SmallVector<MachineInstr *, 2> NewMIs;
+ if (!TII->unfoldMemoryOperand(MF, mi, Reg,
+ /*UnfoldLoad=*/true,/*UnfoldStore=*/false,
+ NewMIs)) {
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ return false;
+ }
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ // The load was previously folded, so this is the only use.
+ NewMIs[1]->addRegisterKilled(Reg, TRI);
+
+ // Tentatively insert the instructions into the block so that they
+ // look "normal" to the transformation logic.
+ mbbi->insert(mi, NewMIs[0]);
+ mbbi->insert(mi, NewMIs[1]);
+
+ DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
+ << "2addr: NEW INST: " << *NewMIs[1]);
+
+ // Transform the instruction, now that it no longer has a load.
+ unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
+ unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
+ MachineBasicBlock::iterator NewMI = NewMIs[1];
+ bool TransformSuccess =
+ TryInstructionTransform(NewMI, mi, mbbi,
+ NewSrcIdx, NewDstIdx, Dist);
+ if (TransformSuccess ||
+ NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+ // Success, or at least we made an improvement. Keep the unfolded
+ // instructions and discard the original.
+ if (LV) {
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = mi->getOperand(i);
+ if (MO.isReg() && MO.getReg() != 0 &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isUse()) {
+ if (MO.isKill()) {
+ if (NewMIs[0]->killsRegister(MO.getReg()))
+ LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]);
+ else {
+ assert(NewMIs[1]->killsRegister(MO.getReg()) &&
+ "Kill missing after load unfold!");
+ LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]);
+ }
+ }
+ } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) {
+ if (NewMIs[1]->registerDefIsDead(MO.getReg()))
+ LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]);
+ else {
+ assert(NewMIs[0]->registerDefIsDead(MO.getReg()) &&
+ "Dead flag missing after load unfold!");
+ LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]);
+ }
+ }
+ }
+ }
+ LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
+ }
+ mi->eraseFromParent();
+ mi = NewMIs[1];
+ if (TransformSuccess)
+ return true;
+ } else {
+ // Transforming didn't eliminate the tie and didn't lead to an
+ // improvement. Clean up the unfolded instructions and keep the
+ // original.
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ }
+ }
+ }
+ }
+
return false;
}
@@ -1047,14 +1150,12 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
- TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI);
+ TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI);
ReMatRegs.set(regB);
++NumReMats;
} else {
- bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc,
- mi->getDebugLoc());
- (void)Emitted;
- assert(Emitted && "Unable to issue a copy instruction!\n");
+ BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ regA).addReg(regB);
}
MachineBasicBlock::iterator prevMI = prior(mi);
@@ -1104,12 +1205,30 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
}
}
}
-
+
+ // Schedule the source copy / remat inserted to form two-address
+ // instruction. FIXME: Does it matter the distance map may not be
+ // accurate after it's scheduled?
+ TII->scheduleTwoAddrSource(prior(mi), mi, *TRI);
+
MadeChange = true;
DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
}
+ // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+ if (mi->isInsertSubreg()) {
+ // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+ // To %reg:subidx = COPY %subreg
+ unsigned SubIdx = mi->getOperand(3).getImm();
+ mi->RemoveOperand(3);
+ assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+ mi->getOperand(0).setSubReg(SubIdx);
+ mi->RemoveOperand(1);
+ mi->setDesc(TII->get(TargetOpcode::COPY));
+ DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+ }
+
// Clear TiedOperands here instead of at the top of the loop
// since most instructions do not have tied operands.
TiedOperands.clear();
@@ -1136,14 +1255,13 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
static void UpdateRegSequenceSrcs(unsigned SrcReg,
unsigned DstReg, unsigned SubIdx,
- MachineRegisterInfo *MRI) {
+ MachineRegisterInfo *MRI,
+ const TargetRegisterInfo &TRI) {
for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
RE = MRI->reg_end(); RI != RE; ) {
MachineOperand &MO = RI.getOperand();
++RI;
- MO.setReg(DstReg);
- assert(MO.getSubReg() == 0);
- MO.setSubReg(SubIdx);
+ MO.substVirtReg(DstReg, SubIdx, TRI);
}
}
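UpdateRegSequenceSrcs now defers to MachineOperand::substVirtReg, which composes sub-register indices instead of asserting that the operand had none. A one-line sketch of the idiom, with MO, DstReg, SubIdx and TRI as placeholders:

    // Rewrite the operand to DstReg:SubIdx, composing with any sub-register
    // index MO already carried.
    MO.substVirtReg(DstReg, SubIdx, TRI);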
@@ -1165,55 +1283,102 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
if (!Seen.insert(SrcReg))
continue;
- // If there are no other uses than extract_subreg which feed into
+ // Check that the instructions are all in the same basic block.
+ MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg);
+ MachineInstr *DstDefMI = MRI->getVRegDef(DstReg);
+ if (SrcDefMI->getParent() != DstDefMI->getParent())
+ continue;
+
+ // If there are no other uses than copies which feed into
// the reg_sequence, then we might be able to coalesce them.
bool CanCoalesce = true;
- SmallVector<unsigned, 4> SubIndices;
+ SmallVector<unsigned, 4> SrcSubIndices, DstSubIndices;
for (MachineRegisterInfo::use_nodbg_iterator
UI = MRI->use_nodbg_begin(SrcReg),
UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
MachineInstr *UseMI = &*UI;
- if (!UseMI->isExtractSubreg() ||
- UseMI->getOperand(0).getReg() != DstReg) {
+ if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) {
CanCoalesce = false;
break;
}
- SubIndices.push_back(UseMI->getOperand(2).getImm());
+ SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg());
+ DstSubIndices.push_back(UseMI->getOperand(0).getSubReg());
}
- if (!CanCoalesce || SubIndices.size() < 2)
+ if (!CanCoalesce || SrcSubIndices.size() < 2)
continue;
- std::sort(SubIndices.begin(), SubIndices.end());
- unsigned NewSubIdx = 0;
- if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices,
- NewSubIdx)) {
- bool Proceed = true;
- if (NewSubIdx)
- for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
- RE = MRI->reg_end(); RI != RE; ) {
- MachineOperand &MO = RI.getOperand();
- ++RI;
- // FIXME: If the sub-registers do not combine to the whole
- // super-register, i.e. NewSubIdx != 0, and any of the use has a
- // sub-register index, then abort the coalescing attempt.
- if (MO.getSubReg()) {
- Proceed = false;
- break;
- }
- MO.setReg(DstReg);
- MO.setSubReg(NewSubIdx);
- }
- if (Proceed)
- for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
- RE = MRI->reg_end(); RI != RE; ) {
- MachineOperand &MO = RI.getOperand();
- ++RI;
- MO.setReg(DstReg);
- if (NewSubIdx)
- MO.setSubReg(NewSubIdx);
- }
+ // Check that the source subregisters can be combined.
+ std::sort(SrcSubIndices.begin(), SrcSubIndices.end());
+ unsigned NewSrcSubIdx = 0;
+ if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices,
+ NewSrcSubIdx))
+ continue;
+
+ // Check that the destination subregisters can also be combined.
+ std::sort(DstSubIndices.begin(), DstSubIndices.end());
+ unsigned NewDstSubIdx = 0;
+ if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices,
+ NewDstSubIdx))
+ continue;
+
+ // If neither source nor destination can be combined to the full register,
+ // just give up. This could be improved if it ever matters.
+ if (NewSrcSubIdx != 0 && NewDstSubIdx != 0)
+ continue;
+
+ // Now that we know that all the uses are extract_subregs and that those
+ // subregs can somehow be combined, scan all the extract_subregs again to
+ // make sure the subregs are in the right order and can be composed.
+ MachineInstr *SomeMI = 0;
+ CanCoalesce = true;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ assert(UseMI->isCopy());
+ unsigned DstSubIdx = UseMI->getOperand(0).getSubReg();
+ unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg();
+ assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination");
+ if ((NewDstSubIdx == 0 &&
+ TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) ||
+ (NewSrcSubIdx == 0 &&
+ TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) {
+ CanCoalesce = false;
+ break;
+ }
+ // Keep track of one of the uses.
+ SomeMI = UseMI;
+ }
+ if (!CanCoalesce)
+ continue;
+
+ // Insert a copy to replace the original.
+ MachineBasicBlock::iterator InsertLoc = SomeMI;
+ MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI,
+ SomeMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(DstReg, RegState::Define, NewDstSubIdx)
+ .addReg(SrcReg, 0, NewSrcSubIdx);
+
+ // Remove all the old extract instructions.
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ) {
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (UseMI == CopyMI)
+ continue;
+ assert(UseMI->isCopy());
+ // Move any kills to the new copy or extract instruction.
+ if (UseMI->getOperand(1).isKill()) {
+ CopyMI->getOperand(1).setIsKill();
+ if (LV)
+ // Update live variables
+ LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI);
}
+ UseMI->eraseFromParent();
+ }
}
}
@@ -1268,15 +1433,13 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
}
IsImpDef = false;
- // Remember EXTRACT_SUBREG sources. These might be candidate for
- // coalescing.
- if (DefMI->isExtractSubreg())
+ // Remember COPY sources. These might be candidate for coalescing.
+ if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg())
RealSrcs.push_back(DefMI->getOperand(1).getReg());
- if (!Seen.insert(SrcReg) ||
- MI->getParent() != DefMI->getParent() ||
- !MI->getOperand(i).isKill() ||
- HasOtherRegSequenceUses(SrcReg, MI, MRI)) {
+ bool isKill = MI->getOperand(i).isKill();
+ if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() ||
+ !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI)) {
// REG_SEQUENCE cannot have duplicated operands, add a copy.
// Also add a copy if the source is live-in to the block. We don't want
// to end up with a partial-redef of a livein, e.g.
@@ -1292,30 +1455,23 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
// If the REG_SEQUENCE doesn't kill its source, keeping live variables
// correctly up to date becomes very difficult. Insert a copy.
//
- const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
- unsigned NewReg = MRI->createVirtualRegister(RC);
MachineBasicBlock::iterator InsertLoc = MI;
- bool Emitted =
- TII->copyRegToReg(*MI->getParent(), InsertLoc, NewReg, SrcReg, RC, RC,
- MI->getDebugLoc());
- (void)Emitted;
- assert(Emitted && "Unable to issue a copy instruction!\n");
- MI->getOperand(i).setReg(NewReg);
- if (MI->getOperand(i).isKill()) {
- MachineBasicBlock::iterator CopyMI = prior(InsertLoc);
- MachineOperand *KillMO = CopyMI->findRegisterUseOperand(SrcReg);
- KillMO->setIsKill();
- if (LV)
- // Update live variables
- LV->replaceKillInstruction(SrcReg, MI, &*CopyMI);
- }
+ MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
+ MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
+ .addReg(DstReg, RegState::Define, MI->getOperand(i+1).getImm())
+ .addReg(SrcReg, getKillRegState(isKill));
+ MI->getOperand(i).setReg(0);
+ if (LV && isKill)
+ LV->replaceKillInstruction(SrcReg, MI, CopyMI);
+ DEBUG(dbgs() << "Inserted: " << *CopyMI);
}
}
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
unsigned SrcReg = MI->getOperand(i).getReg();
+ if (!SrcReg) continue;
unsigned SubIdx = MI->getOperand(i+1).getImm();
- UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI);
+ UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI);
}
if (IsImpDef) {
@@ -1328,8 +1484,11 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
MI->eraseFromParent();
}
- // Try coalescing some EXTRACT_SUBREG instructions.
- CoalesceExtSubRegs(RealSrcs, DstReg);
+ // Try coalescing some EXTRACT_SUBREG instructions. This can create
+ // INSERT_SUBREG instructions that must have <undef> flags added by
+ // LiveIntervalAnalysis, so only run it when LiveVariables is available.
+ if (LV)
+ CoalesceExtSubRegs(RealSrcs, DstReg);
}
RegSequences.clear();
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 871d83628ac1..57a1500e6e9d 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -667,8 +667,7 @@ static void ReMaterialize(MachineBasicBlock &MBB,
assert(TID.getNumDefs() == 1 &&
"Don't know how to remat instructions that define > 1 values!");
#endif
- TII->reMaterialize(MBB, MII, DestReg,
- ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI, TRI);
+ TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI);
MachineInstr *NewMI = prior(MII);
for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = NewMI->getOperand(i);
@@ -769,7 +768,7 @@ void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end();
I != E; ++I) {
unsigned Reg = I->first;
- const TargetRegisterClass* RC = TRI->getPhysicalRegisterRegClass(Reg);
+ const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg);
// FIXME: A temporary workaround. We can't reuse available value if it's
// not safe to move the def of the virtual register's class. e.g.
// X86::RFP* register classes. Do not add it as a live-in.
@@ -1022,7 +1021,7 @@ static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
unsigned Kill = Kills[i];
if (!Defs[Kill] && !Uses[Kill] &&
- TRI->getPhysicalRegisterRegClass(Kill) == RC)
+ RC->contains(Kill))
return Kill;
}
for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
@@ -1410,25 +1409,25 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII,
if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
assert(NewMIs.size() == 1);
MachineInstr *NewMI = NewMIs.back();
+ MBB->insert(MII, NewMI);
NewMIs.clear();
int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
assert(Idx != -1);
SmallVector<unsigned, 1> Ops;
Ops.push_back(Idx);
- MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, NewMI, Ops, SS);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS);
+ NewMI->eraseFromParent();
if (FoldedMI) {
VRM->addSpillSlotUse(SS, FoldedMI);
if (!VRM->hasPhys(UnfoldVR))
VRM->assignVirt2Phys(UnfoldVR, UnfoldPR);
VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
- MII = MBB->insert(MII, FoldedMI);
+ MII = FoldedMI;
InvalidateKills(MI, TRI, RegKills, KillOps);
VRM->RemoveMachineInstrFromMaps(&MI);
MBB->erase(&MI);
- MF.DeleteMachineInstr(NewMI);
return true;
}
- MF.DeleteMachineInstr(NewMI);
}
}
@@ -1480,7 +1479,6 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII,
if (MII == MBB->begin() || !MII->killsRegister(SrcReg))
return false;
- MachineFunction &MF = *MBB->getParent();
MachineInstr &MI = *MII;
MachineBasicBlock::iterator DefMII = prior(MII);
MachineInstr *DefMI = DefMII;
@@ -1511,11 +1509,12 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII,
MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
if (!CommutedMI)
return false;
+ MBB->insert(MII, CommutedMI);
SmallVector<unsigned, 1> Ops;
Ops.push_back(NewDstIdx);
- MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS);
// Not needed since foldMemoryOperand returns new MI.
- MF.DeleteMachineInstr(CommutedMI);
+ CommutedMI->eraseFromParent();
if (!FoldedMI)
return false;
@@ -1528,7 +1527,7 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII,
MachineInstr *StoreMI = MII;
VRM->addSpillSlotUse(SS, StoreMI);
VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
- MII = MBB->insert(MII, FoldedMI); // Update MII to backtrack.
+ MII = FoldedMI; // Update MII to backtrack.
// Delete all 3 old instructions.
InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
@@ -1704,7 +1703,7 @@ bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI);
for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
unsigned PhysReg = EmSpills[i];
- const TargetRegisterClass *RC = TRI->getPhysicalRegisterRegClass(PhysReg);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
assert(RC && "Unable to determine register class!");
int SS = VRM->getEmergencySpillSlot(RC);
if (UsedSS.count(SS))
@@ -1759,7 +1758,6 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
bool DoReMat = VRM->isReMaterialized(VirtReg);
int SSorRMId = DoReMat
? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
if (InReg == Phys) {
// If the value is already available in the expected register, save
@@ -1793,20 +1791,16 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
MachineBasicBlock::iterator InsertLoc =
ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
*MBB->getParent());
-
- TII->copyRegToReg(*MBB, InsertLoc, Phys, InReg, RC, RC,
- MI->getDebugLoc());
+ MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), Phys)
+ .addReg(InReg, RegState::Kill);
// This invalidates Phys.
Spills.ClobberPhysReg(Phys);
// Remember it's available.
Spills.addAvailable(SSorRMId, Phys);
- // Mark is killed.
- MachineInstr *CopyMI = prior(InsertLoc);
CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
- KillOpnd->setIsKill();
UpdateKills(*CopyMI, TRI, RegKills, KillOps);
DEBUG(dbgs() << '\t' << *CopyMI);
@@ -2013,7 +2007,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// = EXTRACT_SUBREG fi#1
// fi#1 is available in EDI, but it cannot be reused because it's not in
// the right register file.
- if (PhysReg && !AvoidReload && (SubIdx || MI.isExtractSubreg())) {
+ if (PhysReg && !AvoidReload && SubIdx) {
const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
if (!RC->contains(PhysReg))
PhysReg = 0;
@@ -2034,6 +2028,18 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
Spills.canClobberPhysReg(PhysReg);
}
+ // If this is an asm, and PhysReg is used elsewhere as an earlyclobber
+ // operand, we can't also use it as an input. (Outputs always come
+ // before inputs, so we can stop looking at i.)
+ if (MI.isInlineAsm()) {
+ for (unsigned k=0; k<i; ++k) {
+ MachineOperand &MOk = MI.getOperand(k);
+ if (MOk.isReg() && MOk.getReg()==PhysReg && MOk.isEarlyClobber()) {
+ CanReuse = false;
+ break;
+ }
+ }
+ }
if (CanReuse) {
// If this stack slot value is already available, reuse it!
@@ -2104,6 +2110,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// To avoid this problem, and to avoid doing a load right after a store,
// we emit a copy from PhysReg into the designated register for this
// operand.
+ //
+ // This case also applies to an earlyclobber'd PhysReg.
unsigned DesignatedReg = VRM->getPhys(VirtReg);
assert(DesignatedReg && "Must map virtreg to physreg!");
@@ -2136,7 +2144,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
continue;
}
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
MRI->setPhysRegUsed(DesignatedReg);
ReusedOperands.markClobbered(DesignatedReg);
@@ -2144,11 +2151,9 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
MachineBasicBlock::iterator InsertLoc =
ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
SSorRMId, TII, MF);
-
- TII->copyRegToReg(*MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC,
- MI.getDebugLoc());
-
- MachineInstr *CopyMI = prior(InsertLoc);
+ MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ DesignatedReg).addReg(PhysReg);
CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
UpdateKills(*CopyMI, TRI, RegKills, KillOps);
@@ -2269,27 +2274,16 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
if (DestReg != InReg) {
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
- TII->copyRegToReg(*MBB, &MI, DestReg, InReg, RC, RC,
- MI.getDebugLoc());
MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
- unsigned SubIdx = DefMO->getSubReg();
+ MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(DestReg, RegState::Define, DefMO->getSubReg())
+ .addReg(InReg, RegState::Kill);
// Revisit the copy so we make sure to notice the effects of the
// operation on the destreg (either needing to RA it if it's
// virtual or needing to clobber any values if it's physical).
- NextMII = &MI;
- --NextMII; // backtrack to the copy.
+ NextMII = CopyMI;
NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- // Propagate the sub-register index over.
- if (SubIdx) {
- DefMO = NextMII->findRegisterDefOperand(DestReg);
- DefMO->setSubReg(SubIdx);
- }
-
- // Mark is killed.
- MachineOperand *KillOpnd = NextMII->findRegisterUseOperand(InReg);
- KillOpnd->setIsKill();
-
BackTracked = true;
} else {
DEBUG(dbgs() << "Removing now-noop copy: " << MI);
@@ -2430,6 +2424,24 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// Also check if it's copying from an "undef", if so, we can't
// eliminate this or else the undef marker is lost and it will
// confuse the scavenger. This is extremely rare.
+ if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() &&
+ MI.getNumOperands() == 2) {
+ ++NumDCE;
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+ SmallVector<unsigned, 2> KillRegs;
+ InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
+ if (MO.isDead() && !KillRegs.empty()) {
+ // Source register or an implicit super/sub-register use is killed.
+ assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg()));
+ // Last def is now dead.
+ TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps);
+ }
+ VRM->RemoveMachineInstrFromMaps(&MI);
+ MBB->erase(&MI);
+ Erased = true;
+ Spills.disallowClobberPhysReg(VirtReg);
+ goto ProcessNextInst;
+ }
unsigned Src, Dst, SrcSR, DstSR;
if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) &&
Src == Dst && SrcSR == DstSR &&
@@ -2519,6 +2531,16 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// Check to see if this is a noop copy. If so, eliminate the
// instruction before considering the dest reg to be changed.
+ if (MI.isIdentityCopy()) {
+ ++NumDCE;
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM->RemoveMachineInstrFromMaps(&MI);
+ MBB->erase(&MI);
+ Erased = true;
+ UpdateKills(*LastStore, TRI, RegKills, KillOps);
+ goto ProcessNextInst;
+ }
{
unsigned Src, Dst, SrcSR, DstSR;
if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) &&