Diffstat (limited to 'lib/CodeGen')
159 files changed, 15273 insertions, 17695 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 25842a7876a2..822a564441ac 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -148,7 +148,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { assert(State == NULL); State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); - bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); + bool IsReturnBlock = (!BB->empty() && BB->back().isReturn()); std::vector<unsigned> &KillIndices = State->GetKillIndices(); std::vector<unsigned> &DefIndices = State->GetDefIndices(); @@ -157,7 +157,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // In a return block, examine the function live-out regs. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), E = MRI.liveout_end(); I != E; ++I) { - for (const unsigned *Alias = TRI->getOverlaps(*I); + for (const uint16_t *Alias = TRI->getOverlaps(*I); unsigned Reg = *Alias; ++Alias) { State->UnionGroups(Reg, 0); KillIndices[Reg] = BB->size(); @@ -173,7 +173,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { - for (const unsigned *Alias = TRI->getOverlaps(*I); + for (const uint16_t *Alias = TRI->getOverlaps(*I); unsigned Reg = *Alias; ++Alias) { State->UnionGroups(Reg, 0); KillIndices[Reg] = BB->size(); @@ -186,10 +186,10 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // callee-saved register that is not saved in the prolog. const MachineFrameInfo *MFI = MF.getFrameInfo(); BitVector Pristine = MFI->getPristineRegs(BB); - for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; - for (const unsigned *Alias = TRI->getOverlaps(Reg); + for (const uint16_t *Alias = TRI->getOverlaps(Reg); unsigned AliasReg = *Alias; ++Alias) { State->UnionGroups(AliasReg, 0); KillIndices[AliasReg] = BB->size(); @@ -265,7 +265,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, IsImplicitDefUse(MI, MO)) { const unsigned Reg = MO.getReg(); PassthruRegs.insert(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { PassthruRegs.insert(*Subreg); } @@ -333,7 +333,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); } // Repeat for subregisters. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { unsigned SubregReg = *Subreg; if (!State->IsLive(SubregReg)) { @@ -384,7 +384,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). 
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI)) { DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); @@ -392,7 +392,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // Any aliased that are live at this point are completely or // partially defined here, so group those aliases with Reg. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; if (State->IsLive(AliasReg)) { State->UnionGroups(Reg, AliasReg); @@ -423,7 +423,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, continue; // Update def for Reg and aliases. - for (const unsigned *Alias = TRI->getOverlaps(Reg); + for (const uint16_t *Alias = TRI->getOverlaps(Reg); unsigned AliasReg = *Alias; ++Alias) DefIndices[AliasReg] = Count; } @@ -451,8 +451,8 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // instruction which may not be executed. The second R6 def may or may not // re-define R6 so it's not safe to change it since the last R6 use cannot be // changed. - bool Special = MI->getDesc().isCall() || - MI->getDesc().hasExtraSrcRegAllocReq() || + bool Special = MI->isCall() || + MI->hasExtraSrcRegAllocReq() || TII->isPredicated(MI); // Scan the register uses for this instruction and update @@ -678,7 +678,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( goto next_super_reg; } else { bool found = false; - for (const unsigned *Alias = TRI->getAliasSet(NewReg); + for (const uint16_t *Alias = TRI->getAliasSet(NewReg); *Alias; ++Alias) { unsigned AliasReg = *Alias; if (State->IsLive(AliasReg) || @@ -780,6 +780,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( I != E; --Count) { MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; + DEBUG(dbgs() << "Anti: "); DEBUG(MI->dump()); diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index 1005f102bea6..87f64311a655 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -41,7 +41,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, if (HintPair.first) { const TargetRegisterInfo &TRI = VRM.getTargetRegInfo(); // The remaining allocation order may depend on the hint. - ArrayRef<unsigned> Order = + ArrayRef<uint16_t> Order = TRI.getRawAllocationOrder(RC, HintPair.first, Hint, VRM.getMachineFunction()); if (Order.empty()) diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h index d1e48a1f2e96..0ce7e0c3b5f6 100644 --- a/lib/CodeGen/AllocationOrder.h +++ b/lib/CodeGen/AllocationOrder.h @@ -34,8 +34,7 @@ public: /// AllocationOrder - Create a new AllocationOrder for VirtReg. /// @param VirtReg Virtual register to allocate for. /// @param VRM Virtual register map for function. - /// @param ReservedRegs Set of reserved registers as returned by - /// TargetRegisterInfo::getReservedRegs(). + /// @param RegClassInfo Information about reserved and allocatable registers. 
AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo); diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index fafc01044d4f..00874d411378 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -1,4 +1,4 @@ -//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===// +//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===// // // The LLVM Compiler Infrastructure // @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Analysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" @@ -149,33 +150,37 @@ llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos, /// consideration of global floating-point math flags. /// ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) { - ISD::CondCode FPC, FOC; switch (Pred) { - case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; - case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; - case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; - case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; - case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; - case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; - case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; - case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; - case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; - case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; - case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; - case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; - case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; - case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; - case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; - case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; - default: - llvm_unreachable("Invalid FCmp predicate opcode!"); - FOC = FPC = ISD::SETFALSE; - break; + case FCmpInst::FCMP_FALSE: return ISD::SETFALSE; + case FCmpInst::FCMP_OEQ: return ISD::SETOEQ; + case FCmpInst::FCMP_OGT: return ISD::SETOGT; + case FCmpInst::FCMP_OGE: return ISD::SETOGE; + case FCmpInst::FCMP_OLT: return ISD::SETOLT; + case FCmpInst::FCMP_OLE: return ISD::SETOLE; + case FCmpInst::FCMP_ONE: return ISD::SETONE; + case FCmpInst::FCMP_ORD: return ISD::SETO; + case FCmpInst::FCMP_UNO: return ISD::SETUO; + case FCmpInst::FCMP_UEQ: return ISD::SETUEQ; + case FCmpInst::FCMP_UGT: return ISD::SETUGT; + case FCmpInst::FCMP_UGE: return ISD::SETUGE; + case FCmpInst::FCMP_ULT: return ISD::SETULT; + case FCmpInst::FCMP_ULE: return ISD::SETULE; + case FCmpInst::FCMP_UNE: return ISD::SETUNE; + case FCmpInst::FCMP_TRUE: return ISD::SETTRUE; + default: llvm_unreachable("Invalid FCmp predicate opcode!"); + } +} + +ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) { + switch (CC) { + case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ; + case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE; + case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT; + case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE; + case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT; + case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE; + default: return CC; } - if (NoNaNsFPMath) - return FOC; - else - return FPC; } /// getICmpCondCode 
- Return the ISD condition code corresponding to @@ -195,7 +200,6 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { case ICmpInst::ICMP_UGT: return ISD::SETUGT; default: llvm_unreachable("Invalid ICmp predicate opcode!"); - return ISD::SETNE; } } @@ -221,12 +225,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // longjmp on x86), it can end up causing miscompilation that has not // been fully understood. if (!Ret && - (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false; + (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt || + !isa<UnreachableInst>(Term))) return false; // If I will have a chain, make sure no other instruction that will have a // chain interposes between I and the return. if (I->mayHaveSideEffects() || I->mayReadFromMemory() || - !I->isSafeToSpeculativelyExecute()) + !isSafeToSpeculativelyExecute(I)) for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; --BBI) { if (&*BBI == I) @@ -235,7 +240,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, if (isa<DbgInfoIntrinsic>(BBI)) continue; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || - !BBI->isSafeToSpeculativelyExecute()) + !isSafeToSpeculativelyExecute(BBI)) return false; } @@ -250,7 +255,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. const Function *F = ExitBB->getParent(); - unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) return false; @@ -285,12 +290,12 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, } bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, - const TargetLowering &TLI) { + SDValue &Chain, const TargetLowering &TLI) { const Function *F = DAG.getMachineFunction().getFunction(); // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. - unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); if (CallerRetAttr & ~Attribute::NoAlias) return false; @@ -299,5 +304,5 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, return false; // Check if the only use is a function return node. 
- return TLI.isUsedByReturnOnly(Node); + return TLI.isUsedByReturnOnly(Node, Chain); } diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 3f2387325360..b60fda86a6ba 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -29,6 +29,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" @@ -36,6 +37,12 @@ #include "llvm/ADT/Twine.h" using namespace llvm; +cl::opt<bool> +EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden, + cl::desc("Generate ARM EHABI tables with unwinding descriptors"), + cl::init(false)); + + ARMException::ARMException(AsmPrinter *A) : DwarfException(A), shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) @@ -72,13 +79,15 @@ void ARMException::EndFunction() { Asm->OutStreamer.EmitPersonality(PerSym); } - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + if (EnableARMEHABIDescriptors) { + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); - Asm->OutStreamer.EmitHandlerData(); + Asm->OutStreamer.EmitHandlerData(); - // Emit actual exception table - EmitExceptionTable(); + // Emit actual exception table + EmitExceptionTable(); + } } Asm->OutStreamer.EmitFnEnd(); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 1999f3608788..b0b2ff4882af 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -100,6 +100,7 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) OutStreamer(Streamer), LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { DD = 0; DE = 0; MMI = 0; LI = 0; + CurrentFnSym = CurrentFnSymForSize = 0; GCMetadataPrinters = 0; VerboseAsm = Streamer.isVerboseAsm(); } @@ -613,6 +614,10 @@ bool AsmPrinter::needsSEHMoves() { MF->getFunction()->needsUnwindTableEntry(); } +bool AsmPrinter::needsRelocationsForDwarfStringPool() const { + return MAI->doesDwarfUseRelocationsForStringPool(); +} + void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { MCSymbol *Label = MI.getOperand(0).getMCSymbol(); @@ -732,6 +737,18 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.EmitRawText(StringRef("\tnop\n")); } + const Function *F = MF->getFunction(); + for (Function::const_iterator i = F->begin(), e = F->end(); i != e; ++i) { + const BasicBlock *BB = i; + if (!BB->hasAddressTaken()) + continue; + MCSymbol *Sym = GetBlockAddressSymbol(BB); + if (Sym->isDefined()) + continue; + OutStreamer.AddComment("Address of block that was removed by CodeGen"); + OutStreamer.EmitLabel(Sym); + } + // Emit target-specific gunk after the function body. 
EmitFunctionBodyEnd(); @@ -745,7 +762,8 @@ void AsmPrinter::EmitFunctionBody() { const MCExpr *SizeExp = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext), - MCSymbolRefExpr::Create(CurrentFnSym, OutContext), + MCSymbolRefExpr::Create(CurrentFnSymForSize, + OutContext), OutContext); OutStreamer.EmitELFSize(CurrentFnSym, SizeExp); } @@ -780,7 +798,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { const TargetRegisterInfo *TRI = TM.getRegisterInfo(); int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); - for (const unsigned *SR = TRI->getSuperRegisters(MLoc.getReg()); + for (const uint16_t *SR = TRI->getSuperRegisters(MLoc.getReg()); *SR && Reg < 0; ++SR) { Reg = TRI->getDwarfRegNum(*SR, false); // FIXME: Get the bit range this register uses of the superregister @@ -841,6 +859,12 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, V, false); } + // Emit module flags. + SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + if (!ModuleFlags.empty()) + getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM); + // Finalize debug and EH information. if (DE) { { @@ -929,6 +953,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; // Get the function symbol. CurrentFnSym = Mang->getSymbol(MF.getFunction()); + CurrentFnSymForSize = CurrentFnSym; if (isVerbose()) LI = &getAnalysis<MachineLoopInfo>(); @@ -1120,7 +1145,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, const MCExpr *Value = 0; switch (MJTI->getEntryKind()) { case MachineJumpTableInfo::EK_Inline: - llvm_unreachable("Cannot emit EK_Inline jump table entry"); break; + llvm_unreachable("Cannot emit EK_Inline jump table entry"); case MachineJumpTableInfo::EK_Custom32: Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID, OutContext); @@ -1139,6 +1164,15 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, return; } + case MachineJumpTableInfo::EK_GPRel64BlockAddress: { + // EK_GPRel64BlockAddress - Each entry is an address of block, encoded + // with a relocation as gp-relative, e.g.: + // .gpdword LBB123 + MCSymbol *MBBSym = MBB->getSymbol(); + OutStreamer.EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); + return; + } + case MachineJumpTableInfo::EK_LabelDifference32: { // EK_LabelDifference32 - Each entry is the address of the block minus // the address of the jump table. 
This is used for PIC jump tables where @@ -1191,12 +1225,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { assert(GV->hasInitializer() && "Not a special LLVM global!"); - const TargetData *TD = TM.getTargetData(); - unsigned Align = Log2_32(TD->getPointerPrefAlignment()); if (GV->getName() == "llvm.global_ctors") { - OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection()); - EmitAlignment(Align); - EmitXXStructorList(GV->getInitializer()); + EmitXXStructorList(GV->getInitializer(), /* isCtor */ true); if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { @@ -1208,9 +1238,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { } if (GV->getName() == "llvm.global_dtors") { - OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection()); - EmitAlignment(Align); - EmitXXStructorList(GV->getInitializer()); + EmitXXStructorList(GV->getInitializer(), /* isCtor */ false); if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { @@ -1240,7 +1268,7 @@ void AsmPrinter::EmitLLVMUsedList(const Constant *List) { } } -typedef std::pair<int, Constant*> Structor; +typedef std::pair<unsigned, Constant*> Structor; static bool priority_order(const Structor& lhs, const Structor& rhs) { return lhs.first < rhs.first; @@ -1248,7 +1276,7 @@ static bool priority_order(const Structor& lhs, const Structor& rhs) { /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. -void AsmPrinter::EmitXXStructorList(const Constant *List) { +void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { // Should be an array of '{ int, void ()* }' structs. The first value is the // init priority. if (!isa<ConstantArray>(List)) return; @@ -1274,19 +1302,20 @@ void AsmPrinter::EmitXXStructorList(const Constant *List) { CS->getOperand(1))); } - // Emit the function pointers in reverse priority order. - switch (MAI->getStructorOutputOrder()) { - case Structors::None: - break; - case Structors::PriorityOrder: - std::sort(Structors.begin(), Structors.end(), priority_order); - break; - case Structors::ReversePriorityOrder: - std::sort(Structors.rbegin(), Structors.rend(), priority_order); - break; + // Emit the function pointers in the target-specific order + const TargetData *TD = TM.getTargetData(); + unsigned Align = Log2_32(TD->getPointerPrefAlignment()); + std::stable_sort(Structors.begin(), Structors.end(), priority_order); + for (unsigned i = 0, e = Structors.size(); i != e; ++i) { + const MCSection *OutputSection = + (isCtor ? + getObjFileLowering().getStaticCtorSection(Structors[i].first) : + getObjFileLowering().getStaticDtorSection(Structors[i].first)); + OutStreamer.SwitchSection(OutputSection); + if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection()) + EmitAlignment(Align); + EmitXXStructor(Structors[i].second); } - for (unsigned i = 0, e = Structors.size(); i != e; ++i) - EmitGlobalConstant(Structors[i].second); } //===--------------------------------------------------------------------===// @@ -1423,7 +1452,6 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); if (CE == 0) { llvm_unreachable("Unknown constant value to lower!"); - return MCConstantExpr::Create(0, Ctx); } switch (CE->getOpcode()) { @@ -1445,7 +1473,6 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { !AP.MF ? 
0 : AP.MF->getFunction()->getParent()); report_fatal_error(OS.str()); } - return MCConstantExpr::Create(0, Ctx); case Instruction::GetElementPtr: { const TargetData &TD = *AP.TM.getTargetData(); // Generate a symbolic expression for the byte address @@ -1543,6 +1570,19 @@ static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the /// byte value. If it is not a repeated sequence, return -1. +static int isRepeatedByteSequence(const ConstantDataSequential *V) { + StringRef Data = V->getRawDataValues(); + assert(!Data.empty() && "Empty aggregates should be CAZ node"); + char C = Data[0]; + for (unsigned i = 1, e = Data.size(); i != e; ++i) + if (Data[i] != C) return -1; + return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1. +} + + +/// isRepeatedByteSequence - Determine whether the given value is +/// composed of a repeated sequence of identical bytes and return the +/// byte value. If it is not a repeated sequence, return -1. static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { @@ -1568,8 +1608,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) { // Make sure all array elements are sequences of the same repeated // byte. - if (CA->getNumOperands() == 0) return -1; - + assert(CA->getNumOperands() != 0 && "Should be a CAZ"); int Byte = isRepeatedByteSequence(CA->getOperand(0), TM); if (Byte == -1) return -1; @@ -1580,37 +1619,92 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { } return Byte; } + + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) + return isRepeatedByteSequence(CDS); return -1; } -static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, - AsmPrinter &AP) { - if (AddrSpace != 0 || !CA->isString()) { - // Not a string. Print the values in successive locations. - - // See if we can aggregate some values. Make sure it can be - // represented as a series of bytes of the constant value. - int Value = isRepeatedByteSequence(CA, AP.TM); - - if (Value != -1) { - uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType()); - AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); +static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS, + unsigned AddrSpace,AsmPrinter &AP){ + + // See if we can aggregate this into a .fill, if so, emit it as such. + int Value = isRepeatedByteSequence(CDS, AP.TM); + if (Value != -1) { + uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CDS->getType()); + // Don't emit a 1-byte object as a .fill. + if (Bytes > 1) + return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + } + + // If this can be emitted with .ascii/.asciz, emit it as such. + if (CDS->isString()) + return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace); + + // Otherwise, emit the values in successive locations. 
+ unsigned ElementByteSize = CDS->getElementByteSize(); + if (isa<IntegerType>(CDS->getElementType())) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", + CDS->getElementAsInteger(i)); + AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i), + ElementByteSize, AddrSpace); + } + } else if (ElementByteSize == 4) { + // FP Constants are printed as integer constants to avoid losing + // precision. + assert(CDS->getElementType()->isFloatTy()); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + union { + float F; + uint32_t I; + }; + + F = CDS->getElementAsFloat(i); + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << "float " << F << '\n'; + AP.OutStreamer.EmitIntValue(I, 4, AddrSpace); } - else { - for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + } else { + assert(CDS->getElementType()->isDoubleTy()); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + union { + double F; + uint64_t I; + }; + + F = CDS->getElementAsDouble(i); + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << "double " << F << '\n'; + AP.OutStreamer.EmitIntValue(I, 8, AddrSpace); } - return; } - // Otherwise, it can be emitted as .ascii. - SmallVector<char, 128> TmpVec; - TmpVec.reserve(CA->getNumOperands()); - for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - TmpVec.push_back(cast<ConstantInt>(CA->getOperand(i))->getZExtValue()); + const TargetData &TD = *AP.TM.getTargetData(); + unsigned Size = TD.getTypeAllocSize(CDS->getType()); + unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) * + CDS->getNumElements(); + if (unsigned Padding = Size - EmittedSize) + AP.OutStreamer.EmitZeros(Padding, AddrSpace); - AP.OutStreamer.EmitBytes(StringRef(TmpVec.data(), TmpVec.size()), AddrSpace); +} + +static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, + AsmPrinter &AP) { + // See if we can aggregate some values. Make sure it can be + // represented as a series of bytes of the constant value. + int Value = isRepeatedByteSequence(CA, AP.TM); + + if (Value != -1) { + uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType()); + AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + } + else { + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) + EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + } } static void EmitGlobalConstantVector(const ConstantVector *CV, @@ -1656,29 +1750,44 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AsmPrinter &AP) { - // FP Constants are printed as integer constants to avoid losing - // precision. 
- if (CFP->getType()->isDoubleTy()) { + if (CFP->getType()->isHalfTy()) { if (AP.isVerbose()) { - double Val = CFP->getValueAPF().convertToDouble(); - AP.OutStreamer.GetCommentOS() << "double " << Val << '\n'; + SmallString<10> Str; + CFP->getValueAPF().toString(Str); + AP.OutStreamer.GetCommentOS() << "half " << Str << '\n'; } - uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); + AP.OutStreamer.EmitIntValue(Val, 2, AddrSpace); return; } if (CFP->getType()->isFloatTy()) { if (AP.isVerbose()) { float Val = CFP->getValueAPF().convertToFloat(); - AP.OutStreamer.GetCommentOS() << "float " << Val << '\n'; + uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.GetCommentOS() << "float " << Val << '\n' + << " (" << format("0x%x", IntVal) << ")\n"; } uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace); return; } + // FP Constants are printed as integer constants to avoid losing + // precision. + if (CFP->getType()->isDoubleTy()) { + if (AP.isVerbose()) { + double Val = CFP->getValueAPF().convertToDouble(); + uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.GetCommentOS() << "double " << Val << '\n' + << " (" << format("0x%lx", IntVal) << ")\n"; + } + + uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); + return; + } + if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex // API needed to prevent premature destruction @@ -1742,20 +1851,20 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI, static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, AsmPrinter &AP) { - if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) { - uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); + const TargetData *TD = AP.TM.getTargetData(); + uint64_t Size = TD->getTypeAllocSize(CV->getType()); + if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) return AP.OutStreamer.EmitZeros(Size, AddrSpace); - } if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); switch (Size) { case 1: case 2: case 4: case 8: if (AP.isVerbose()) - AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue()); + AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", + CI->getZExtValue()); AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); return; default: @@ -1764,29 +1873,45 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, } } - if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return EmitGlobalConstantArray(CVA, AddrSpace, AP); - - if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return EmitGlobalConstantStruct(CVS, AddrSpace, AP); - if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) return EmitGlobalConstantFP(CFP, AddrSpace, AP); if (isa<ConstantPointerNull>(CV)) { - unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); return; } + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV)) + return EmitGlobalConstantDataSequential(CDS, AddrSpace, AP); + + if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) + return EmitGlobalConstantArray(CVA, AddrSpace, AP); + + if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) + return 
EmitGlobalConstantStruct(CVS, AddrSpace, AP); + + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of + // vectors). + if (CE->getOpcode() == Instruction::BitCast) + return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); + + if (Size > 8) { + // If the constant expression's size is greater than 64-bits, then we have + // to emit the value in chunks. Try to constant fold the value and emit it + // that way. + Constant *New = ConstantFoldConstantExpression(CE, TD); + if (New && New != CE) + return EmitGlobalConstantImpl(New, AddrSpace, AP); + } + } + if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) return EmitGlobalConstantVector(V, AddrSpace, AP); - + // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. - AP.OutStreamer.EmitValue(LowerConstant(CV, AP), - AP.TM.getTargetData()->getTypeAllocSize(CV->getType()), - AddrSpace); + AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. @@ -1953,7 +2078,7 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB, void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { // Emit an alignment directive for this block, if needed. if (unsigned Align = MBB->getAlignment()) - EmitAlignment(Log2_32(Align)); + EmitAlignment(Align); // If the block has its address taken, emit any labels that were used to // reference the block. It is possible that there is more than one label @@ -1970,27 +2095,22 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { OutStreamer.EmitLabel(Syms[i]); } + // Print some verbose block comments. + if (isVerbose()) { + if (const BasicBlock *BB = MBB->getBasicBlock()) + if (BB->hasName()) + OutStreamer.AddComment("%" + BB->getName()); + EmitBasicBlockLoopComments(*MBB, LI, *this); + } + // Print the main label for the block. if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) { if (isVerbose() && OutStreamer.hasRawTextSupport()) { - if (const BasicBlock *BB = MBB->getBasicBlock()) - if (BB->hasName()) - OutStreamer.AddComment("%" + BB->getName()); - - EmitBasicBlockLoopComments(*MBB, LI, *this); - // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" + Twine(MBB->getNumber()) + ":"); } } else { - if (isVerbose()) { - if (const BasicBlock *BB = MBB->getBasicBlock()) - if (BB->hasName()) - OutStreamer.AddComment("%" + BB->getName()); - EmitBasicBlockLoopComments(*MBB, LI, *this); - } - OutStreamer.EmitLabel(MBB->getSymbol()); } } @@ -2048,7 +2168,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { MachineInstr &MI = *II; // If it is not a simple branch, we are in a table somewhere. 
- if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch()) + if (!MI.isBranch() || MI.isIndirectBranch()) return false; // If we are the operands of one of the branches, this is not @@ -2090,6 +2210,4 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { } report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); - return 0; } - diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 4d6c28118427..90d511cbab0a 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -25,6 +25,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -35,23 +36,8 @@ using namespace llvm; void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - - if (MAI->hasLEB128()) { - OutStreamer.EmitSLEB128IntValue(Value); - return; - } - // If we don't have .sleb128, emit as .bytes. - int Sign = Value >> (8 * sizeof(Value) - 1); - bool IsMore; - - do { - unsigned char Byte = static_cast<unsigned char>(Value & 0x7f); - Value >>= 7; - IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; - if (IsMore) Byte |= 0x80; - OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0); - } while (IsMore); + OutStreamer.EmitSLEB128IntValue(Value); } /// EmitULEB128 - emit the specified signed leb128 value. @@ -60,25 +46,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc, if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - // FIXME: Should we add a PadTo option to the streamer? - if (MAI->hasLEB128() && PadTo == 0) { - OutStreamer.EmitULEB128IntValue(Value); - return; - } - - // If we don't have .uleb128 or we want to emit padding, emit as .bytes. - do { - unsigned char Byte = static_cast<unsigned char>(Value & 0x7f); - Value >>= 7; - if (Value || PadTo != 0) Byte |= 0x80; - OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0); - } while (Value); - - if (PadTo) { - if (PadTo > 1) - OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/); - OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/); - } + OutStreamer.EmitULEB128IntValue(Value, 0/*addrspace*/, PadTo); } /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. @@ -143,7 +111,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { return 0; switch (Encoding & 0x07) { - default: assert(0 && "Invalid encoded value."); + default: llvm_unreachable("Invalid encoded value."); case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize(); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: return 4; @@ -177,9 +145,8 @@ void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, const MCSymbol *SectionLabel) const { // On COFF targets, we have to emit the special .secrel32 directive. - if (const char *SecOffDir = MAI->getDwarfSectionOffsetDirective()) { - // FIXME: MCize. 
- OutStreamer.EmitRawText(SecOffDir + Twine(Label->getName())); + if (MAI->getDwarfSectionOffsetDirective()) { + OutStreamer.EmitCOFFSecRel32(Label); return; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 8eda889155a2..d60585465be0 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -326,7 +326,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; } - if (OpNo >= MI->getNumOperands()) { + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. + if (OpNo >= MI->getNumOperands() || + MI->getOperand(OpNo).isMetadata()) { Error = true; } else { unsigned OpFlags = MI->getOperand(OpNo).getImm(); diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 67d927348b54..58fe2ed9d357 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_library(LLVMAsmPrinter AsmPrinterDwarf.cpp AsmPrinterInlineAsm.cpp DIE.cpp + DwarfAccelTable.cpp DwarfCFIException.cpp DwarfCompileUnit.cpp DwarfDebug.cpp @@ -11,13 +12,3 @@ add_llvm_library(LLVMAsmPrinter OcamlGCPrinter.cpp Win64Exception.cpp ) - -add_llvm_library_dependencies(LLVMAsmPrinter - LLVMAnalysis - LLVMCodeGen - LLVMCore - LLVMMC - LLVMMCParser - LLVMSupport - LLVMTarget - ) diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 9c1ce761b0c5..3776848e3f47 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -112,15 +112,6 @@ DIE::~DIE() { delete Children[i]; } -/// addSiblingOffset - Add a sibling offset field to the front of the DIE. -/// -DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) { - DIEInteger *DI = new (A) DIEInteger(0); - Values.insert(Values.begin(), DI); - Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4); - return DI; -} - #ifndef NDEBUG void DIE::print(raw_ostream &O, unsigned IncIndent) { IndentCount += IncIndent; @@ -174,6 +165,7 @@ void DIE::dump() { } #endif +void DIEValue::anchor() { } #ifndef NDEBUG void DIEValue::dump() { @@ -223,33 +215,14 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize(); - default: llvm_unreachable("DIE Value form not supported yet"); break; + default: llvm_unreachable("DIE Value form not supported yet"); } - return 0; } #ifndef NDEBUG void DIEInteger::print(raw_ostream &O) { - O << "Int: " << (int64_t)Integer - << format(" 0x%llx", (unsigned long long)Integer); -} -#endif - -//===----------------------------------------------------------------------===// -// DIEString Implementation -//===----------------------------------------------------------------------===// - -/// EmitValue - Emit string value. -/// -void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const { - AP->OutStreamer.EmitBytes(Str, /*addrspace*/0); - // Emit nul terminator. 
- AP->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0); -} - -#ifndef NDEBUG -void DIEString::print(raw_ostream &O) { - O << "Str: \"" << Str << "\""; + O << "Int: " << (int64_t)Integer << " 0x"; + O.write_hex(Integer); } #endif @@ -267,6 +240,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { /// unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; return AP->getTargetData().getPointerSize(); } @@ -290,6 +264,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const { /// unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; return AP->getTargetData().getPointerSize(); } @@ -335,7 +310,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) { /// void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const { switch (Form) { - default: assert(0 && "Improper form for block"); break; + default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; @@ -355,9 +330,8 @@ unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size); - default: llvm_unreachable("Improper form for block"); break; + default: llvm_unreachable("Improper form for block"); } - return 0; } #ifndef NDEBUG diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 7d61f1edff4a..f93ea1b045b2 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -31,17 +31,17 @@ namespace llvm { class DIEAbbrevData { /// Attribute - Dwarf attribute code. /// - unsigned Attribute; + uint16_t Attribute; /// Form - Dwarf form code. /// - unsigned Form; + uint16_t Form; public: - DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {} + DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {} // Accessors. - unsigned getAttribute() const { return Attribute; } - unsigned getForm() const { return Form; } + uint16_t getAttribute() const { return Attribute; } + uint16_t getForm() const { return Form; } /// Profile - Used to gather unique data for the abbreviation folding set. /// @@ -54,41 +54,41 @@ namespace llvm { class DIEAbbrev : public FoldingSetNode { /// Tag - Dwarf tag code. /// - unsigned Tag; + uint16_t Tag; - /// Unique number for node. + /// ChildrenFlag - Dwarf children flag. /// - unsigned Number; + uint16_t ChildrenFlag; - /// ChildrenFlag - Dwarf children flag. + /// Unique number for node. /// - unsigned ChildrenFlag; + unsigned Number; /// Data - Raw data bytes for abbreviation. /// SmallVector<DIEAbbrevData, 8> Data; public: - DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {} + DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} // Accessors. 
- unsigned getTag() const { return Tag; } + uint16_t getTag() const { return Tag; } unsigned getNumber() const { return Number; } - unsigned getChildrenFlag() const { return ChildrenFlag; } + uint16_t getChildrenFlag() const { return ChildrenFlag; } const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; } - void setTag(unsigned T) { Tag = T; } - void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; } + void setTag(uint16_t T) { Tag = T; } + void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; } void setNumber(unsigned N) { Number = N; } /// AddAttribute - Adds another set of attribute information to the /// abbreviation. - void AddAttribute(unsigned Attribute, unsigned Form) { + void AddAttribute(uint16_t Attribute, uint16_t Form) { Data.push_back(DIEAbbrevData(Attribute, Form)); } /// AddFirstAttribute - Adds a set of attribute information to the front /// of the abbreviation. - void AddFirstAttribute(unsigned Attribute, unsigned Form) { + void AddFirstAttribute(uint16_t Attribute, uint16_t Form) { Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form)); } @@ -113,10 +113,6 @@ namespace llvm { class DIE { protected: - /// Abbrev - Buffer for constructing abbreviation. - /// - DIEAbbrev Abbrev; - /// Offset - Offset in debug info section. /// unsigned Offset; @@ -125,6 +121,10 @@ namespace llvm { /// unsigned Size; + /// Abbrev - Buffer for constructing abbreviation. + /// + DIEAbbrev Abbrev; + /// Children DIEs. /// std::vector<DIE *> Children; @@ -139,8 +139,8 @@ namespace llvm { mutable unsigned IndentCount; public: explicit DIE(unsigned Tag) - : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0), - Size(0), Parent (0), IndentCount(0) {} + : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0), + IndentCount(0) {} virtual ~DIE(); // Accessors. @@ -163,16 +163,6 @@ namespace llvm { Values.push_back(Value); } - /// SiblingOffset - Return the offset of the debug information entry's - /// sibling. - unsigned getSiblingOffset() const { return Offset + Size; } - - /// addSiblingOffset - Add a sibling offset field to the front of the DIE. - /// The caller is responsible for deleting the return value at or after the - /// same time it destroys this DIE. - /// - DIEValue *addSiblingOffset(BumpPtrAllocator &A); - /// addChild - Add a child to the DIE. /// void addChild(DIE *Child) { @@ -195,12 +185,12 @@ namespace llvm { /// DIEValue - A debug information entry value. /// class DIEValue { + virtual void anchor(); public: enum { isInteger, isString, isLabel, - isSectionOffset, isDelta, isEntry, isBlock @@ -276,33 +266,6 @@ namespace llvm { }; //===--------------------------------------------------------------------===// - /// DIEString - A string value DIE. This DIE keeps string reference only. - /// - class DIEString : public DIEValue { - const StringRef Str; - public: - explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {} - - /// EmitValue - Emit string value. - /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; - - /// SizeOf - Determine size of string value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned /*Form*/) const { - return Str.size() + sizeof(char); // sizeof('\0'); - } - - // Implement isa/cast/dyncast. 
- static bool classof(const DIEString *) { return true; } - static bool classof(const DIEValue *S) { return S->getType() == isString; } - -#ifndef NDEBUG - virtual void print(raw_ostream &O); -#endif - }; - - //===--------------------------------------------------------------------===// /// DIELabel - A label expression DIE. // class DIELabel : public DIEValue { @@ -359,7 +322,7 @@ namespace llvm { }; //===--------------------------------------------------------------------===// - /// DIEntry - A pointer to another debug information entry. An instance of + /// DIEEntry - A pointer to another debug information entry. An instance of /// this class can also be used as a proxy for a debug information entry not /// yet defined (ie. types.) class DIEEntry : public DIEValue { diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp new file mode 100644 index 000000000000..660684d1bea5 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -0,0 +1,287 @@ +//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf accelerator tables. +// +//===----------------------------------------------------------------------===// + +#include "DwarfAccelTable.h" +#include "DwarfDebug.h" +#include "DIE.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) { + switch (AT) { + case eAtomTypeNULL: return "eAtomTypeNULL"; + case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset"; + case eAtomTypeCUOffset: return "eAtomTypeCUOffset"; + case eAtomTypeTag: return "eAtomTypeTag"; + case eAtomTypeNameFlags: return "eAtomTypeNameFlags"; + case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags"; + } + llvm_unreachable("invalid AtomType!"); +} + +// The general case would need to have a less hard coded size for the +// length of the HeaderData, however, if we're constructing based on a +// single Atom then we know it will always be: 4 + 4 + 2 + 2. +DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) : + Header(12), + HeaderData(atom) { +} + +// The length of the header data is always going to be 4 + 4 + 4*NumAtoms. +DwarfAccelTable::DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &atomList) : + Header(8 + (atomList.size() * 4)), + HeaderData(atomList) { +} + +DwarfAccelTable::~DwarfAccelTable() { + for (size_t i = 0, e = Data.size(); i < e; ++i) + delete Data[i]; + for (StringMap<DataArray>::iterator + EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) + for (DataArray::iterator DI = EI->second.begin(), + DE = EI->second.end(); DI != DE; ++DI) + delete (*DI); +} + +void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) { + // If the string is in the list already then add this die to the list + // otherwise add a new one. + DataArray &DIEs = Entries[Name]; + DIEs.push_back(new HashDataContents(die, Flags)); +} + +void DwarfAccelTable::ComputeBucketCount(void) { + // First get the number of unique hashes. 
+ std::vector<uint32_t> uniques(Data.size()); + for (size_t i = 0, e = Data.size(); i < e; ++i) + uniques[i] = Data[i]->HashValue; + array_pod_sort(uniques.begin(), uniques.end()); + std::vector<uint32_t>::iterator p = + std::unique(uniques.begin(), uniques.end()); + uint32_t num = std::distance(uniques.begin(), p); + + // Then compute the bucket size, minimum of 1 bucket. + if (num > 1024) Header.bucket_count = num/4; + if (num > 16) Header.bucket_count = num/2; + else Header.bucket_count = num > 0 ? num : 1; + + Header.hashes_count = num; +} + +namespace { + // DIESorter - comparison predicate that sorts DIEs by their offset. + struct DIESorter { + bool operator()(const struct DwarfAccelTable::HashDataContents *A, + const struct DwarfAccelTable::HashDataContents *B) const { + return A->Die->getOffset() < B->Die->getOffset(); + } + }; +} + +void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) { + // Create the individual hash data outputs. + for (StringMap<DataArray>::iterator + EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + struct HashData *Entry = new HashData((*EI).getKeyData()); + + // Unique the entries. + std::stable_sort(EI->second.begin(), EI->second.end(), DIESorter()); + EI->second.erase(std::unique(EI->second.begin(), EI->second.end()), + EI->second.end()); + + for (DataArray::const_iterator DI = EI->second.begin(), + DE = EI->second.end(); + DI != DE; ++DI) + Entry->addData((*DI)); + Data.push_back(Entry); + } + + // Figure out how many buckets we need, then compute the bucket + // contents and the final ordering. We'll emit the hashes and offsets + // by doing a walk during the emission phase. We add temporary + // symbols to the data so that we can reference them during the offset + // later, we'll emit them when we emit the data. + ComputeBucketCount(); + + // Compute bucket contents and final ordering. + Buckets.resize(Header.bucket_count); + for (size_t i = 0, e = Data.size(); i < e; ++i) { + uint32_t bucket = Data[i]->HashValue % Header.bucket_count; + Buckets[bucket].push_back(Data[i]); + Data[i]->Sym = Asm->GetTempSymbol(Prefix, i); + } +} + +// Emits the header for the table via the AsmPrinter. +void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { + Asm->OutStreamer.AddComment("Header Magic"); + Asm->EmitInt32(Header.magic); + Asm->OutStreamer.AddComment("Header Version"); + Asm->EmitInt16(Header.version); + Asm->OutStreamer.AddComment("Header Hash Function"); + Asm->EmitInt16(Header.hash_function); + Asm->OutStreamer.AddComment("Header Bucket Count"); + Asm->EmitInt32(Header.bucket_count); + Asm->OutStreamer.AddComment("Header Hash Count"); + Asm->EmitInt32(Header.hashes_count); + Asm->OutStreamer.AddComment("Header Data Length"); + Asm->EmitInt32(Header.header_data_len); + Asm->OutStreamer.AddComment("HeaderData Die Offset Base"); + Asm->EmitInt32(HeaderData.die_offset_base); + Asm->OutStreamer.AddComment("HeaderData Atom Count"); + Asm->EmitInt32(HeaderData.Atoms.size()); + for (size_t i = 0; i < HeaderData.Atoms.size(); i++) { + Atom A = HeaderData.Atoms[i]; + Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type)); + Asm->EmitInt16(A.type); + Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form)); + Asm->EmitInt16(A.form); + } +} + +// Walk through and emit the buckets for the table. This will look +// like a list of numbers of how many elements are in each bucket. 
+void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { + unsigned index = 0; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + Asm->OutStreamer.AddComment("Bucket " + Twine(i)); + if (Buckets[i].size() != 0) + Asm->EmitInt32(index); + else + Asm->EmitInt32(UINT32_MAX); + index += Buckets[i].size(); + } +} + +// Walk through the buckets and emit the individual hashes for each +// bucket. +void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i)); + Asm->EmitInt32((*HI)->HashValue); + } + } +} + +// Walk through the buckets and emit the individual offsets for each +// element in each bucket. This is done via a symbol subtraction from the +// beginning of the section. The non-section symbol will be output later +// when we emit the actual data. +void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i)); + MCContext &Context = Asm->OutStreamer.getContext(); + const MCExpr *Sub = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context), + MCSymbolRefExpr::Create(SecBegin, Context), + Context); + Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0); + } + } +} + +// Walk through the buckets and emit the full data for each element in +// the bucket. For the string case emit the dies and the various offsets. +// Terminate each HashData bucket with 0. +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { + uint64_t PrevHash = UINT64_MAX; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + // Remember to emit the label for our offset. + Asm->OutStreamer.EmitLabel((*HI)->Sym); + Asm->OutStreamer.AddComment((*HI)->Str); + Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str), + D->getStringPool()); + Asm->OutStreamer.AddComment("Num DIEs"); + Asm->EmitInt32((*HI)->Data.size()); + for (std::vector<struct HashDataContents*>::const_iterator + DI = (*HI)->Data.begin(), DE = (*HI)->Data.end(); + DI != DE; ++DI) { + // Emit the DIE offset + Asm->EmitInt32((*DI)->Die->getOffset()); + // If we have multiple Atoms emit that info too. + // FIXME: A bit of a hack, we either emit only one atom or all info. + if (HeaderData.Atoms.size() > 1) { + Asm->EmitInt16((*DI)->Die->getTag()); + Asm->EmitInt8((*DI)->Flags); + } + } + // Emit a 0 to terminate the data unless we have a hash collision. + if (PrevHash != (*HI)->HashValue) + Asm->EmitInt32(0); + PrevHash = (*HI)->HashValue; + } + } +} + +// Emit the entire data structure to the output file. +void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, + DwarfDebug *D) { + // Emit the header. + EmitHeader(Asm); + + // Emit the buckets. + EmitBuckets(Asm); + + // Emit the hashes. + EmitHashes(Asm); + + // Emit the offsets. + EmitOffsets(Asm, SecBegin); + + // Emit the hash data. 
+ EmitData(Asm, D); +} + +#ifndef NDEBUG +void DwarfAccelTable::print(raw_ostream &O) { + + Header.print(O); + HeaderData.print(O); + + O << "Entries: \n"; + for (StringMap<DataArray>::const_iterator + EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + O << "Name: " << EI->getKeyData() << "\n"; + for (DataArray::const_iterator DI = EI->second.begin(), + DE = EI->second.end(); + DI != DE; ++DI) + (*DI)->print(O); + } + + O << "Buckets and Hashes: \n"; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) + (*HI)->print(O); + + O << "Data: \n"; + for (std::vector<HashData*>::const_iterator + DI = Data.begin(), DE = Data.end(); DI != DE; ++DI) + (*DI)->print(O); + + +} +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h new file mode 100644 index 000000000000..2278d4c784f4 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -0,0 +1,290 @@ +//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf accelerator tables. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ +#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ + +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +#include "DIE.h" +#include <vector> +#include <map> + +// The dwarf accelerator tables are an indirect hash table optimized +// for null lookup rather than access to known data. They are output into +// an on-disk format that looks like this: +// +// .-------------. +// | HEADER | +// |-------------| +// | BUCKETS | +// |-------------| +// | HASHES | +// |-------------| +// | OFFSETS | +// |-------------| +// | DATA | +// `-------------' +// +// where the header contains a magic number, version, type of hash function, +// the number of buckets, total number of hashes, and room for a special +// struct of data and the length of that struct. +// +// The buckets contain an index (e.g. 6) into the hashes array. The hashes +// section contains all of the 32-bit hash values in contiguous memory, and +// the offsets contain the offset into the data area for the particular +// hash. +// +// For a lookup example, we could hash a function name and take it modulo the +// number of buckets giving us our bucket. From there we take the bucket value +// as an index into the hashes table and look at each successive hash as long +// as the hash value is still the same modulo result (bucket value) as earlier. +// If we have a match we look at that same entry in the offsets table and +// grab the offset in the data for our final match. 
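[Editorial illustration, not part of the patch.] The comment above describes the consumer-side lookup over the emitted table. Below is a minimal sketch of that lookup in C++, assuming the bucket, hash and offset arrays have already been read out of the section; the names Lookup, Buckets, Hashes and Offsets are hypothetical. A real reader would additionally compare the string stored at the returned data offset, since 32-bit DJB hashes can collide.

#include <cstdint>
#include <string>
#include <vector>

// Same DJB hash the table is built with.
static uint32_t HashDJB(const std::string &Str) {
  uint32_t h = 5381;
  for (char c : Str)
    h = ((h << 5) + h) + static_cast<unsigned char>(c);
  return h;
}

// Returns the data-area offset for Name, or UINT32_MAX if absent.
uint32_t Lookup(const std::string &Name,
                const std::vector<uint32_t> &Buckets,
                const std::vector<uint32_t> &Hashes,
                const std::vector<uint32_t> &Offsets) {
  uint32_t Hash = HashDJB(Name);
  uint32_t BucketCount = Buckets.size();
  uint32_t Bucket = Hash % BucketCount;

  uint32_t Index = Buckets[Bucket];
  if (Index == UINT32_MAX)   // Empty bucket, as emitted by EmitBuckets.
    return UINT32_MAX;

  // Hashes belonging to one bucket are contiguous; stop scanning once the
  // modulo result no longer matches this bucket.
  for (uint32_t i = Index;
       i < Hashes.size() && Hashes[i] % BucketCount == Bucket; ++i)
    if (Hashes[i] == Hash)
      return Offsets[i];     // Offset of the HashData entry for this name.

  return UINT32_MAX;
}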
+ +namespace llvm { + +class AsmPrinter; +class DIE; +class DwarfDebug; + +class DwarfAccelTable { + + enum HashFunctionType { + eHashFunctionDJB = 0u + }; + + static uint32_t HashDJB (StringRef Str) { + uint32_t h = 5381; + for (unsigned i = 0, e = Str.size(); i != e; ++i) + h = ((h << 5) + h) + Str[i]; + return h; + } + + // Helper function to compute the number of buckets needed based on + // the number of unique hashes. + void ComputeBucketCount (void); + + struct TableHeader { + uint32_t magic; // 'HASH' magic value to allow endian detection + uint16_t version; // Version number. + uint16_t hash_function; // The hash function enumeration that was used. + uint32_t bucket_count; // The number of buckets in this hash table. + uint32_t hashes_count; // The total number of unique hash values + // and hash data offsets in this table. + uint32_t header_data_len; // The bytes to skip to get to the hash + // indexes (buckets) for correct alignment. + // Also written to disk is the implementation specific header data. + + static const uint32_t MagicHash = 0x48415348; + + TableHeader (uint32_t data_len) : + magic (MagicHash), version (1), hash_function (eHashFunctionDJB), + bucket_count (0), hashes_count (0), header_data_len (data_len) + {} + +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "Magic: " << format("0x%x", magic) << "\n" + << "Version: " << version << "\n" + << "Hash Function: " << hash_function << "\n" + << "Bucket Count: " << bucket_count << "\n" + << "Header Data Length: " << header_data_len << "\n"; + } + void dump() { print(dbgs()); } +#endif + }; + +public: + // The HeaderData describes the form of each set of data. In general this + // is as a list of atoms (atom_count) where each atom contains a type + // (AtomType type) of data, and an encoding form (form). In the case of + // data that is referenced via DW_FORM_ref_* the die_offset_base is + // used to describe the offset for all forms in the list of atoms. + // This also serves as a public interface of sorts. + // When written to disk this will have the form: + // + // uint32_t die_offset_base + // uint32_t atom_count + // atom_count Atoms + enum AtomType { + eAtomTypeNULL = 0u, + eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding + eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that + // contains the item in question + eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as + // DW_FORM_data1 (if no tags exceed 255) or + // DW_FORM_data2. + eAtomTypeNameFlags = 4u, // Flags from enum NameFlags + eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags + }; + + enum TypeFlags { + eTypeFlagClassMask = 0x0000000fu, + + // Always set for C++, only set for ObjC if this is the + // @implementation for a class. + eTypeFlagClassIsImplementation = ( 1u << 1 ) + }; + + // Make these public so that they can be used as a general interface to + // the class. 
+ struct Atom { + AtomType type; // enum AtomType + uint16_t form; // DWARF DW_FORM_ defines + + Atom(AtomType type, uint16_t form) : type(type), form(form) {} + static const char * AtomTypeString(enum AtomType); +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "Type: " << AtomTypeString(type) << "\n" + << "Form: " << dwarf::FormEncodingString(form) << "\n"; + } + void dump() { + print(dbgs()); + } +#endif + }; + + private: + struct TableHeaderData { + + uint32_t die_offset_base; + std::vector<Atom> Atoms; + + TableHeaderData(std::vector<DwarfAccelTable::Atom> &AtomList, + uint32_t offset = 0) : + die_offset_base(offset) { + for (size_t i = 0, e = AtomList.size(); i != e; ++i) + Atoms.push_back(AtomList[i]); + } + + TableHeaderData(DwarfAccelTable::Atom Atom, uint32_t offset = 0) + : die_offset_base(offset) { + Atoms.push_back(Atom); + } + +#ifndef NDEBUG + void print (raw_ostream &O) { + O << "die_offset_base: " << die_offset_base << "\n"; + for (size_t i = 0; i < Atoms.size(); i++) + Atoms[i].print(O); + } + void dump() { + print(dbgs()); + } +#endif + }; + + // The data itself consists of a str_offset, a count of the DIEs in the + // hash and the offsets to the DIEs themselves. + // On disk each data section is ended with a 0 KeyType as the end of the + // hash chain. + // On output this looks like: + // uint32_t str_offset + // uint32_t hash_data_count + // HashData[hash_data_count] +public: + struct HashDataContents { + DIE *Die; // Offsets + char Flags; // Specific flags to output + + HashDataContents(DIE *D, char Flags) : + Die(D), + Flags(Flags) { } + #ifndef NDEBUG + void print(raw_ostream &O) const { + O << " Offset: " << Die->getOffset() << "\n"; + O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n"; + O << " Flags: " << Flags << "\n"; + } + #endif + }; +private: + struct HashData { + StringRef Str; + uint32_t HashValue; + MCSymbol *Sym; + std::vector<struct HashDataContents*> Data; // offsets + HashData(StringRef S) : Str(S) { + HashValue = DwarfAccelTable::HashDJB(S); + } + void addData(struct HashDataContents *Datum) { Data.push_back(Datum); } + #ifndef NDEBUG + void print(raw_ostream &O) { + O << "Name: " << Str << "\n"; + O << " Hash Value: " << format("0x%x", HashValue) << "\n"; + O << " Symbol: " ; + if (Sym) Sym->print(O); + else O << "<none>"; + O << "\n"; + for (size_t i = 0; i < Data.size(); i++) { + O << " Offset: " << Data[i]->Die->getOffset() << "\n"; + O << " Tag: " << dwarf::TagString(Data[i]->Die->getTag()) << "\n"; + O << " Flags: " << Data[i]->Flags << "\n"; + } + } + void dump() { + print(dbgs()); + } + #endif + }; + + DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT + void operator=(const DwarfAccelTable&); // DO NOT IMPLEMENT + + // Internal Functions + void EmitHeader(AsmPrinter *); + void EmitBuckets(AsmPrinter *); + void EmitHashes(AsmPrinter *); + void EmitOffsets(AsmPrinter *, MCSymbol *); + void EmitData(AsmPrinter *, DwarfDebug *D); + + // Output Variables + TableHeader Header; + TableHeaderData HeaderData; + std::vector<HashData*> Data; + + // String Data + typedef std::vector<struct HashDataContents*> DataArray; + typedef StringMap<DataArray> StringEntries; + StringEntries Entries; + + // Buckets/Hashes/Offsets + typedef std::vector<HashData*> HashList; + typedef std::vector<HashList> BucketList; + BucketList Buckets; + HashList Hashes; + + // Public Implementation + public: + DwarfAccelTable(DwarfAccelTable::Atom); + DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &); + ~DwarfAccelTable(); + void AddName(StringRef, DIE*, 
char = 0); + void FinalizeTable(AsmPrinter *, const char *); + void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *); +#ifndef NDEBUG + void print(raw_ostream &O); + void dump() { print(dbgs()); } +#endif +}; + +} +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 8ed4f4c43a7c..d975f1f97bea 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -142,12 +142,14 @@ void DwarfCFIException::EndFunction() { Asm->OutStreamer.EmitCFIEndProc(); + if (!shouldEmitPersonality) + return; + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); // Map all labels and get rid of any dead landing pads. MMI->TidyLandingPads(); - if (shouldEmitPersonality) - EmitExceptionTable(); + EmitExceptionTable(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 6fe476d02ef7..69dc454ae1d7 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -13,12 +13,14 @@ #define DEBUG_TYPE "dwarfdebug" +#include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "llvm/Constants.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/Analysis/DIBuilder.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" @@ -30,8 +32,9 @@ using namespace llvm; /// CompileUnit - Compile unit constructor. -CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW) - : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) { +CompileUnit::CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, + DwarfDebug *DW) + : ID(I), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) { DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); } @@ -67,12 +70,19 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute, Die->addValue(Attribute, Form, Value); } -/// addString - Add a string attribute data and value. DIEString only -/// keeps string reference. -void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form, - StringRef String) { - DIEValue *Value = new (DIEValueAllocator) DIEString(String); - Die->addValue(Attribute, Form, Value); +/// addString - Add a string attribute data and value. We always emit a +/// reference to the string pool instead of immediate strings so that DIEs have +/// more predictable sizes. +void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) { + MCSymbol *Symb = DD->getStringPoolEntry(String); + DIEValue *Value; + if (Asm->needsRelocationsForDwarfStringPool()) + Value = new (DIEValueAllocator) DIELabel(Symb); + else { + MCSymbol *StringPool = DD->getStringPool(); + Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); + } + Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); } /// addLabel - Add a Dwarf label attribute data and value. @@ -98,7 +108,6 @@ void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, Die->addValue(Attribute, Form, createDIEEntry(Entry)); } - /// addBlock - Add block data. 
/// void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, @@ -135,8 +144,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { unsigned Line = G.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), - G.getDirectory()); + unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), G.getDirectory()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -148,14 +156,14 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { // Verify subprogram. if (!SP.Verify()) return; - // If the line number is 0, don't add it. - if (SP.getLineNumber() == 0) - return; + // If the line number is 0, don't add it. unsigned Line = SP.getLineNumber(); - if (!SP.getContext().Verify()) + if (Line == 0) return; - unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory()); + + unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), + SP.getDirectory()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -169,9 +177,28 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { return; unsigned Line = Ty.getLineNumber(); - if (Line == 0 || !Ty.getContext().Verify()) + if (Line == 0) return; - unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory()); + unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), + Ty.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { + // Verify type. + if (!Ty.Verify()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0) + return; + DIFile File = Ty.getFile(); + unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(), + File.getDirectory()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -458,7 +485,7 @@ static bool isTypeSigned(DIType Ty, int *SizeInBits) { /// addConstantValue - Add constant value entry in variable DIE. bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty) { - assert (MO.isImm() && "Invalid machine operand!"); + assert(MO.isImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); int SizeInBits = -1; bool SignedConstant = isTypeSigned(Ty, &SizeInBits); @@ -558,8 +585,8 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { Buffer.addChild(getOrCreateTemplateValueParameterDIE( DITemplateValueParameter(Element))); } - } + /// addToContextOwner - Add Die into the list of its context owner's children. void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { if (Context.isType()) { @@ -598,13 +625,29 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { assert(Ty.isDerivedType() && "Unknown kind of DIType"); constructTypeDIE(*TyDIE, DIDerivedType(Ty)); } - + // If this is a named finished type then include it in the list of types + // for the accelerator tables. 
+ if (!Ty.getName().empty() && !Ty.isForwardDecl()) { + bool IsImplementation = 0; + if (Ty.isCompositeType()) { + DICompositeType CT(Ty); + // A runtime language of 0 actually means C/C++ and that any + // non-negative value is some version of Objective-C/C++. + IsImplementation = (CT.getRunTimeLang() == 0) || + CT.isObjcClassComplete(); + } + unsigned Flags = IsImplementation ? + DwarfAccelTable::eTypeFlagClassIsImplementation : 0; + addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags)); + } + addToContextOwner(TyDIE, Ty.getContext()); return TyDIE; } /// addType - Add a new type attribute to the specified entity. -void CompileUnit::addType(DIE *Entity, DIType Ty) { +void CompileUnit::addType(DIE *Entity, DIType Ty, + unsigned Attribute) { if (!Ty.Verify()) return; @@ -612,7 +655,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) { DIEEntry *Entry = getDIEEntry(Ty); // If it exists then use the existing value. if (Entry) { - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); return; } @@ -622,7 +665,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) { // Set up proxy. Entry = createDIEEntry(Buffer); insertDIEEntry(Ty, Entry); - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); // If this is a complete composite type then include it in the // list of global types. @@ -662,7 +705,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { StringRef Name = BTy.getName(); // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(&Buffer, dwarf::DW_AT_name, Name); if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) { Buffer.setTag(dwarf::DW_TAG_unspecified_type); @@ -671,8 +714,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { } Buffer.setTag(dwarf::DW_TAG_base_type); - addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - BTy.getEncoding()); + addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy.getEncoding()); uint64_t Size = BTy.getSizeInBits() >> 3; addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); @@ -696,10 +739,10 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(&Buffer, dwarf::DW_AT_name, Name); // Add size if non-zero (derived types might be zero-sized.) - if (Size) + if (Size && Tag != dwarf::DW_TAG_pointer_type) addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); // Add source line info if available and TyDesc is not a forward declaration. @@ -755,8 +798,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { Buffer.addChild(Arg); } } - // Add prototype flag. - if (isPrototyped) + // Add prototype flag if we're dealing with a C language and the + // function has been prototyped. 
+ if (isPrototyped && + (Language == dwarf::DW_LANG_C89 || + Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); } break; @@ -779,13 +826,13 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DISubprogram SP(Element); ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element)); if (SP.isProtected()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); else if (SP.isPrivate()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); else - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (SP.isExplicit()) addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); @@ -793,15 +840,54 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { else if (Element.isVariable()) { DIVariable DV(Element); ElemDie = new DIE(dwarf::DW_TAG_variable); - addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - DV.getName()); + addString(ElemDie, dwarf::DW_AT_name, DV.getName()); addType(ElemDie, DV.getType()); addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); addSourceLine(ElemDie, DV); - } else if (Element.isDerivedType()) - ElemDie = createMemberDIE(DIDerivedType(Element)); - else + } else if (Element.isDerivedType()) { + DIDerivedType DDTy(Element); + if (DDTy.getTag() == dwarf::DW_TAG_friend) { + ElemDie = new DIE(dwarf::DW_TAG_friend); + addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend); + } else + ElemDie = createMemberDIE(DIDerivedType(Element)); + } else if (Element.isObjCProperty()) { + DIObjCProperty Property(Element); + ElemDie = new DIE(Property.getTag()); + StringRef PropertyName = Property.getObjCPropertyName(); + addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); + addType(ElemDie, Property.getType()); + addSourceLine(ElemDie, Property); + StringRef GetterName = Property.getObjCPropertyGetterName(); + if (!GetterName.empty()) + addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName); + StringRef SetterName = Property.getObjCPropertySetterName(); + if (!SetterName.empty()) + addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName); + unsigned PropertyAttributes = 0; + if (Property.isReadOnlyObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; + if (Property.isReadWriteObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite; + if (Property.isAssignObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign; + if (Property.isRetainObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain; + if (Property.isCopyObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy; + if (Property.isNonAtomicObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; + if (PropertyAttributes) + addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0, + PropertyAttributes); + + DIEEntry *Entry = getDIEEntry(Element); + if (!Entry) { + Entry = createDIEEntry(ElemDie); + insertDIEEntry(Element, Entry); + } + } else continue; Buffer.addChild(ElemDie); } @@ -809,11 +895,6 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, 
DICompositeType CTy) { if (CTy.isAppleBlockExtension()) addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); - unsigned RLang = CTy.getRunTimeLang(); - if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, - dwarf::DW_FORM_data1, RLang); - DICompositeType ContainingType = CTy.getContainingType(); if (DIDescriptor(ContainingType).isCompositeType()) addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, @@ -827,7 +908,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type, dwarf::DW_FORM_flag, 1); - if (Tag == dwarf::DW_TAG_class_type) + // Add template parameters to a class, structure or union types. + // FIXME: The support isn't in the metadata for this yet. + if (Tag == dwarf::DW_TAG_class_type || + Tag == dwarf::DW_TAG_structure_type || + Tag == dwarf::DW_TAG_union_type) addTemplateParams(Buffer, CTy.getTemplateParams()); break; @@ -838,11 +923,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(&Buffer, dwarf::DW_AT_name, Name); if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - { + { // Add size if non-zero (derived types might be zero-sized.) if (Size) addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); @@ -857,6 +942,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add source line info if available. if (!CTy.isForwardDecl()) addSourceLine(&Buffer, CTy); + + // No harm in adding the runtime language to the declaration. + unsigned RLang = CTy.getRunTimeLang(); + if (RLang) + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + dwarf::DW_FORM_data1, RLang); } } @@ -870,7 +961,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); addType(ParamDIE, TP.getType()); - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); + addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); return ParamDIE; } @@ -885,7 +976,7 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); addType(ParamDIE, TPV.getType()); if (!TPV.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName()); + addString(ParamDIE, dwarf::DW_AT_name, TPV.getName()); addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, TPV.getValue()); return ParamDIE; @@ -898,8 +989,11 @@ DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { return NDie; NDie = new DIE(dwarf::DW_TAG_namespace); insertDIE(NS, NDie); - if (!NS.getName().empty()) - addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName()); + if (!NS.getName().empty()) { + addString(NDie, dwarf::DW_AT_name, NS.getName()); + addAccelNamespace(NS.getName(), NDie); + } else + addAccelNamespace("(anonymous namespace)", NDie); addSourceLine(NDie, NS); addToContextOwner(NDie, NS.getContext()); return NDie; @@ -921,6 +1015,12 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { if (SPDie) return SPDie; + DISubprogram SPDecl = SP.getFunctionDeclaration(); + DIE *DeclDie = NULL; + if (SPDecl.isSubprogram()) { + DeclDie = getOrCreateSubprogramDIE(SPDecl); + } + SPDie = new DIE(dwarf::DW_TAG_subprogram); 
// DW_TAG_inlined_subroutine may refer to this DIE. @@ -932,25 +1032,36 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add function template parameters. addTemplateParams(*SPDie, SP.getTemplateParams()); + // Unfortunately this code needs to stay here to work around + // a bug in older gdbs that requires the linkage name to resolve + // multiple template functions. StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); // If this DIE is going to refer declaration info using AT_specification // then there is no need to add other attributes. - if (SP.getFunctionDeclaration().isSubprogram()) + if (DeclDie) { + // Refer function declaration directly. + addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + DeclDie); + return SPDie; + } // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) - addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - SP.getName()); + addString(SPDie, dwarf::DW_AT_name, SP.getName()); addSourceLine(SPDie, SP); - if (SP.isPrototyped()) + // Add the prototype if we have a prototype and we have a C like + // language. + if (SP.isPrototyped() && + (Language == dwarf::DW_LANG_C89 || + Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); // Add Return Type. @@ -965,7 +1076,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { unsigned VK = SP.getVirtuality(); if (VK) { - addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); + addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); DIEBlock *Block = getDIEBlock(); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); @@ -1052,31 +1163,30 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { insertDIE(N, VariableDIE); // Add name. - addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, - GV.getDisplayName()); + addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); StringRef LinkageName = GV.getLinkageName(); bool isGlobalVariable = GV.getGlobal() != NULL; if (!LinkageName.empty() && isGlobalVariable) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); // Add type. DIType GTy = GV.getType(); addType(VariableDIE, GTy); // Add scoping info. - if (!GV.isLocalToUnit()) { + if (!GV.isLocalToUnit()) addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - // Expose as global. - addGlobal(GV.getName(), VariableDIE); - } + // Add line number info. addSourceLine(VariableDIE, GV); // Add to context owner. DIDescriptor GVContext = GV.getContext(); addToContextOwner(VariableDIE, GVContext); // Add location. 
+ bool addToAccelTable = false; + DIE *VariableSpecDIE = NULL; if (isGlobalVariable) { + addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); addLabel(Block, 0, dwarf::DW_FORM_udata, @@ -1086,7 +1196,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && !GVContext.isFile() && !isSubprogramContext(GVContext)) { // Create specification DIE. - DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); + VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); @@ -1095,11 +1205,12 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addDie(VariableSpecDIE); } else { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); - } + } } else if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(GV.getConstant())) addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType()); else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { + addToAccelTable = true; // GV is a merged global. DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); Value *Ptr = CE->getOperand(0); @@ -1114,6 +1225,16 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } + if (addToAccelTable) { + DIE *AddrDIE = VariableSpecDIE ? VariableSpecDIE : VariableDIE; + addAccelName(GV.getName(), AddrDIE); + + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. + if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) + addAccelName(GV.getLinkageName(), AddrDIE); + } + return; } @@ -1121,8 +1242,8 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); - int64_t L = SR.getLo(); - int64_t H = SR.getHi(); + uint64_t L = SR.getLo(); + uint64_t H = SR.getHi(); // The L value defines the lower bounds which is typically zero for C/C++. The // H value is the upper bounds. Values are 64 bit. 
H - L + 1 is the size @@ -1135,8 +1256,8 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) return; } if (L) - addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); - addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); + addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); Buffer.addChild(DW_Subrange); } @@ -1175,7 +1296,7 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) { DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); StringRef Name = ETy.getName(); - addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(Enumerator, dwarf::DW_AT_name, Name); int64_t Value = ETy.getEnumValue(); addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); return Enumerator; @@ -1212,8 +1333,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, AbsDIE); else { - addString(VariableDie, dwarf::DW_AT_name, - dwarf::DW_FORM_string, Name); + addString(VariableDie, dwarf::DW_AT_name, Name); addSourceLine(VariableDie, DV->getVariable()); addType(VariableDie, DV->getType()); } @@ -1308,7 +1428,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { DIE *MemberDie = new DIE(DT.getTag()); StringRef Name = DT.getName(); if (!Name.empty()) - addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(MemberDie, dwarf::DW_AT_name, Name); addType(MemberDie, DT.getTypeDerivedFrom()); @@ -1366,32 +1486,35 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); if (DT.isProtected()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); else if (DT.isPrivate()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); // Otherwise C++ member and base classes are considered public. else - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (DT.isVirtual()) - addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, dwarf::DW_VIRTUALITY_virtual); // Objective-C properties. + if (MDNode *PNode = DT.getObjCProperty()) + if (DIEEntry *PropertyDie = getDIEEntry(PNode)) + MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4, + PropertyDie); + + // This is only for backward compatibility. 
StringRef PropertyName = DT.getObjCPropertyName(); if (!PropertyName.empty()) { - addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string, - PropertyName); + addString(MemberDie, dwarf::DW_AT_APPLE_property_name, PropertyName); StringRef GetterName = DT.getObjCPropertyGetterName(); if (!GetterName.empty()) - addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, - dwarf::DW_FORM_string, GetterName); + addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, GetterName); StringRef SetterName = DT.getObjCPropertySetterName(); if (!SetterName.empty()) - addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, - dwarf::DW_FORM_string, SetterName); + addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, SetterName); unsigned PropertyAttributes = 0; if (DT.isReadOnlyObjCProperty()) PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 785926579fa4..45e407e27ffa 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -29,13 +29,17 @@ class ConstantInt; class DbgVariable; //===----------------------------------------------------------------------===// -/// CompileUnit - This dwarf writer support class manages information associate +/// CompileUnit - This dwarf writer support class manages information associated /// with a source file. class CompileUnit { /// ID - File identifier for source. /// unsigned ID; + /// Language - The DW_AT_language of the compile unit + /// + unsigned Language; + /// Die - Compile unit debug information entry. /// const OwningPtr<DIE> CUDie; @@ -56,14 +60,17 @@ class CompileUnit { /// descriptors to debug information entries using a DIEEntry proxy. DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; - /// Globals - A map of globally visible named entities for this unit. - /// - StringMap<DIE*> Globals; - /// GlobalTypes - A map of globally visible types for this unit. /// StringMap<DIE*> GlobalTypes; + /// AccelNames - A map of names for the name accelerator table. + /// + StringMap<std::vector<DIE*> > AccelNames; + StringMap<std::vector<DIE*> > AccelObjC; + StringMap<std::vector<DIE*> > AccelNamespace; + StringMap<std::vector<std::pair<DIE*, unsigned> > > AccelTypes; + /// DIEBlocks - A list of all the DIEBlocks in use. std::vector<DIEBlock *> DIEBlocks; @@ -73,27 +80,56 @@ class CompileUnit { DenseMap<DIE *, const MDNode *> ContainingTypeMap; public: - CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW); + CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW); ~CompileUnit(); // Accessors. unsigned getID() const { return ID; } + unsigned getLanguage() const { return Language; } DIE* getCUDie() const { return CUDie.get(); } - const StringMap<DIE*> &getGlobals() const { return Globals; } const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; } + const StringMap<std::vector<DIE*> > &getAccelNames() const { + return AccelNames; + } + const StringMap<std::vector<DIE*> > &getAccelObjC() const { + return AccelObjC; + } + const StringMap<std::vector<DIE*> > &getAccelNamespace() const { + return AccelNamespace; + } + const StringMap<std::vector<std::pair<DIE*, unsigned > > > + &getAccelTypes() const { + return AccelTypes; + } + /// hasContent - Return true if this compile unit has something to write out. /// bool hasContent() const { return !CUDie->getChildren().empty(); } - /// addGlobal - Add a new global entity to the compile unit. 
- /// - void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; } - /// addGlobalType - Add a new global type to the compile unit. /// void addGlobalType(DIType Ty); + + /// addAccelName - Add a new name to the name accelerator table. + void addAccelName(StringRef Name, DIE *Die) { + std::vector<DIE*> &DIEs = AccelNames[Name]; + DIEs.push_back(Die); + } + void addAccelObjC(StringRef Name, DIE *Die) { + std::vector<DIE*> &DIEs = AccelObjC[Name]; + DIEs.push_back(Die); + } + void addAccelNamespace(StringRef Name, DIE *Die) { + std::vector<DIE*> &DIEs = AccelNamespace[Name]; + DIEs.push_back(Die); + } + void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) { + std::vector<std::pair<DIE*, unsigned > > &DIEs = AccelTypes[Name]; + DIEs.push_back(Die); + } + /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } @@ -150,8 +186,7 @@ public: /// addString - Add a string attribute data and value. /// - void addString(DIE *Die, unsigned Attribute, unsigned Form, - const StringRef Str); + void addString(DIE *Die, unsigned Attribute, const StringRef Str); /// addLabel - Add a Dwarf label attribute data and value. /// @@ -178,6 +213,7 @@ public: void addSourceLine(DIE *Die, DISubprogram SP); void addSourceLine(DIE *Die, DIType Ty); void addSourceLine(DIE *Die, DINameSpace NS); + void addSourceLine(DIE *Die, DIObjCProperty Ty); /// addAddress - Add an address attribute to a die based on the location /// provided. @@ -225,8 +261,10 @@ public: /// addToContextOwner - Add Die into the list of its context owner's children. void addToContextOwner(DIE *Die, DIDescriptor Context); - /// addType - Add a new type attribute to the specified entity. - void addType(DIE *Entity, DIType Ty); + /// addType - Add a new type attribute to the specified entity. This takes + /// and attribute parameter because DW_AT_friend attributes are also + /// type references. + void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type); /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *getOrCreateNameSpace(DINameSpace NS); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1b7e370fca09..cb7887890cda 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,10 +14,12 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "DIE.h" +#include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Instructions.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" @@ -52,6 +54,10 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, cl::desc("Make an absence of debug location information explicit."), cl::init(false)); +static cl::opt<bool> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, + cl::desc("Output prototype dwarf accelerator tables."), + cl::init(false)); + namespace { const char *DWARFGroupName = "DWARF Emission"; const char *DbgTimerName = "DWARF Debug Writer"; @@ -128,6 +134,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfStrSectionSym = TextSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; + + // Turn on accelerator tables for Darwin. 
+ if (Triple(M->getTargetTriple()).isOSDarwin()) + DwarfAccelTables = true; + { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(M); @@ -136,6 +147,22 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfDebug::~DwarfDebug() { } +/// EmitSectionSym - Switch to the specified MCSection and emit an assembler +/// temporary label to it if SymbolStem is specified. +static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section, + const char *SymbolStem = 0) { + Asm->OutStreamer.SwitchSection(Section); + if (!SymbolStem) return 0; + + MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); + Asm->OutStreamer.EmitLabel(TmpSym); + return TmpSym; +} + +MCSymbol *DwarfDebug::getStringPool() { + return Asm->GetTempSymbol("section_str"); +} + MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { std::pair<MCSymbol*, unsigned> &Entry = StringPool[Str]; if (Entry.first) return Entry.first; @@ -144,7 +171,6 @@ MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { return Entry.first = Asm->GetTempSymbol("string", Entry.second); } - /// assignAbbrevNumber - Define a unique number for the abbreviation. /// void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) { @@ -178,6 +204,63 @@ static StringRef getRealLinkageName(StringRef LinkageName) { return LinkageName; } +static bool isObjCClass(StringRef Name) { + return Name.startswith("+") || Name.startswith("-"); +} + +static bool hasObjCCategory(StringRef Name) { + if (!isObjCClass(Name)) return false; + + size_t pos = Name.find(')'); + if (pos != std::string::npos) { + if (Name[pos+1] != ' ') return false; + return true; + } + return false; +} + +static void getObjCClassCategory(StringRef In, StringRef &Class, + StringRef &Category) { + if (!hasObjCCategory(In)) { + Class = In.slice(In.find('[') + 1, In.find(' ')); + Category = ""; + return; + } + + Class = In.slice(In.find('[') + 1, In.find('(')); + Category = In.slice(In.find('[') + 1, In.find(' ')); + return; +} + +static StringRef getObjCMethodName(StringRef In) { + return In.slice(In.find(' ') + 1, In.find(']')); +} + +// Add the various names to the Dwarf accelerator table names. +static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, + DIE* Die) { + if (!SP.isDefinition()) return; + + TheCU->addAccelName(SP.getName(), Die); + + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. + if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) + TheCU->addAccelName(SP.getLinkageName(), Die); + + // If this is an Objective-C selector name add it to the ObjC accelerator + // too. + if (isObjCClass(SP.getName())) { + StringRef Class, Category; + getObjCClassCategory(SP.getName(), Class, Category); + TheCU->addAccelObjC(Class, Die); + if (Category != "") + TheCU->addAccelObjC(Category, Die); + // Also add the base method name to the name table. + TheCU->addAccelName(getObjCMethodName(SP.getName()), Die); + } +} + /// updateSubprogramScopeDIE - Find DIE for the given subprogram and /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. /// If there are global variables in this scope then create and insert @@ -190,11 +273,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP(SPNode); DISubprogram SPDecl = SP.getFunctionDeclaration(); - if (SPDecl.isSubprogram()) - // Refer function declaration directly. 
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - SPCU->getOrCreateSubprogramDIE(SPDecl)); - else { + if (!SPDecl.isSubprogram()) { // There is not any need to generate specification DIE for a function // defined at compile unit level. If a function is defined inside another // function then gdb prefers the definition at top level and but does not @@ -203,7 +282,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, if (SP.isDefinition() && !SP.getContext().isCompileUnit() && !SP.getContext().isFile() && !isSubprogramContext(SP.getContext())) { - SPCU-> addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. DICompositeType SPTy = SP.getType(); @@ -241,6 +320,10 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, MachineLocation Location(RI->getFrameRegister(*Asm->MF)); SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_subprogram nodes. + addSubprogramNames(SPCU, SP, SPDie); + return SPDie; } @@ -248,7 +331,6 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels. DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); if (Scope->isAbstractScope()) return ScopeDIE; @@ -294,10 +376,9 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, /// of the function. DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges(); - assert (Ranges.empty() == false - && "LexicalScope does not have instruction markers!"); + assert(Ranges.empty() == false && + "LexicalScope does not have instruction markers!"); if (!Scope->getScopeNode()) return NULL; @@ -314,8 +395,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, const MCSymbol *EndLabel = getLabelAfterInsn(RI->second); if (StartLabel == 0 || EndLabel == 0) { - assert (0 && "Unexpected Start and End labels for a inlined scope!"); - return 0; + llvm_unreachable("Unexpected Start and End labels for a inlined scope!"); } assert(StartLabel->isDefined() && "Invalid starting label for an inlined scope!"); @@ -358,16 +438,20 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, I = InlineInfo.find(InlinedSP); if (I == InlineInfo.end()) { - InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, - ScopeDIE)); + InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE)); InlinedSPNodes.push_back(InlinedSP); } else I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); DILocation DL(Scope->getInlinedAt()); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, + GetOrCreateSourceID(DL.getFilename(), DL.getDirectory())); TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_inlined_subprogram nodes. 
+ addSubprogramNames(TheCU, InlinedSP, ScopeDIE); + return ScopeDIE; } @@ -376,7 +460,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { if (!Scope || !Scope->getScopeNode()) return NULL; - SmallVector <DIE *, 8> Children; + SmallVector<DIE *, 8> Children; // Collect arguments for current function. if (LScopes.isCurrentFunctionScope(Scope)) @@ -426,39 +510,39 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { ScopeDIE->addChild(*I); if (DS.isSubprogram()) - TheCU->addPubTypes(DISubprogram(DS)); + TheCU->addPubTypes(DISubprogram(DS)); - return ScopeDIE; + return ScopeDIE; } /// GetOrCreateSourceID - Look up the source id with the given directory and /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. - unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, StringRef DirName) { // If FE did not provide a file name, then assume stdin. if (FileName.empty()) return GetOrCreateSourceID("<stdin>", StringRef()); - // MCStream expects full path name as filename. - if (!DirName.empty() && !sys::path::is_absolute(FileName)) { - SmallString<128> FullPathName = DirName; - sys::path::append(FullPathName, FileName); - // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. - return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); - } + // TODO: this might not belong here. See if we can factor this better. + if (DirName == CompilationDir) + DirName = ""; - StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName); - if (Entry.getValue()) - return Entry.getValue(); + unsigned SrcId = SourceIdMap.size()+1; - unsigned SrcId = SourceIdMap.size(); - Entry.setValue(SrcId); + // We look up the file/dir pair by concatenating them with a zero byte. + SmallString<128> NamePair; + NamePair += DirName; + NamePair += '\0'; // Zero bytes are not allowed in paths. + NamePair += FileName; + + StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId); + if (Ent.getValue() != SrcId) + return Ent.getValue(); // Print out a .file directive to specify files for .loc directives. - Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey()); + Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName); return SrcId; } @@ -468,39 +552,36 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); - StringRef Dir = DIUnit.getDirectory(); - unsigned ID = GetOrCreateSourceID(FN, Dir); + CompilationDir = DIUnit.getDirectory(); + unsigned ID = GetOrCreateSourceID(FN, CompilationDir); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this); - NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, - DIUnit.getProducer()); + CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, Asm, this); + NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); - NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); - // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This - // simplifies debug range entries. 
- NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); + NewCU->addString(Die, dwarf::DW_AT_name, FN); + // 2.17.1 requires that we use DW_AT_low_pc for a single entry point + // into an entity. + NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. - if(Asm->MAI->doesDwarfRequireRelocationForSectionOffset()) + if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset()) NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, Asm->GetTempSymbol("section_line")); else NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); - if (!Dir.empty()) - NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); + if (!CompilationDir.empty()) + NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); if (DIUnit.isOptimized()) NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) - NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, - Flags); + NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags); - unsigned RVer = DIUnit.getRunTimeVersion(); - if (RVer) + if (unsigned RVer = DIUnit.getRunTimeVersion()) NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); @@ -513,6 +594,11 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { /// construct SubprogramDIE - Construct subprogram DIE. void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { + CompileUnit *&CURef = SPMap[N]; + if (CURef) + return; + CURef = TheCU; + DISubprogram SP(N); if (!SP.isDefinition()) // This is a method declaration which will be handled while constructing @@ -527,10 +613,6 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, // Add to context owner. TheCU->addToContextOwner(SubprogramDie, SP.getContext()); - // Expose as global. - TheCU->addGlobal(SP.getName(), SubprogramDie); - - SPMap[N] = TheCU; return; } @@ -676,7 +758,7 @@ void DwarfDebug::endModule() { // Construct subprogram DIE and add variables DIEs. CompileUnit *SPCU = CUMap.lookup(TheCU); - assert (SPCU && "Unable to find Compile Unit!"); + assert(SPCU && "Unable to find Compile Unit!"); constructSubprogramDIE(SPCU, SP); DIE *ScopeDIE = SPCU->getDIE(SP); for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { @@ -697,6 +779,13 @@ void DwarfDebug::endModule() { DIE *ISP = *AI; FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } + for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(), + AE = AbstractSPDies.end(); AI != AE; ++AI) { + DIE *ISP = AI->second; + if (InlinedSubprogramDIEs.count(ISP)) + continue; + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + } // Emit DW_AT_containing_type attribute to connect types with their // vtable holding type. @@ -727,9 +816,14 @@ void DwarfDebug::endModule() { // Corresponding abbreviations into a abbrev section. emitAbbreviations(); - // Emit info into a debug pubnames section. - emitDebugPubNames(); - + // Emit info into a dwarf accelerator table sections. + if (DwarfAccelTables) { + emitAccelNames(); + emitAccelObjC(); + emitAccelNamespaces(); + emitAccelTypes(); + } + // Emit info into a debug pubtypes section. 
emitDebugPubTypes(); @@ -837,7 +931,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, /// isDbgValueInDefinedReg - Return true if debug value, encoded by /// DBG_VALUE instruction, is in a defined reg. static bool isDbgValueInDefinedReg(const MachineInstr *MI) { - assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); + assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() && MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; @@ -867,8 +961,7 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, if (MI->getOperand(0).isCImm()) return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm()); - assert (0 && "Unexpected 3 operand DBG_VALUE instruction!"); - return DotDebugLocEntry(); + llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!"); } /// collectVariableInfo - Find variables for each lexical scope. @@ -964,7 +1057,8 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, } // The value is valid until the next DBG_VALUE or clobber. - DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, Begin)); + DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, + Begin)); } DotDebugLocEntries.push_back(DotDebugLocEntry()); } @@ -999,12 +1093,15 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) { - unsigned Flags = DWARF2_FLAG_IS_STMT; + unsigned Flags = 0; PrevInstLoc = DL; if (DL == PrologEndLoc) { Flags |= DWARF2_FLAG_PROLOGUE_END; PrologEndLoc = DebugLoc(); } + if (PrologEndLoc.isUnknown()) + Flags |= DWARF2_FLAG_IS_STMT; + if (!DL.isUnknown()) { const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); @@ -1099,12 +1196,19 @@ static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) { } /// getFnDebugLoc - Walk up the scope chain of given debug loc and find -/// line number info for the function. +/// line number info for the function. static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { const MDNode *Scope = getScopeNode(DL, Ctx); DISubprogram SP = getDISubprogram(Scope); - if (SP.Verify()) - return DebugLoc::get(SP.getLineNumber(), 0, SP); + if (SP.Verify()) { + // Check for number of operands since the compatibility is + // cheap here. + if (SP->getNumOperands() > 19) + return DebugLoc::get(SP.getScopeLineNumber(), 0, SP); + else + return DebugLoc::get(SP.getLineNumber(), 0, SP); + } + return DebugLoc(); } @@ -1135,7 +1239,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { const MachineInstr *MI = II; if (MI->isDebugValue()) { - assert (MI->getNumOperands() > 1 && "Invalid machine instruction!"); + assert(MI->getNumOperands() > 1 && "Invalid machine instruction!"); // Keep track of user variables. 
const MDNode *Var = @@ -1206,7 +1310,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { MOE = MI->operands_end(); MOI != MOE; ++MOI) { if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg()) continue; - for (const unsigned *AI = TRI->getOverlaps(MOI->getReg()); + for (const uint16_t *AI = TRI->getOverlaps(MOI->getReg()); unsigned Reg = *AI; ++AI) { const MDNode *Var = LiveUserVar[Reg]; if (!Var) @@ -1277,7 +1381,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { MF->getFunction()->getContext()); recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), FnStartDL.getScope(MF->getFunction()->getContext()), - DWARF2_FLAG_IS_STMT); + 0); } } @@ -1303,7 +1407,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); - assert (TheCU && "Unable to find compile unit!"); + assert(TheCU && "Unable to find compile unit!"); // Construct abstract scopes. ArrayRef<LexicalScope *> AList = LScopes.getAbstractScopesList(); @@ -1327,7 +1431,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope); - if (!DisableFramePointerElim(*MF)) + if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, dwarf::DW_FORM_flag, 1); @@ -1380,7 +1484,7 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, Fn = DB.getFilename(); Dir = DB.getDirectory(); } else - assert(0 && "Unexpected scope info"); + llvm_unreachable("Unexpected scope info"); Src = GetOrCreateSourceID(Fn, Dir); } @@ -1398,10 +1502,6 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { // Get the children. const std::vector<DIE *> &Children = Die->getChildren(); - // If not last sibling and has children then add sibling offset attribute. - if (!Last && !Children.empty()) - Die->addSiblingOffset(DIEValueAllocator); - // Record the abbreviation. assignAbbrevNumber(Die->getAbbrev()); @@ -1454,18 +1554,6 @@ void DwarfDebug::computeSizeAndOffsets() { } } -/// EmitSectionSym - Switch to the specified MCSection and emit an assembler -/// temporary label to it if SymbolStem is specified. -static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section, - const char *SymbolStem = 0) { - Asm->OutStreamer.SwitchSection(Section); - if (!SymbolStem) return 0; - - MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); - Asm->OutStreamer.EmitLabel(TmpSym); - return TmpSym; -} - /// EmitSectionLabels - Emit initial Dwarf sections with a label at /// the start of each one. void DwarfDebug::EmitSectionLabels() { @@ -1483,7 +1571,6 @@ void DwarfDebug::EmitSectionLabels() { EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); EmitSectionSym(Asm, TLOF.getDwarfLocSection()); - EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); DwarfStrSectionSym = EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str"); @@ -1525,9 +1612,6 @@ void DwarfDebug::emitDIE(DIE *Die) { Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); switch (Attr) { - case dwarf::DW_AT_sibling: - Asm->EmitInt32(Die->getSiblingOffset()); - break; case dwarf::DW_AT_abstract_origin: { DIEEntry *E = cast<DIEEntry>(Values[i]); DIE *Origin = E->getEntry(); @@ -1539,7 +1623,7 @@ void DwarfDebug::emitDIE(DIE *Die) { // DW_AT_range Value encodes offset in debug_range section. 
DIEInteger *V = cast<DIEInteger>(Values[i]); - if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) { + if (Asm->MAI->doesDwarfUseLabelOffsetForRanges()) { Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym, V->getValue(), 4); @@ -1678,62 +1762,133 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(1); } -/// emitDebugPubNames - Emit visible names into a debug pubnames section. -/// -void DwarfDebug::emitDebugPubNames() { +/// emitAccelNames - Emit visible names into a hashed accelerator table +/// section. +void DwarfDebug::emitAccelNames() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; - // Start the dwarf pubnames section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubNamesSection()); + const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNames(); + for (StringMap<std::vector<DIE*> >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const std::vector<DIE *> &Entities = GI->second; + for (std::vector<DIE *>::const_iterator DI = Entities.begin(), + DE = Entities.end(); DI != DE; ++DI) + AT.AddName(Name, (*DI)); + } + } - Asm->OutStreamer.AddComment("Length of Public Names Info"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("pubnames_end", TheCU->getID()), - Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4); + AT.FinalizeTable(Asm, "Names"); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfAccelNamesSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", - TheCU->getID())); + // Emit the full data. + AT.Emit(Asm, SectionBegin, this); +} - Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); +/// emitAccelObjC - Emit objective C classes and categories into a hashed +/// accelerator table section. +void DwarfDebug::emitAccelObjC() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelObjC(); + for (StringMap<std::vector<DIE*> >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const std::vector<DIE *> &Entities = GI->second; + for (std::vector<DIE *>::const_iterator DI = Entities.begin(), + DE = Entities.end(); DI != DE; ++DI) + AT.AddName(Name, (*DI)); + } + } - Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()), - DwarfInfoSectionSym); + AT.FinalizeTable(Asm, "ObjC"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelObjCSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); - Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()), - Asm->GetTempSymbol("info_begin", TheCU->getID()), - 4); + // Emit the full data. 
+ AT.Emit(Asm, SectionBegin, this); +} - const StringMap<DIE*> &Globals = TheCU->getGlobals(); - for (StringMap<DIE*>::const_iterator - GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { +/// emitAccelNamespace - Emit namespace dies into a hashed accelerator +/// table. +void DwarfDebug::emitAccelNamespaces() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNamespace(); + for (StringMap<std::vector<DIE*> >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); - DIE *Entity = GI->second; + const std::vector<DIE *> &Entities = GI->second; + for (std::vector<DIE *>::const_iterator DI = Entities.begin(), + DE = Entities.end(); DI != DE; ++DI) + AT.AddName(Name, (*DI)); + } + } - Asm->OutStreamer.AddComment("DIE offset"); - Asm->EmitInt32(Entity->getOffset()); + AT.FinalizeTable(Asm, "namespac"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelNamespaceSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); - } + // Emit the full data. + AT.Emit(Asm, SectionBegin, this); +} - Asm->OutStreamer.AddComment("End Mark"); - Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", - TheCU->getID())); +/// emitAccelTypes() - Emit type dies into a hashed accelerator table. +void DwarfDebug::emitAccelTypes() { + std::vector<DwarfAccelTable::Atom> Atoms; + Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTag, + dwarf::DW_FORM_data2)); + Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTypeFlags, + dwarf::DW_FORM_data1)); + DwarfAccelTable AT(Atoms); + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap<std::vector<std::pair<DIE*, unsigned > > > &Names + = TheCU->getAccelTypes(); + for (StringMap<std::vector<std::pair<DIE*, unsigned> > >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const std::vector<std::pair<DIE *, unsigned> > &Entities = GI->second; + for (std::vector<std::pair<DIE *, unsigned> >::const_iterator DI + = Entities.begin(), DE = Entities.end(); DI !=DE; ++DI) + AT.AddName(Name, (*DI).first, (*DI).second); + } } + + AT.FinalizeTable(Asm, "types"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelTypesSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); + + // Emit the full data. + AT.Emit(Asm, SectionBegin, this); } void DwarfDebug::emitDebugPubTypes() { for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; - // Start the dwarf pubnames section. + // Start the dwarf pubtypes section. 
Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfPubTypesSection()); Asm->OutStreamer.AddComment("Length of Public Types Info"); @@ -1766,6 +1921,7 @@ void DwarfDebug::emitDebugPubTypes() { Asm->EmitInt32(Entity->getOffset()); if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); + // Emit the name with a terminating null byte. Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0); } @@ -1801,8 +1957,10 @@ void DwarfDebug::emitDebugStr() { // Emit a label for reference from debug information entries. Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first); - // Emit the string itself. - Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/); + // Emit the string itself with a terminating null byte. + Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(), + Entries[i].second->getKeyLength()+1), + 0/*addrspace*/); } } @@ -1958,7 +2116,7 @@ void DwarfDebug::emitDebugMacInfo() { /// __debug_info section, and the low_pc is the starting address for the /// inlining instance. void DwarfDebug::emitDebugInlineInfo() { - if (!Asm->MAI->doesDwarfUsesInlineInfoSection()) + if (!Asm->MAI->doesDwarfUseInlineInfoSection()) return; if (!FirstCU) @@ -1990,10 +2148,9 @@ void DwarfDebug::emitDebugInlineInfo() { StringRef Name = SP.getName(); Asm->OutStreamer.AddComment("MIPS linkage name"); - if (LName.empty()) { - Asm->OutStreamer.EmitBytes(Name, 0); - Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator. - } else + if (LName.empty()) + Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym); + else Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)), DwarfStrSectionSym); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 35653be5c897..83f30f5b446f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -30,7 +30,8 @@ namespace llvm { class CompileUnit; -class DbgConcreteScope; +class ConstantInt; +class ConstantFP; class DbgVariable; class MachineFrameInfo; class MachineModuleInfo; @@ -207,8 +208,8 @@ class DwarfDebug { /// std::vector<DIEAbbrev *> Abbreviations; - /// SourceIdMap - Source id map, i.e. pair of directory id and source file - /// id mapped to a unique id. + /// SourceIdMap - Source id map, i.e. pair of source filename and directory, + /// separated by a zero byte, mapped to a unique id. StringMap<unsigned> SourceIdMap; /// StringPool - A String->Symbol mapping of strings used by indirect @@ -216,8 +217,6 @@ class DwarfDebug { StringMap<std::pair<MCSymbol*, unsigned> > StringPool; unsigned NextStringPoolNumber; - MCSymbol *getStringPoolEntry(StringRef Str); - /// SectionMap - Provides a unique id per text section. /// UniqueVector<const MCSection*> SectionMap; @@ -239,12 +238,12 @@ class DwarfDebug { /// DotDebugLocEntries - Collection of DotDebugLocEntry. SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries; - /// InliendSubprogramDIEs - Collection of subprgram DIEs that are marked + /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked /// (at the end of the module) as DW_AT_inline. SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; /// InlineInfo - Keep track of inlined functions and their location. This - /// information is used to populate debug_inlined section. + /// information is used to populate the debug_inlined section. 
typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels; DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo; SmallVector<const MDNode *, 4> InlinedSPNodes; @@ -304,6 +303,10 @@ class DwarfDebug { MCSymbol *DwarfDebugLocSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; + // As an optimization, there is no need to emit an entry in the directory + // table for the same directory as DW_at_comp_dir. + StringRef CompilationDir; + private: /// assignAbbrevNumber - Define a unique number for the abbreviation. @@ -340,7 +343,7 @@ private: /// the start of each one. void EmitSectionLabels(); - /// emitDIE - Recusively Emits a debug information entry. + /// emitDIE - Recursively Emits a debug information entry. /// void emitDIE(DIE *Die); @@ -365,10 +368,22 @@ private: /// void emitEndOfLineMatrix(unsigned SectionEnd); - /// emitDebugPubNames - Emit visible names into a debug pubnames section. - /// - void emitDebugPubNames(); + /// emitAccelNames - Emit visible names into a hashed accelerator table + /// section. + void emitAccelNames(); + + /// emitAccelObjC - Emit objective C classes and categories into a hashed + /// accelerator table section. + void emitAccelObjC(); + + /// emitAccelNamespace - Emit namespace dies into a hashed accelerator + /// table. + void emitAccelNamespaces(); + /// emitAccelTypes() - Emit type dies into a hashed accelerator table. + /// + void emitAccelTypes(); + /// emitDebugPubTypes - Emit visible types into a debug pubtypes section. /// void emitDebugPubTypes(); @@ -407,10 +422,10 @@ private: /// 3. an unsigned LEB128 number indicating the number of distinct inlining /// instances for the function. /// - /// The rest of the entry consists of a {die_offset, low_pc} pair for each + /// The rest of the entry consists of a {die_offset, low_pc} pair for each /// inlined instance; the die_offset points to the inlined_subroutine die in - /// the __debug_info section, and the low_pc is the starting address for the - /// inlining instance. + /// the __debug_info section, and the low_pc is the starting address for the + /// inlining instance. void emitDebugInlineInfo(); /// constructCompileUnit - Create new CompileUnit for the given @@ -426,8 +441,8 @@ private: void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope, unsigned Flags); - /// identifyScopeMarkers() - Indentify instructions that are marking - /// beginning of or end of a scope. + /// identifyScopeMarkers() - Identify instructions that are marking the + /// beginning or end of a scope. void identifyScopeMarkers(); /// addCurrentFnArgument - If Var is an current function argument that add @@ -472,7 +487,7 @@ public: void collectInfoFromNamedMDNodes(Module *M); /// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder. - /// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder. + /// FIXME - Remove this when DragonEgg switches to DIBuilder. bool collectLegacyDebugInfo(Module *M); /// beginModule - Emit all Dwarf sections that should come prior to the @@ -504,6 +519,13 @@ public: /// createSubprogramDIE - Create new DIE using SP. DIE *createSubprogramDIE(DISubprogram SP); + + /// getStringPool - returns the entry into the start of the pool. + MCSymbol *getStringPool(); + + /// getStringPoolEntry - returns an entry into the string pool with the given + /// string text. 
+ MCSymbol *getStringPoolEntry(StringRef Str); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 18b726b173dc..70cc2e56b3e1 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -31,6 +31,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -184,7 +185,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, /// CallToNoUnwindFunction - Return `true' if this is a call to a function /// marked `nounwind'. Return `false' otherwise. bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { - assert(MI->getDesc().isCall() && "This should be a call instruction!"); + assert(MI->isCall() && "This should be a call instruction!"); bool MarkedNoUnwind = false; bool SawFunc = false; @@ -243,7 +244,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); MI != E; ++MI) { if (!MI->isLabel()) { - if (MI->getDesc().isCall()) + if (MI->isCall()) SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI); continue; } @@ -529,10 +530,8 @@ void DwarfException::EmitExceptionTable() { // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. if (VerboseAsm) { - Asm->OutStreamer.AddComment(Twine(">> Call Site ") + - llvm::utostr(idx) + " <<"); - Asm->OutStreamer.AddComment(Twine(" On exception at call site ") + - llvm::utostr(idx)); + Asm->OutStreamer.AddComment(">> Call Site " + Twine(idx) + " <<"); + Asm->OutStreamer.AddComment(" On exception at call site "+Twine(idx)); } Asm->EmitULEB128(idx); @@ -543,8 +542,8 @@ void DwarfException::EmitExceptionTable() { if (S.Action == 0) Asm->OutStreamer.AddComment(" Action: cleanup"); else - Asm->OutStreamer.AddComment(Twine(" Action: ") + - llvm::utostr((S.Action - 1) / 2 + 1)); + Asm->OutStreamer.AddComment(" Action: " + + Twine((S.Action - 1) / 2 + 1)); } Asm->EmitULEB128(S.Action); } @@ -596,8 +595,7 @@ void DwarfException::EmitExceptionTable() { // number of 16-byte bundles. The first call site is counted relative to // the start of the procedure fragment. if (VerboseAsm) - Asm->OutStreamer.AddComment(Twine(">> Call Site ") + - llvm::utostr(++Entry) + " <<"); + Asm->OutStreamer.AddComment(">> Call Site " + Twine(++Entry) + " <<"); Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4); if (VerboseAsm) Asm->OutStreamer.AddComment(Twine(" Call between ") + @@ -625,8 +623,8 @@ void DwarfException::EmitExceptionTable() { if (S.Action == 0) Asm->OutStreamer.AddComment(" On action: cleanup"); else - Asm->OutStreamer.AddComment(Twine(" On action: ") + - llvm::utostr((S.Action - 1) / 2 + 1)); + Asm->OutStreamer.AddComment(" On action: " + + Twine((S.Action - 1) / 2 + 1)); } Asm->EmitULEB128(S.Action); } @@ -640,8 +638,7 @@ void DwarfException::EmitExceptionTable() { if (VerboseAsm) { // Emit comments that decode the action table. - Asm->OutStreamer.AddComment(Twine(">> Action Record ") + - llvm::utostr(++Entry) + " <<"); + Asm->OutStreamer.AddComment(">> Action Record " + Twine(++Entry) + " <<"); } // Type Filter @@ -650,11 +647,11 @@ void DwarfException::EmitExceptionTable() { // type of the catch clauses or the types in the exception specification. 
if (VerboseAsm) { if (Action.ValueForTypeID > 0) - Asm->OutStreamer.AddComment(Twine(" Catch TypeInfo ") + - llvm::itostr(Action.ValueForTypeID)); + Asm->OutStreamer.AddComment(" Catch TypeInfo " + + Twine(Action.ValueForTypeID)); else if (Action.ValueForTypeID < 0) - Asm->OutStreamer.AddComment(Twine(" Filter TypeInfo ") + - llvm::itostr(Action.ValueForTypeID)); + Asm->OutStreamer.AddComment(" Filter TypeInfo " + + Twine(Action.ValueForTypeID)); else Asm->OutStreamer.AddComment(" Cleanup"); } @@ -669,8 +666,7 @@ void DwarfException::EmitExceptionTable() { Asm->OutStreamer.AddComment(" No further actions"); } else { unsigned NextAction = Entry + (Action.NextAction + 1) / 2; - Asm->OutStreamer.AddComment(Twine(" Continue to action ") + - llvm::utostr(NextAction)); + Asm->OutStreamer.AddComment(" Continue to action "+Twine(NextAction)); } } Asm->EmitSLEB128(Action.NextAction); @@ -687,7 +683,7 @@ void DwarfException::EmitExceptionTable() { I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalVariable *GV = *I; if (VerboseAsm) - Asm->OutStreamer.AddComment(Twine("TypeInfo ") + llvm::utostr(Entry--)); + Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--)); if (GV) Asm->EmitReference(GV, TTypeEncoding); else @@ -707,7 +703,7 @@ void DwarfException::EmitExceptionTable() { if (VerboseAsm) { --Entry; if (TypeID != 0) - Asm->OutStreamer.AddComment(Twine("FilterInfo ") + llvm::itostr(Entry)); + Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry)); } Asm->EmitULEB128(TypeID); @@ -719,17 +715,17 @@ void DwarfException::EmitExceptionTable() { /// EndModule - Emit all exception information that should come after the /// content. void DwarfException::EndModule() { - assert(0 && "Should be implemented"); + llvm_unreachable("Should be implemented"); } /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void DwarfException::BeginFunction(const MachineFunction *MF) { - assert(0 && "Should be implemented"); + llvm_unreachable("Should be implemented"); } /// EndFunction - Gather and emit post-function exception information. /// void DwarfException::EndFunction() { - assert(0 && "Should be implemented"); + llvm_unreachable("Should be implemented"); } diff --git a/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/lib/CodeGen/AsmPrinter/LLVMBuild.txt new file mode 100644 index 000000000000..20b1f7b45b31 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/CodeGen/AsmPrinter/LLVMBuild.txt -------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AsmPrinter +parent = Libraries +required_libraries = Analysis CodeGen Core MC MCParser Support Target diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 75288b0934cb..ef1d2baed9ce 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -61,29 +62,33 @@ TailMergeSize("tail-merge-size", namespace { /// BranchFolderPass - Wrap branch folder in a machine function pass. - class BranchFolderPass : public MachineFunctionPass, - public BranchFolder { + class BranchFolderPass : public MachineFunctionPass { public: static char ID; - explicit BranchFolderPass(bool defaultEnableTailMerge) - : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge, true) {} + explicit BranchFolderPass(): MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "Control Flow Optimizer"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; } char BranchFolderPass::ID = 0; +char &llvm::BranchFolderPassID = BranchFolderPass::ID; -FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { - return new BranchFolderPass(DefaultEnableTailMerge); -} +INITIALIZE_PASS(BranchFolderPass, "branch-folder", + "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { - return OptimizeFunction(MF, - MF.getTarget().getInstrInfo(), - MF.getTarget().getRegisterInfo(), - getAnalysisIfAvailable<MachineModuleInfo>()); + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); + BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true); + return Folder.OptimizeFunction(MF, + MF.getTarget().getInstrInfo(), + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable<MachineModuleInfo>()); } @@ -132,7 +137,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { break; unsigned Reg = I->getOperand(0).getReg(); ImpDefRegs.insert(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) ImpDefRegs.insert(SubReg); ++I; @@ -179,8 +184,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, TII = tii; TRI = tri; MMI = mmi; + RS = NULL; - RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL; + // Use a RegScavenger to help update liveness when required. + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (MRI.tracksLiveness() && TRI->requiresRegisterScavenging(MF)) + RS = new RegScavenger(); + else + MRI.invalidateLiveness(); // Fix CFG. The later algorithms expect it to be right. bool MadeChange = false; @@ -208,7 +219,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, delete RS; return MadeChange; } - + // Walk the function to find jump tables that are live. 
BitVector JTIsLive(JTI->getJumpTables().size()); for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); @@ -432,10 +443,9 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, for (; I != E; ++I) { if (I->isDebugValue()) continue; - const MCInstrDesc &MCID = I->getDesc(); - if (MCID.isCall()) + if (I->isCall()) Time += 10; - else if (MCID.mayLoad() || MCID.mayStore()) + else if (I->mayLoad() || I->mayStore()) Time += 2; else ++Time; @@ -484,8 +494,9 @@ BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { // an object with itself. #ifndef _GLIBCXX_DEBUG llvm_unreachable("Predecessor appears twice"); -#endif +#else return false; +#endif } } @@ -502,7 +513,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, break; } --I; - if (!I->getDesc().isTerminator()) break; + if (!I->isTerminator()) break; ++NumTerms; } return NumTerms; @@ -550,8 +561,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // heuristics. unsigned EffectiveTailLen = CommonTailLen; if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && - !MBB1->back().getDesc().isBarrier() && - !MBB2->back().getDesc().isBarrier()) + !MBB1->back().isBarrier() && + !MBB2->back().isBarrier()) ++EffectiveTailLen; // Check if the common tail is long enough to be worthwhile. @@ -870,6 +881,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Visit each predecessor only once. if (!UniquePreds.insert(PBB)) continue; + // Skip blocks which may jump to a landing pad. Can't tail merge these. + if (PBB->getLandingPadSuccessor()) + continue; MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { @@ -924,8 +938,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(IBB, PredBB); // Reinsert an unconditional branch if needed. - // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks. - PredBB = prior(I); // this may have been changed in TryTailMergeBlocks + // The 1 below can occur as a result of removing blocks in + // TryTailMergeBlocks. + PredBB = prior(I); // this may have been changed in TryTailMergeBlocks if (MergePotentials.size() == 1 && MergePotentials.begin()->getBlock() != PredBB) FixTail(MergePotentials.begin()->getBlock(), IBB, TII); @@ -980,7 +995,7 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) { if (!MBBI->isDebugValue()) break; } - return (MBBI->getDesc().isBranch()); + return (MBBI->isBranch()); } /// IsBetterFallthrough - Return true if it would be clearly better to @@ -1008,7 +1023,23 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, MachineBasicBlock::iterator MBB2I = --MBB2->end(); while (MBB2I->isDebugValue()) --MBB2I; - return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall(); + return MBB2I->isCall() && !MBB1I->isCall(); +} + +/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch +/// instructions on the block. Always use the DebugLoc of the first +/// branching instruction found unless it's absent, in which case use the +/// DebugLoc of the second if present. 
+static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return DebugLoc(); + --I; + while (I->isDebugValue() && I != MBB.begin()) + --I; + if (I->isBranch()) + return I->getDebugLoc(); + return DebugLoc(); } /// OptimizeBlock - Analyze and optimize control flow related to the specified @@ -1016,7 +1047,6 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool MadeChange = false; MachineFunction &MF = *MBB->getParent(); - DebugLoc dl; // FIXME: this is nowhere ReoptimizeBlock: MachineFunction::iterator FallThrough = MBB; @@ -1065,6 +1095,7 @@ ReoptimizeBlock: // destination, remove the branch, replacing it with an unconditional one or // a fall-through. if (PriorTBB && PriorTBB == PriorFBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) @@ -1091,7 +1122,7 @@ ReoptimizeBlock: MachineBasicBlock::iterator PrevBBIter = PrevBB.end(); --PrevBBIter; MachineBasicBlock::iterator MBBIter = MBB->begin(); - // Check if DBG_VALUE at the end of PrevBB is identical to the + // Check if DBG_VALUE at the end of PrevBB is identical to the // DBG_VALUE at the beginning of MBB. while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end() && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) { @@ -1103,7 +1134,7 @@ ReoptimizeBlock: } } PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); - PrevBB.removeSuccessor(PrevBB.succ_begin());; + PrevBB.removeSuccessor(PrevBB.succ_begin()); assert(PrevBB.succ_empty()); PrevBB.transferSuccessors(MBB); MadeChange = true; @@ -1122,6 +1153,7 @@ ReoptimizeBlock: // If the prior block branches somewhere else on the condition and here if // the condition is false, remove the uncond second branch. if (PriorFBB == MBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); MadeChange = true; @@ -1135,6 +1167,7 @@ ReoptimizeBlock: if (PriorTBB == MBB) { SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); MadeChange = true; @@ -1172,6 +1205,7 @@ ReoptimizeBlock: DEBUG(dbgs() << "\nMoving MBB: " << *MBB << "To make fallthrough to: " << *PriorTBB << "\n"); + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); @@ -1201,6 +1235,7 @@ ReoptimizeBlock: if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { SmallVector<MachineOperand, 4> NewCond(CurCond); if (!TII->ReverseBranchCondition(NewCond)) { + DebugLoc dl = getBranchDebugLoc(*MBB); TII->RemoveBranch(*MBB); TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); MadeChange = true; @@ -1214,6 +1249,7 @@ ReoptimizeBlock: if (CurTBB && CurCond.empty() && CurFBB == 0 && IsBranchOnlyBlock(MBB) && CurTBB != MBB && !MBB->hasAddressTaken()) { + DebugLoc dl = getBranchDebugLoc(*MBB); // This block may contain just an unconditional branch. Because there can // be 'non-branch terminators' in the block, try removing the branch and // then seeing if the block is empty. 
@@ -1256,8 +1292,9 @@ ReoptimizeBlock: assert(PriorFBB == 0 && "Machine CFG out of date!"); PriorFBB = MBB; } + DebugLoc pdl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl); + TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); } // Iterate through all the predecessors, revectoring each in-turn. @@ -1281,9 +1318,10 @@ ReoptimizeBlock: bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB, NewCurFBB, NewCurCond, true); if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { + DebugLoc pdl = getBranchDebugLoc(*PMBB); TII->RemoveBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl); + TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl); MadeChange = true; ++NumBranchOpts; PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); @@ -1343,7 +1381,7 @@ ReoptimizeBlock: if (CurFallsThru) { MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); CurCond.clear(); - TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl); + TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc()); } MBB->moveAfter(PredBB); MadeChange = true; @@ -1446,7 +1484,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, continue; if (MO.isUse()) { Uses.insert(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) Uses.insert(*AS); } else if (!MO.isDead()) // Don't try to hoist code in the rare case the terminator defines a @@ -1469,6 +1507,9 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, bool IsDef = false; for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) { const MachineOperand &MO = PI->getOperand(i); + // If PI has a regmask operand, it is probably a call. Separate away. + if (MO.isRegMask()) + return Loc; if (!MO.isReg() || MO.isUse()) continue; unsigned Reg = MO.getReg(); @@ -1505,16 +1546,16 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, continue; if (MO.isUse()) { Uses.insert(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) Uses.insert(*AS); } else { if (Uses.count(Reg)) { Uses.erase(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) Uses.erase(*SR); // Use getSubRegisters to be conservative } Defs.insert(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) Defs.insert(*AS); } } @@ -1581,6 +1622,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { bool IsSafe = true; for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { MachineOperand &MO = TIB->getOperand(i); + // Don't attempt to hoist instructions with register masks. + if (MO.isRegMask()) { + IsSafe = false; + break; + } if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -1615,6 +1661,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { IsSafe = false; break; } + + if (MO.isKill() && Uses.count(Reg)) + // Kills a register that's read by the instruction at the point of + // insertion. Remove the kill marker. 
+ MO.setIsKill(false); } } if (!IsSafe) @@ -1632,7 +1683,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { unsigned Reg = MO.getReg(); if (!Reg || !LocalDefsSet.count(Reg)) continue; - for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR) + for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR) LocalDefsSet.erase(*OR); } @@ -1645,11 +1696,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!Reg) continue; LocalDefs.push_back(Reg); - for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR) + for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR) LocalDefsSet.insert(*OR); } - HasDups = true;; + HasDups = true; ++TIB; ++FIB; } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 9a5e55160114..21729cd6c380 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -9,10 +9,9 @@ add_llvm_library(LLVMCodeGen CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp + DFAPacketizer.cpp DwarfEHPrepare.cpp EdgeBundles.cpp - ELFCodeEmitter.cpp - ELFWriter.cpp ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp @@ -23,6 +22,7 @@ add_llvm_library(LLVMCodeGen InlineSpiller.cpp InterferenceCache.cpp IntrinsicLowering.cpp + JITCodeEmitter.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp LexicalScopes.cpp @@ -37,7 +37,10 @@ add_llvm_library(LLVMCodeGen LocalStackSlotAllocation.cpp MachineBasicBlock.cpp MachineBlockFrequencyInfo.cpp + MachineBlockPlacement.cpp MachineBranchProbabilityInfo.cpp + MachineCodeEmitter.cpp + MachineCopyPropagation.cpp MachineCSE.cpp MachineDominators.cpp MachineFunction.cpp @@ -45,6 +48,7 @@ add_llvm_library(LLVMCodeGen MachineFunctionPass.cpp MachineFunctionPrinterPass.cpp MachineInstr.cpp + MachineInstrBundle.cpp MachineLICM.cpp MachineLoopInfo.cpp MachineLoopRanges.cpp @@ -53,9 +57,9 @@ add_llvm_library(LLVMCodeGen MachinePassRegistry.cpp MachineRegisterInfo.cpp MachineSSAUpdater.cpp + MachineScheduler.cpp MachineSink.cpp MachineVerifier.cpp - ObjectCodeEmitter.cpp OcamlGC.cpp OptimizePHIs.cpp PHIElimination.cpp @@ -66,17 +70,16 @@ add_llvm_library(LLVMCodeGen ProcessImplicitDefs.cpp PrologEpilogInserter.cpp PseudoSourceValue.cpp + RegAllocBase.cpp RegAllocBasic.cpp RegAllocFast.cpp RegAllocGreedy.cpp - RegAllocLinearScan.cpp RegAllocPBQP.cpp RegisterClassInfo.cpp RegisterCoalescer.cpp RegisterScavenging.cpp RenderMachineFunction.cpp ScheduleDAG.cpp - ScheduleDAGEmit.cpp ScheduleDAGInstrs.cpp ScheduleDAGPrinter.cpp ScoreboardHazardRecognizer.cpp @@ -87,27 +90,17 @@ add_llvm_library(LLVMCodeGen Spiller.cpp SpillPlacement.cpp SplitKit.cpp - Splitter.cpp StackProtector.cpp StackSlotColoring.cpp StrongPHIElimination.cpp TailDuplication.cpp + TargetFrameLoweringImpl.cpp TargetInstrInfoImpl.cpp TargetLoweringObjectFileImpl.cpp + TargetOptionsImpl.cpp TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp VirtRegMap.cpp - VirtRegRewriter.cpp - ) - -add_llvm_library_dependencies(LLVMCodeGen - LLVMAnalysis - LLVMCore - LLVMMC - LLVMScalarOpts - LLVMSupport - LLVMTarget - LLVMTransformUtils ) add_subdirectory(SelectionDAG) diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 14eb0541dc8d..2b7dfdbe41a0 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -58,7 +58,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, /// MarkAllocated - Mark a register and all of its aliases as allocated. 
void CCState::MarkAllocated(unsigned Reg) { - for (const unsigned *Alias = TRI.getOverlaps(Reg); + for (const uint16_t *Alias = TRI.getOverlaps(Reg); unsigned Reg = *Alias; ++Alias) UsedRegs[Reg/32] |= 1 << (Reg&31); } diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 424535ba2a1c..a81bb5cc5566 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -19,36 +19,49 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. void llvm::initializeCodeGen(PassRegistry &Registry) { + initializeBranchFolderPassPass(Registry); initializeCalculateSpillWeightsPass(Registry); + initializeCodePlacementOptPass(Registry); initializeDeadMachineInstructionElimPass(Registry); + initializeExpandPostRAPass(Registry); + initializeExpandISelPseudosPass(Registry); + initializeFinalizeMachineBundlesPass(Registry); + initializeGCMachineCodeAnalysisPass(Registry); initializeGCModuleInfoPass(Registry); initializeIfConverterPass(Registry); initializeLiveDebugVariablesPass(Registry); initializeLiveIntervalsPass(Registry); initializeLiveStacksPass(Registry); initializeLiveVariablesPass(Registry); + initializeLocalStackSlotPassPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); + initializeMachineBlockPlacementPass(Registry); + initializeMachineBlockPlacementStatsPass(Registry); + initializeMachineCopyPropagationPass(Registry); initializeMachineCSEPass(Registry); initializeMachineDominatorTreePass(Registry); initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); initializeMachineModuleInfoPass(Registry); + initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); initializeMachineVerifierPassPass(Registry); initializeOptimizePHIsPass(Registry); initializePHIEliminationPass(Registry); initializePeepholeOptimizerPass(Registry); + initializePostRASchedulerPass(Registry); initializeProcessImplicitDefsPass(Registry); initializePEIPass(Registry); - initializeRALinScanPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenderMachineFunctionPass(Registry); initializeSlotIndexesPass(Registry); - initializeLoopSplitterPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); initializeStrongPHIEliminationPass(Registry); + initializeTailDuplicatePassPass(Registry); + initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); + initializeUnpackMachineBundlesPass(Registry); initializeUnreachableBlockElimPass(Registry); initializeUnreachableMachineBlockElimPass(Registry); initializeVirtRegMapPass(Registry); diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 270c337ef67e..c13c05e26a20 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -39,9 +39,6 @@ namespace { CodePlacementOpt() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { - return "Code Placement Optimizer"; - } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineLoopInfo>(); @@ -69,9 +66,9 @@ namespace { char CodePlacementOpt::ID = 0; } // end anonymous namespace -FunctionPass *llvm::createCodePlacementOptPass() { - return new CodePlacementOpt(); -} +char &llvm::CodePlacementOptID = CodePlacementOpt::ID; +INITIALIZE_PASS(CodePlacementOpt, "code-placement", + "Code Placement Optimizer", false, false) /// HasFallthrough - Test whether the given branch has a fallthrough, 
either as /// a plain fallthrough or as a fallthrough case of a conditional branch. diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 84c4d59c0e41..bad50103b9c3 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -35,7 +35,8 @@ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : RegClassInfo(RCI), Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)), KillIndices(TRI->getNumRegs(), 0), - DefIndices(TRI->getNumRegs(), 0) {} + DefIndices(TRI->getNumRegs(), 0), + KeepRegs(TRI->getNumRegs(), false) {} CriticalAntiDepBreaker::~CriticalAntiDepBreaker() { } @@ -52,9 +53,9 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { } // Clear "do not change" set. - KeepRegs.clear(); + KeepRegs.reset(); - bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); + bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn()); // Determine the live-out physregs for this block. if (IsReturnBlock) { @@ -63,14 +64,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { E = MRI.liveout_end(); I != E; ++I) { unsigned Reg = *I; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -85,14 +86,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -102,18 +103,18 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // callee-saved register that is not saved in the prolog. const MachineFrameInfo *MFI = MF.getFrameInfo(); BitVector Pristine = MFI->getPristineRegs(BB); - for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. 
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -121,7 +122,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { void CriticalAntiDepBreaker::FinishBlock() { RegRefs.clear(); - KeepRegs.clear(); + KeepRegs.reset(); } void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, @@ -193,8 +194,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { // instruction which may not be executed. The second R6 def may or may not // re-define R6 so it's not safe to change it since the last R6 use cannot be // changed. - bool Special = MI->getDesc().isCall() || - MI->getDesc().hasExtraSrcRegAllocReq() || + bool Special = MI->isCall() || + MI->hasExtraSrcRegAllocReq() || TII->isPredicated(MI); // Scan the register operands for this instruction and update @@ -217,7 +218,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); // Now check for aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { // If an alias of the reg is used during the live range, give up. // Note that this allows us to skip checking if AntiDepReg // overlaps with any of the aliases, among other things. @@ -233,10 +234,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { RegRefs.insert(std::make_pair(Reg, &MO)); if (MO.isUse() && Special) { - if (KeepRegs.insert(Reg)) { - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + if (!KeepRegs.test(Reg)) { + KeepRegs.set(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) - KeepRegs.insert(*Subreg); + KeepRegs.set(*Subreg); } } } @@ -253,6 +255,17 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, // address updates. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); + + if (MO.isRegMask()) + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) + if (MO.clobbersPhysReg(i)) { + DefIndices[i] = Count; + KillIndices[i] = ~0u; + KeepRegs.reset(i); + Classes[i] = 0; + RegRefs.erase(i); + } + if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; @@ -265,21 +278,21 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, assert(((KillIndices[Reg] == ~0u) != (DefIndices[Reg] == ~0u)) && "Kill and Def maps aren't consistent for Reg!"); - KeepRegs.erase(Reg); + KeepRegs.reset(Reg); Classes[Reg] = 0; RegRefs.erase(Reg); // Repeat, for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { unsigned SubregReg = *Subreg; DefIndices[SubregReg] = Count; KillIndices[SubregReg] = ~0u; - KeepRegs.erase(SubregReg); + KeepRegs.reset(SubregReg); Classes[SubregReg] = 0; RegRefs.erase(SubregReg); } // Conservatively mark super-registers as unusable. 
- for (const unsigned *Super = TRI->getSuperRegisters(Reg); + for (const uint16_t *Super = TRI->getSuperRegisters(Reg); *Super; ++Super) { unsigned SuperReg = *Super; Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1); @@ -315,7 +328,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, "Kill and Def maps aren't consistent for Reg!"); } // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; if (KillIndices[AliasReg] == ~0u) { KillIndices[AliasReg] = Count; @@ -355,6 +368,9 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &CheckOper = MI->getOperand(i); + if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg)) + return true; + if (!CheckOper.isReg() || !CheckOper.isDef() || CheckOper.getReg() != NewReg) continue; @@ -427,6 +443,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // Keep a map of the MachineInstr*'s back to the SUnit representing them. // This is used for updating debug information. + // + // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap DenseMap<MachineInstr*,const SUnit*> MISUnitMap; // Find the node at the bottom of the critical path. @@ -535,7 +553,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, if (!RegClassInfo.isAllocatable(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; - else if (KeepRegs.count(AntiDepReg)) + else if (KeepRegs.test(AntiDepReg)) // Don't break anti-dependencies if an use down below requires // this exact register. AntiDepReg = 0; @@ -572,7 +590,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). - if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI)) // If this instruction's defs have special allocation requirement, don't // break this anti-dependency. diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 07107802972d..77462593896e 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallSet.h" #include <map> namespace llvm { @@ -66,7 +65,7 @@ class TargetRegisterInfo; /// KeepRegs - A set of registers which are live and cannot be changed to /// break anti-dependencies. - SmallSet<unsigned, 4> KeepRegs; + BitVector KeepRegs; public: CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp new file mode 100644 index 000000000000..bfbe7790998f --- /dev/null +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -0,0 +1,223 @@ +//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// This class implements a deterministic finite automaton (DFA) based +// packetizing mechanism for VLIW architectures. It provides APIs to +// determine whether there exists a legal mapping of instructions to +// functional unit assignments in a packet. The DFA is auto-generated from +// the target's Schedule.td file. +// +// A DFA consists of 3 major elements: states, inputs, and transitions. For +// the packetizing mechanism, the input is the set of instruction classes for +// a target. The state models all possible combinations of functional unit +// consumption for a given set of instructions in a packet. A transition +// models the addition of an instruction to a packet. In the DFA constructed +// by this class, if an instruction can be added to a packet, then a valid +// transition exists from the corresponding state. Invalid transitions +// indicate that the instruction cannot be added to the current packet. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/MC/MCInstrItineraries.h" +using namespace llvm; + +DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2], + const unsigned *SET): + InstrItins(I), CurrentState(0), DFAStateInputTable(SIT), + DFAStateEntryTable(SET) {} + + +// +// ReadTable - Read the DFA transition table and update CachedTable. +// +// Format of the transition tables: +// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid +// transitions +// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable +// for the ith state +// +void DFAPacketizer::ReadTable(unsigned int state) { + unsigned ThisState = DFAStateEntryTable[state]; + unsigned NextStateInTable = DFAStateEntryTable[state+1]; + // Early exit in case CachedTable already contains this + // state's transitions. + if (CachedTable.count(UnsignPair(state, + DFAStateInputTable[ThisState][0]))) + return; + + for (unsigned i = ThisState; i < NextStateInTable; i++) + CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] = + DFAStateInputTable[i][1]; +} + + +// canReserveResources - Check if the resources occupied by a MCInstrDesc +// are available in the current state. +bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { + unsigned InsnClass = MID->getSchedClass(); + const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); + unsigned FuncUnits = IS->getUnits(); + UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + ReadTable(CurrentState); + return (CachedTable.count(StateTrans) != 0); +} + + +// reserveResources - Reserve the resources occupied by a MCInstrDesc and +// change the current state to reflect that change. +void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { + unsigned InsnClass = MID->getSchedClass(); + const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); + unsigned FuncUnits = IS->getUnits(); + UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + ReadTable(CurrentState); + assert(CachedTable.count(StateTrans) != 0); + CurrentState = CachedTable[StateTrans]; +} + + +// canReserveResources - Check if the resources occupied by a machine +// instruction are available in the current state. 
+bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) {
+  const llvm::MCInstrDesc &MID = MI->getDesc();
+  return canReserveResources(&MID);
+}
+
+// reserveResources - Reserve the resources occupied by a machine
+// instruction and change the current state to reflect that change.
+void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
+  const llvm::MCInstrDesc &MID = MI->getDesc();
+  reserveResources(&MID);
+}
+
+namespace llvm {
+// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
+// the schedule method to build the dependence graph.
+class DefaultVLIWScheduler : public ScheduleDAGInstrs {
+public:
+  DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
+                       MachineDominatorTree &MDT, bool IsPostRA);
+  // schedule - Actual scheduling work.
+  void schedule();
+};
+}
+
+DefaultVLIWScheduler::DefaultVLIWScheduler(
+  MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+  bool IsPostRA) :
+  ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) {
+}
+
+void DefaultVLIWScheduler::schedule() {
+  // Build the scheduling graph.
+  buildSchedGraph(0);
+}
+
+// VLIWPacketizerList Ctor
+VLIWPacketizerList::VLIWPacketizerList(
+  MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+  bool IsPostRA) : TM(MF.getTarget()), MF(MF) {
+  TII = TM.getInstrInfo();
+  ResourceTracker = TII->CreateTargetScheduleState(&TM, 0);
+  VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
+}
+
+// VLIWPacketizerList Dtor
+VLIWPacketizerList::~VLIWPacketizerList() {
+  if (VLIWScheduler)
+    delete VLIWScheduler;
+
+  if (ResourceTracker)
+    delete ResourceTracker;
+}
+
+// endPacket - End the current packet, bundle packet instructions and reset
+// DFA state.
+void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
+                                   MachineInstr *MI) {
+  if (CurrentPacketMIs.size() > 1) {
+    MachineInstr *MIFirst = CurrentPacketMIs.front();
+    finalizeBundle(*MBB, MIFirst, MI);
+  }
+  CurrentPacketMIs.clear();
+  ResourceTracker->clearResources();
+}
+
+// PacketizeMIs - Bundle machine instructions into packets.
+void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
+                                      MachineBasicBlock::iterator BeginItr,
+                                      MachineBasicBlock::iterator EndItr) {
+  assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
+  VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size());
+  VLIWScheduler->schedule();
+  VLIWScheduler->exitRegion();
+
+  // Generate MI -> SU map.
+  //std::map <MachineInstr*, SUnit*> MIToSUnit;
+  MIToSUnit.clear();
+  for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) {
+    SUnit *SU = &VLIWScheduler->SUnits[i];
+    MIToSUnit[SU->getInstr()] = SU;
+  }
+
+  // The main packetizer loop.
+  for (; BeginItr != EndItr; ++BeginItr) {
+    MachineInstr *MI = BeginItr;
+
+    this->initPacketizerState();
+
+    // End the current packet if needed.
+    if (this->isSoloInstruction(MI)) {
+      endPacket(MBB, MI);
+      continue;
+    }
+
+    // Ignore pseudo instructions.
+    if (this->ignorePseudoInstruction(MI, MBB))
+      continue;
+
+    SUnit *SUI = MIToSUnit[MI];
+    assert(SUI && "Missing SUnit Info!");
+
+    // Ask DFA if machine resource is available for MI.
+    bool ResourceAvail = ResourceTracker->canReserveResources(MI);
+    if (ResourceAvail) {
+      // Dependency check for MI with instructions in CurrentPacketMIs.
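+      // Every instruction already in the current packet is compared against
+      // MI; if any pair is neither legal to packetize together nor legal to
+      // prune, the packet is ended before MI is considered again.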
+ for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(), + VE = CurrentPacketMIs.end(); VI != VE; ++VI) { + MachineInstr *MJ = *VI; + SUnit *SUJ = MIToSUnit[MJ]; + assert(SUJ && "Missing SUnit Info!"); + + // Is it legal to packetize SUI and SUJ together. + if (!this->isLegalToPacketizeTogether(SUI, SUJ)) { + // Allow packetization if dependency can be pruned. + if (!this->isLegalToPruneDependencies(SUI, SUJ)) { + // End the packet if dependency cannot be pruned. + endPacket(MBB, MI); + break; + } // !isLegalToPruneDependencies. + } // !isLegalToPacketizeTogether. + } // For all instructions in CurrentPacketMIs. + } else { + // End the packet if resource is not available. + endPacket(MBB, MI); + } + + // Add MI to the current packet. + BeginItr = this->addToPacket(MI); + } // For all instructions in BB. + + // End any packet left behind. + endPacket(MBB, EndItr); +} diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 6de6c0cb81bd..aa10d1d41f2b 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -28,11 +28,12 @@ STATISTIC(NumDeletes, "Number of dead instructions deleted"); namespace { class DeadMachineInstructionElim : public MachineFunctionPass { virtual bool runOnMachineFunction(MachineFunction &MF); - + const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; const TargetInstrInfo *TII; BitVector LivePhysRegs; + BitVector ReservedRegs; public: static char ID; // Pass identification, replacement for typeid @@ -45,14 +46,11 @@ namespace { }; } char DeadMachineInstructionElim::ID = 0; +char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID; INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination", "Remove dead machine instructions", false, false) -FunctionPass *llvm::createDeadMachineInstructionElimPass() { - return new DeadMachineInstructionElim(); -} - bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { // Technically speaking inline asm without side effects and no defs can still // be deleted. But there is so much bad inline asm code out there, we should @@ -70,10 +68,14 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) ? - LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) { - // This def has a non-debug use. Don't delete the instruction! - return false; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Don't delete live physreg defs, or any reserved register defs. + if (LivePhysRegs.test(Reg) || ReservedRegs.test(Reg)) + return false; + } else { + if (!MRI->use_nodbg_empty(Reg)) + // This def has a non-debug use. Don't delete the instruction! + return false; } } } @@ -89,7 +91,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); // Treat reserved registers as always live. - BitVector ReservedRegs = TRI->getReservedRegs(MF); + ReservedRegs = TRI->getReservedRegs(MF); // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will @@ -102,7 +104,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { LivePhysRegs = ReservedRegs; // Also add any explicit live-out physregs for this block. 
- if (!MBB->empty() && MBB->back().getDesc().isReturn()) + if (!MBB->empty() && MBB->back().isReturn()) for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(), LOE = MRI->liveout_end(); LOI != LOE; ++LOI) { unsigned Reg = *LOI; @@ -169,10 +171,13 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // Check the subreg set, not the alias set, because a def // of a super-register may still be partially live after // this def. - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); *SubRegs; ++SubRegs) LivePhysRegs.reset(*SubRegs); } + } else if (MO.isRegMask()) { + // Register mask of preserved registers. All clobbers are dead. + LivePhysRegs.clearBitsNotInMask(MO.getRegMask()); } } // Record the physreg uses, after the defs, in case a physreg is @@ -183,7 +188,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { LivePhysRegs.set(Reg); - for (const unsigned *AliasSet = TRI->getAliasSet(Reg); + for (const uint16_t *AliasSet = TRI->getAliasSet(Reg); *AliasSet; ++AliasSet) LivePhysRegs.set(*AliasSet); } diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index ed9e409d3e5a..944dd4fb41c8 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -28,98 +28,34 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; -STATISTIC(NumLandingPadsSplit, "Number of landing pads split"); -STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered"); -STATISTIC(NumResumesLowered, "Number of eh.resume calls lowered"); -STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved"); +STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { class DwarfEHPrepare : public FunctionPass { const TargetMachine *TM; const TargetLowering *TLI; - // The eh.exception intrinsic. - Function *ExceptionValueIntrinsic; - - // The eh.selector intrinsic. - Function *SelectorIntrinsic; - - // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call. - Constant *URoR; - - // The EH language-specific catch-all type. - GlobalVariable *EHCatchAllValue; - - // _Unwind_Resume or the target equivalent. + // RewindFunction - _Unwind_Resume or the target equivalent. Constant *RewindFunction; - // We both use and preserve dominator info. - DominatorTree *DT; - - // The function we are running on. - Function *F; - - // The landing pads for this function. - typedef SmallPtrSet<BasicBlock*, 8> BBSet; - BBSet LandingPads; - - bool InsertUnwindResumeCalls(); - - bool NormalizeLandingPads(); - bool LowerUnwindsAndResumes(); - bool MoveExceptionValueCalls(); - - Instruction *CreateExceptionValueCall(BasicBlock *BB); - - /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still - /// use the "llvm.eh.catch.all.value" call need to convert to using its - /// initializer instead. - bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels); - - bool HasCatchAllInSelector(IntrinsicInst *); + bool InsertUnwindResumeCalls(Function &Fn); + Instruction *GetExceptionObject(ResumeInst *RI); - /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. - void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels, - SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels); - - /// FindAllURoRInvokes - Find all URoR invokes in the function. 
- void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes); - - /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or - /// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to - /// a landing pad within the current function. This is a candidate to merge - /// the selector associated with the URoR invoke with the one from the - /// URoR's landing pad. - bool HandleURoRInvokes(); - - /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated - /// with the eh.exception call. This recursively looks past instructions - /// which don't change the EH pointer value, like casts or PHI nodes. - bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, - SmallPtrSet<IntrinsicInst*, 8> &SelCalls, - SmallPtrSet<PHINode*, 32> &SeenPHIs); - public: static char ID; // Pass identification, replacement for typeid. DwarfEHPrepare(const TargetMachine *tm) : FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()), - ExceptionValueIntrinsic(0), SelectorIntrinsic(0), - URoR(0), EHCatchAllValue(0), RewindFunction(0) { + RewindFunction(0) { initializeDominatorTreePass(*PassRegistry::getPassRegistry()); } virtual bool runOnFunction(Function &Fn); - // getAnalysisUsage - We need the dominator tree for handling URoR. - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<DominatorTree>(); - AU.addPreserved<DominatorTree>(); - } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { } const char *getPassName() const { return "Exception handling preparation"; } - }; } // end anonymous namespace @@ -129,543 +65,52 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) { return new DwarfEHPrepare(tm); } -/// HasCatchAllInSelector - Return true if the intrinsic instruction has a -/// catch-all. -bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) { - if (!EHCatchAllValue) return false; - - unsigned ArgIdx = II->getNumArgOperands() - 1; - GlobalVariable *GV = dyn_cast<GlobalVariable>(II->getArgOperand(ArgIdx)); - return GV == EHCatchAllValue; -} - -/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. -void DwarfEHPrepare:: -FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels, - SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels) { - for (Value::use_iterator - I = SelectorIntrinsic->use_begin(), - E = SelectorIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *II = cast<IntrinsicInst>(*I); - - if (II->getParent()->getParent() != F) - continue; - - if (!HasCatchAllInSelector(II)) - Sels.insert(II); - else - CatchAllSels.insert(II); - } -} - -/// FindAllURoRInvokes - Find all URoR invokes in the function. -void DwarfEHPrepare:: -FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) { - for (Value::use_iterator - I = URoR->use_begin(), - E = URoR->use_end(); I != E; ++I) { - if (InvokeInst *II = dyn_cast<InvokeInst>(*I)) - URoRInvokes.insert(II); - } -} - -/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use -/// the "llvm.eh.catch.all.value" call need to convert to using its -/// initializer instead. 
-bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) { - if (!EHCatchAllValue) return false; - - if (!SelectorIntrinsic) { - SelectorIntrinsic = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector); - if (!SelectorIntrinsic) return false; - } - - bool Changed = false; - for (SmallPtrSet<IntrinsicInst*, 32>::iterator - I = Sels.begin(), E = Sels.end(); I != E; ++I) { - IntrinsicInst *Sel = *I; - - // Index of the "llvm.eh.catch.all.value" variable. - unsigned OpIdx = Sel->getNumArgOperands() - 1; - GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx)); - if (GV != EHCatchAllValue) continue; - Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer()); - Changed = true; - } - - return Changed; -} - -/// FindSelectorAndURoR - Find the eh.selector call associated with the -/// eh.exception call. And indicate if there is a URoR "invoke" associated with -/// the eh.exception call. This recursively looks past instructions which don't -/// change the EH pointer value, like casts or PHI nodes. -bool -DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, - SmallPtrSet<IntrinsicInst*, 8> &SelCalls, - SmallPtrSet<PHINode*, 32> &SeenPHIs) { - bool Changed = false; - - for (Value::use_iterator - I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) { - Instruction *II = dyn_cast<Instruction>(*I); - if (!II || II->getParent()->getParent() != F) continue; - - if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) { - if (Sel->getIntrinsicID() == Intrinsic::eh_selector) - SelCalls.insert(Sel); - } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) { - if (Invoke->getCalledFunction() == URoR) - URoRInvoke = true; - } else if (CastInst *CI = dyn_cast<CastInst>(II)) { - Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls, SeenPHIs); - } else if (PHINode *PN = dyn_cast<PHINode>(II)) { - if (SeenPHIs.insert(PN)) - // Don't process a PHI node more than once. - Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls, SeenPHIs); - } - } - - return Changed; -} - -/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or -/// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to a -/// landing pad within the current function. This is a candidate to merge the -/// selector associated with the URoR invoke with the one from the URoR's -/// landing pad. 
-bool DwarfEHPrepare::HandleURoRInvokes() { - if (!EHCatchAllValue) { - EHCatchAllValue = - F->getParent()->getNamedGlobal("llvm.eh.catch.all.value"); - if (!EHCatchAllValue) return false; - } - - if (!SelectorIntrinsic) { - SelectorIntrinsic = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector); - if (!SelectorIntrinsic) return false; - } - - SmallPtrSet<IntrinsicInst*, 32> Sels; - SmallPtrSet<IntrinsicInst*, 32> CatchAllSels; - FindAllCleanupSelectors(Sels, CatchAllSels); - - if (!URoR) { - URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); - if (!URoR) return CleanupSelectors(CatchAllSels); - } - - SmallPtrSet<InvokeInst*, 32> URoRInvokes; - FindAllURoRInvokes(URoRInvokes); - - SmallPtrSet<IntrinsicInst*, 32> SelsToConvert; - - for (SmallPtrSet<IntrinsicInst*, 32>::iterator - SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) { - const BasicBlock *SelBB = (*SI)->getParent(); - for (SmallPtrSet<InvokeInst*, 32>::iterator - UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) { - const BasicBlock *URoRBB = (*UI)->getParent(); - if (DT->dominates(SelBB, URoRBB)) { - SelsToConvert.insert(*SI); - break; +/// GetExceptionObject - Return the exception object from the value passed into +/// the 'resume' instruction (typically an aggregate). Clean up any dead +/// instructions, including the 'resume' instruction. +Instruction *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { + Value *V = RI->getOperand(0); + Instruction *ExnObj = 0; + InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V); + LoadInst *SelLoad = 0; + InsertValueInst *ExcIVI = 0; + bool EraseIVIs = false; + + if (SelIVI) { + if (SelIVI->getNumIndices() == 1 && *SelIVI->idx_begin() == 1) { + ExcIVI = dyn_cast<InsertValueInst>(SelIVI->getOperand(0)); + if (ExcIVI && isa<UndefValue>(ExcIVI->getOperand(0)) && + ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) { + ExnObj = cast<Instruction>(ExcIVI->getOperand(1)); + SelLoad = dyn_cast<LoadInst>(SelIVI->getOperand(1)); + EraseIVIs = true; } } } - bool Changed = false; - - if (Sels.size() != SelsToConvert.size()) { - // If we haven't been able to convert all of the clean-up selectors, then - // loop through the slow way to see if they still need to be converted. - if (!ExceptionValueIntrinsic) { - ExceptionValueIntrinsic = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception); - if (!ExceptionValueIntrinsic) - return CleanupSelectors(CatchAllSels); - } - - for (Value::use_iterator - I = ExceptionValueIntrinsic->use_begin(), - E = ExceptionValueIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I); - if (!EHPtr || EHPtr->getParent()->getParent() != F) continue; - - bool URoRInvoke = false; - SmallPtrSet<IntrinsicInst*, 8> SelCalls; - SmallPtrSet<PHINode*, 32> SeenPHIs; - Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls, SeenPHIs); - - if (URoRInvoke) { - // This EH pointer is being used by an invoke of an URoR instruction and - // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we - // need to convert it to a 'catch-all'. - for (SmallPtrSet<IntrinsicInst*, 8>::iterator - SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) - if (!HasCatchAllInSelector(*SI)) - SelsToConvert.insert(*SI); - } - } - } - - if (!SelsToConvert.empty()) { - // Convert all clean-up eh.selectors, which are associated with "invokes" of - // URoR calls, into catch-all eh.selectors. 
- Changed = true; - - for (SmallPtrSet<IntrinsicInst*, 8>::iterator - SI = SelsToConvert.begin(), SE = SelsToConvert.end(); - SI != SE; ++SI) { - IntrinsicInst *II = *SI; - - // Use the exception object pointer and the personality function - // from the original selector. - CallSite CS(II); - IntrinsicInst::op_iterator I = CS.arg_begin(); - IntrinsicInst::op_iterator E = CS.arg_end(); - IntrinsicInst::op_iterator B = prior(E); - - // Exclude last argument if it is an integer. - if (isa<ConstantInt>(B)) E = B; + if (!ExnObj) + ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI); - // Add exception object pointer (front). - // Add personality function (next). - // Add in any filter IDs (rest). - SmallVector<Value*, 8> Args(I, E); + RI->eraseFromParent(); - Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator. - - CallInst *NewSelector = - CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II); - - NewSelector->setTailCall(II->isTailCall()); - NewSelector->setAttributes(II->getAttributes()); - NewSelector->setCallingConv(II->getCallingConv()); - - II->replaceAllUsesWith(NewSelector); - II->eraseFromParent(); - } + if (EraseIVIs) { + if (SelIVI->getNumUses() == 0) + SelIVI->eraseFromParent(); + if (ExcIVI->getNumUses() == 0) + ExcIVI->eraseFromParent(); + if (SelLoad && SelLoad->getNumUses() == 0) + SelLoad->eraseFromParent(); } - Changed |= CleanupSelectors(CatchAllSels); - return Changed; -} - -/// NormalizeLandingPads - Normalize and discover landing pads, noting them -/// in the LandingPads set. A landing pad is normal if the only CFG edges -/// that end at it are unwind edges from invoke instructions. If we inlined -/// through an invoke we could have a normal branch from the previous -/// unwind block through to the landing pad for the original invoke. -/// Abnormal landing pads are fixed up by redirecting all unwind edges to -/// a new basic block which falls through to the original. -bool DwarfEHPrepare::NormalizeLandingPads() { - bool Changed = false; - - const MCAsmInfo *MAI = TM->getMCAsmInfo(); - bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; - - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - TerminatorInst *TI = I->getTerminator(); - if (!isa<InvokeInst>(TI)) - continue; - BasicBlock *LPad = TI->getSuccessor(1); - // Skip landing pads that have already been normalized. - if (LandingPads.count(LPad)) - continue; - - // Check that only invoke unwind edges end at the landing pad. - bool OnlyUnwoundTo = true; - bool SwitchOK = usingSjLjEH; - for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); - PI != PE; ++PI) { - TerminatorInst *PT = (*PI)->getTerminator(); - // The SjLj dispatch block uses a switch instruction. This is effectively - // an unwind edge, so we can disregard it here. There will only ever - // be one dispatch, however, so if there are multiple switches, one - // of them truly is a normal edge, not an unwind edge. - if (SwitchOK && isa<SwitchInst>(PT)) { - SwitchOK = false; - continue; - } - if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) { - OnlyUnwoundTo = false; - break; - } - } - - if (OnlyUnwoundTo) { - // Only unwind edges lead to the landing pad. Remember the landing pad. - LandingPads.insert(LPad); - continue; - } - - // At least one normal edge ends at the landing pad. Redirect the unwind - // edges to a new basic block which falls through into this one. - - // Create the new basic block. 
- BasicBlock *NewBB = BasicBlock::Create(F->getContext(), - LPad->getName() + "_unwind_edge"); - - // Insert it into the function right before the original landing pad. - LPad->getParent()->getBasicBlockList().insert(LPad, NewBB); - - // Redirect unwind edges from the original landing pad to NewBB. - for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) { - TerminatorInst *PT = (*PI++)->getTerminator(); - if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad) - // Unwind to the new block. - PT->setSuccessor(1, NewBB); - } - - // If there are any PHI nodes in LPad, we need to update them so that they - // merge incoming values from NewBB instead. - for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) { - PHINode *PN = cast<PHINode>(II); - pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB); - - // Check to see if all of the values coming in via unwind edges are the - // same. If so, we don't need to create a new PHI node. - Value *InVal = PN->getIncomingValueForBlock(*PB); - for (pred_iterator PI = PB; PI != PE; ++PI) { - if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) { - InVal = 0; - break; - } - } - - if (InVal == 0) { - // Different unwind edges have different values. Create a new PHI node - // in NewBB. - PHINode *NewPN = PHINode::Create(PN->getType(), - PN->getNumIncomingValues(), - PN->getName()+".unwind", NewBB); - // Add an entry for each unwind edge, using the value from the old PHI. - for (pred_iterator PI = PB; PI != PE; ++PI) - NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI); - - // Now use this new PHI as the common incoming value for NewBB in PN. - InVal = NewPN; - } - - // Revector exactly one entry in the PHI node to come from NewBB - // and delete all other entries that come from unwind edges. If - // there are both normal and unwind edges from the same predecessor, - // this leaves an entry for the normal edge. - for (pred_iterator PI = PB; PI != PE; ++PI) - PN->removeIncomingValue(*PI); - PN->addIncoming(InVal, NewBB); - } - - // Add a fallthrough from NewBB to the original landing pad. - BranchInst::Create(LPad, NewBB); - - // Now update DominatorTree analysis information. - DT->splitBlock(NewBB); - - // Remember the newly constructed landing pad. The original landing pad - // LPad is no longer a landing pad now that all unwind edges have been - // revectored to NewBB. - LandingPads.insert(NewBB); - ++NumLandingPadsSplit; - Changed = true; - } - - return Changed; -} - -/// LowerUnwinds - Turn unwind instructions into calls to _Unwind_Resume, -/// rethrowing any previously caught exception. This will crash horribly -/// at runtime if there is no such exception: using unwind to throw a new -/// exception is currently not supported. -bool DwarfEHPrepare::LowerUnwindsAndResumes() { - SmallVector<Instruction*, 16> ResumeInsts; - - for (Function::iterator fi = F->begin(), fe = F->end(); fi != fe; ++fi) { - for (BasicBlock::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi){ - if (isa<UnwindInst>(bi)) - ResumeInsts.push_back(bi); - else if (CallInst *call = dyn_cast<CallInst>(bi)) - if (Function *fn = dyn_cast<Function>(call->getCalledValue())) - if (fn->getName() == "llvm.eh.resume") - ResumeInsts.push_back(bi); - } - } - - if (ResumeInsts.empty()) return false; - - // Find the rewind function if we didn't already. 
- if (!RewindFunction) { - LLVMContext &Ctx = ResumeInsts[0]->getContext(); - FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), - Type::getInt8PtrTy(Ctx), false); - const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); - RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); - } - - bool Changed = false; - - for (SmallVectorImpl<Instruction*>::iterator - I = ResumeInsts.begin(), E = ResumeInsts.end(); I != E; ++I) { - Instruction *RI = *I; - - // Replace the resuming instruction with a call to _Unwind_Resume (or the - // appropriate target equivalent). - - llvm::Value *ExnValue; - if (isa<UnwindInst>(RI)) - ExnValue = CreateExceptionValueCall(RI->getParent()); - else - ExnValue = cast<CallInst>(RI)->getArgOperand(0); - - // Create the call... - CallInst *CI = CallInst::Create(RewindFunction, ExnValue, "", RI); - CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); - - // ...followed by an UnreachableInst, if it was an unwind. - // Calls to llvm.eh.resume are typically already followed by this. - if (isa<UnwindInst>(RI)) - new UnreachableInst(RI->getContext(), RI); - - if (isa<UnwindInst>(RI)) - ++NumUnwindsLowered; - else - ++NumResumesLowered; - - // Nuke the resume instruction. - RI->eraseFromParent(); - - Changed = true; - } - - return Changed; -} - -/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from -/// landing pads by replacing calls outside of landing pads with direct use of -/// a register holding the appropriate value; this requires adding calls inside -/// all landing pads to initialize the register. Also, move eh.exception calls -/// inside landing pads to the start of the landing pad (optional, but may make -/// things simpler for later passes). -bool DwarfEHPrepare::MoveExceptionValueCalls() { - // If the eh.exception intrinsic is not declared in the module then there is - // nothing to do. Speed up compilation by checking for this common case. - if (!ExceptionValueIntrinsic && - !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception))) - return false; - - bool Changed = false; - - // Move calls to eh.exception that are inside a landing pad to the start of - // the landing pad. - for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end(); - LI != LE; ++LI) { - BasicBlock *LP = *LI; - for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end(); - II != IE;) - if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) { - // Found a call to eh.exception. - if (!EI->use_empty()) { - // If there is already a call to eh.exception at the start of the - // landing pad, then get hold of it; otherwise create such a call. - Value *CallAtStart = CreateExceptionValueCall(LP); - - // If the call was at the start of a landing pad then leave it alone. - if (EI == CallAtStart) - continue; - EI->replaceAllUsesWith(CallAtStart); - } - EI->eraseFromParent(); - ++NumExceptionValuesMoved; - Changed = true; - } - } - - // Look for calls to eh.exception that are not in a landing pad. If one is - // found, then a register that holds the exception value will be created in - // each landing pad, and the SSAUpdater will be used to compute the values - // returned by eh.exception calls outside of landing pads. - SSAUpdater SSA; - - // Remember where we found the eh.exception call, to avoid rescanning earlier - // basic blocks which we already know contain no eh.exception calls. 
- bool FoundCallOutsideLandingPad = false; - Function::iterator BB = F->begin(); - for (Function::iterator BE = F->end(); BB != BE; ++BB) { - // Skip over landing pads. - if (LandingPads.count(BB)) - continue; - - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); - II != IE; ++II) - if (isa<EHExceptionInst>(II)) { - SSA.Initialize(II->getType(), II->getName()); - FoundCallOutsideLandingPad = true; - break; - } - - if (FoundCallOutsideLandingPad) - break; - } - - // If all calls to eh.exception are in landing pads then we are done. - if (!FoundCallOutsideLandingPad) - return Changed; - - // Add a call to eh.exception at the start of each landing pad, and tell the - // SSAUpdater that this is the value produced by the landing pad. - for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end(); - LI != LE; ++LI) - SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI)); - - // Now turn all calls to eh.exception that are not in a landing pad into a use - // of the appropriate register. - for (Function::iterator BE = F->end(); BB != BE; ++BB) { - // Skip over landing pads. - if (LandingPads.count(BB)) - continue; - - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); - II != IE;) - if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) { - // Found a call to eh.exception, replace it with the value from any - // upstream landing pad(s). - EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB)); - EI->eraseFromParent(); - ++NumExceptionValuesMoved; - } - } - - return true; -} - -/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at -/// the start of the basic block (unless there already is one, in which case -/// the existing call is returned). -Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) { - Instruction *Start = BB->getFirstNonPHIOrDbg(); - // Is this a call to eh.exception? - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start)) - if (CI->getIntrinsicID() == Intrinsic::eh_exception) - // Reuse the existing call. - return Start; - - // Find the eh.exception intrinsic if we didn't already. - if (!ExceptionValueIntrinsic) - ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(), - Intrinsic::eh_exception); - - // Create the call. - return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start); + return ExnObj; } /// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present /// into calls to the appropriate _Unwind_Resume function. -bool DwarfEHPrepare::InsertUnwindResumeCalls() { +bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { bool UsesNewEH = false; SmallVector<ResumeInst*, 16> Resumes; - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); if (ResumeInst *RI = dyn_cast<ResumeInst>(TI)) Resumes.push_back(RI); @@ -682,27 +127,45 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false); const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); - RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); + RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy); } // Create the basic block where the _Unwind_Resume call will live. 
- LLVMContext &Ctx = F->getContext(); - BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", F); - PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), Resumes.size(), + LLVMContext &Ctx = Fn.getContext(); + unsigned ResumesSize = Resumes.size(); + + if (ResumesSize == 1) { + // Instead of creating a new BB and PHI node, just append the call to + // _Unwind_Resume to the end of the single resume block. + ResumeInst *RI = Resumes.front(); + BasicBlock *UnwindBB = RI->getParent(); + Instruction *ExnObj = GetExceptionObject(RI); + + // Call the _Unwind_Resume function. + CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB); + CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); + + // We never expect _Unwind_Resume to return. + new UnreachableInst(Ctx, UnwindBB); + return true; + } + + BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn); + PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize, "exn.obj", UnwindBB); // Extract the exception object from the ResumeInst and add it to the PHI node // that feeds the _Unwind_Resume call. - BasicBlock *UnwindBBDom = Resumes[0]->getParent(); for (SmallVectorImpl<ResumeInst*>::iterator I = Resumes.begin(), E = Resumes.end(); I != E; ++I) { ResumeInst *RI = *I; - BranchInst::Create(UnwindBB, RI->getParent()); - ExtractValueInst *ExnObj = ExtractValueInst::Create(RI->getOperand(0), - 0, "exn.obj", RI); - PN->addIncoming(ExnObj, RI->getParent()); - UnwindBBDom = DT->findNearestCommonDominator(RI->getParent(), UnwindBBDom); - RI->eraseFromParent(); + BasicBlock *Parent = RI->getParent(); + BranchInst::Create(UnwindBB, Parent); + + Instruction *ExnObj = GetExceptionObject(RI); + PN->addIncoming(ExnObj, Parent); + + ++NumResumesLowered; } // Call the function. @@ -711,40 +174,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { // We never expect _Unwind_Resume to return. new UnreachableInst(Ctx, UnwindBB); - - // Now update DominatorTree analysis information. - DT->addNewBlock(UnwindBB, UnwindBBDom); return true; } bool DwarfEHPrepare::runOnFunction(Function &Fn) { - bool Changed = false; - - // Initialize internal state. - DT = &getAnalysis<DominatorTree>(); // FIXME: We won't need this with the new EH. - F = &Fn; - - if (InsertUnwindResumeCalls()) { - // FIXME: The reset of this function can go once the new EH is done. - LandingPads.clear(); - return true; - } - - // Ensure that only unwind edges end at landing pads (a landing pad is a - // basic block where an invoke unwind edge ends). - Changed |= NormalizeLandingPads(); - - // Turn unwind instructions and eh.resume calls into libcalls. - Changed |= LowerUnwindsAndResumes(); - - // TODO: Move eh.selector calls to landing pads and combine them. - - // Move eh.exception calls to landing pads. - Changed |= MoveExceptionValueCalls(); - - Changed |= HandleURoRInvokes(); - - LandingPads.clear(); - + bool Changed = InsertUnwindResumeCalls(Fn); return Changed; } diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h deleted file mode 100644 index 5b634682cc87..000000000000 --- a/lib/CodeGen/ELF.h +++ /dev/null @@ -1,227 +0,0 @@ -//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This header contains common, non-processor-specific data structures and -// constants for the ELF file format. -// -// The details of the ELF32 bits in this file are largely based on the Tool -// Interface Standard (TIS) Executable and Linking Format (ELF) Specification -// Version 1.2, May 1995. The ELF64 is based on HP/Intel definition of the -// ELF-64 object file format document, Version 1.5 Draft 2 May 27, 1998 -// -//===----------------------------------------------------------------------===// - -#ifndef CODEGEN_ELF_H -#define CODEGEN_ELF_H - -#include "llvm/CodeGen/BinaryObject.h" -#include "llvm/CodeGen/MachineRelocation.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/DataTypes.h" - -namespace llvm { - class GlobalValue; - - /// ELFSym - This struct contains information about each symbol that is - /// added to logical symbol table for the module. This is eventually - /// turned into a real symbol table in the file. - struct ELFSym { - - // ELF symbols are related to llvm ones by being one of the two llvm - // types, for the other ones (section, file, func) a null pointer is - // assumed by default. - union { - const GlobalValue *GV; // If this is a pointer to a GV - const char *Ext; // If this is a pointer to a named symbol - } Source; - - // Describes from which source type this ELF symbol comes from, - // they can be GlobalValue, ExternalSymbol or neither. - enum { - isGV, // The Source.GV field is valid. - isExtSym, // The Source.ExtSym field is valid. - isOther // Not a GlobalValue or External Symbol - }; - unsigned SourceType; - - bool isGlobalValue() const { return SourceType == isGV; } - bool isExternalSym() const { return SourceType == isExtSym; } - - // getGlobalValue - If this is a global value which originated the - // elf symbol, return a reference to it. - const GlobalValue *getGlobalValue() const { - assert(SourceType == isGV && "This is not a global value"); - return Source.GV; - } - - // getExternalSym - If this is an external symbol which originated the - // elf symbol, return a reference to it. 
- const char *getExternalSymbol() const { - assert(SourceType == isExtSym && "This is not an external symbol"); - return Source.Ext; - } - - // getGV - From a global value return a elf symbol to represent it - static ELFSym *getGV(const GlobalValue *GV, unsigned Bind, - unsigned Type, unsigned Visibility) { - ELFSym *Sym = new ELFSym(); - Sym->Source.GV = GV; - Sym->setBind(Bind); - Sym->setType(Type); - Sym->setVisibility(Visibility); - Sym->SourceType = isGV; - return Sym; - } - - // getExtSym - Create and return an elf symbol to represent an - // external symbol - static ELFSym *getExtSym(const char *Ext) { - ELFSym *Sym = new ELFSym(); - Sym->Source.Ext = Ext; - Sym->setBind(ELF::STB_GLOBAL); - Sym->setType(ELF::STT_NOTYPE); - Sym->setVisibility(ELF::STV_DEFAULT); - Sym->SourceType = isExtSym; - return Sym; - } - - // getSectionSym - Returns a elf symbol to represent an elf section - static ELFSym *getSectionSym() { - ELFSym *Sym = new ELFSym(); - Sym->setBind(ELF::STB_LOCAL); - Sym->setType(ELF::STT_SECTION); - Sym->setVisibility(ELF::STV_DEFAULT); - Sym->SourceType = isOther; - return Sym; - } - - // getFileSym - Returns a elf symbol to represent the module identifier - static ELFSym *getFileSym() { - ELFSym *Sym = new ELFSym(); - Sym->setBind(ELF::STB_LOCAL); - Sym->setType(ELF::STT_FILE); - Sym->setVisibility(ELF::STV_DEFAULT); - Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS; - Sym->SourceType = isOther; - return Sym; - } - - // getUndefGV - Returns a STT_NOTYPE symbol - static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) { - ELFSym *Sym = new ELFSym(); - Sym->Source.GV = GV; - Sym->setBind(Bind); - Sym->setType(ELF::STT_NOTYPE); - Sym->setVisibility(ELF::STV_DEFAULT); - Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF; - Sym->SourceType = isGV; - return Sym; - } - - // ELF specific fields - unsigned NameIdx; // Index in .strtab of name, once emitted. - uint64_t Value; - unsigned Size; - uint8_t Info; - uint8_t Other; - unsigned short SectionIdx; - - // Symbol index into the Symbol table - unsigned SymTabIdx; - - ELFSym() : SourceType(isOther), NameIdx(0), Value(0), - Size(0), Info(0), Other(ELF::STV_DEFAULT), SectionIdx(0), - SymTabIdx(0) {} - - unsigned getBind() const { return (Info >> 4) & 0xf; } - unsigned getType() const { return Info & 0xf; } - bool isLocalBind() const { return getBind() == ELF::STB_LOCAL; } - bool isFileType() const { return getType() == ELF::STT_FILE; } - - void setBind(unsigned X) { - assert(X == (X & 0xF) && "Bind value out of range!"); - Info = (Info & 0x0F) | (X << 4); - } - - void setType(unsigned X) { - assert(X == (X & 0xF) && "Type value out of range!"); - Info = (Info & 0xF0) | X; - } - - void setVisibility(unsigned V) { - assert(V == (V & 0x3) && "Visibility value out of range!"); - Other = V; - } - }; - - /// ELFSection - This struct contains information about each section that is - /// emitted to the file. This is eventually turned into the section header - /// table at the end of the file. - class ELFSection : public BinaryObject { - public: - // ELF specific fields - unsigned NameIdx; // sh_name - .shstrtab idx of name, once emitted. - unsigned Type; // sh_type - Section contents & semantics - unsigned Flags; // sh_flags - Section flags. - uint64_t Addr; // sh_addr - The mem addr this section is in. - unsigned Offset; // sh_offset - Offset from the file start - unsigned Size; // sh_size - The section size. - unsigned Link; // sh_link - Section header table index link. - unsigned Info; // sh_info - Auxiliary information. 
- unsigned Align; // sh_addralign - Alignment of section. - unsigned EntSize; // sh_entsize - Size of entries in the section e - - /// SectionIdx - The number of the section in the Section Table. - unsigned short SectionIdx; - - /// Sym - The symbol to represent this section if it has one. - ELFSym *Sym; - - /// getSymIndex - Returns the symbol table index of the symbol - /// representing this section. - unsigned getSymbolTableIndex() const { - assert(Sym && "section not present in the symbol table"); - return Sym->SymTabIdx; - } - - ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit) - : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0), - Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {} - }; - - /// ELFRelocation - This class contains all the information necessary to - /// to generate any 32-bit or 64-bit ELF relocation entry. - class ELFRelocation { - uint64_t r_offset; // offset in the section of the object this applies to - uint32_t r_symidx; // symbol table index of the symbol to use - uint32_t r_type; // machine specific relocation type - int64_t r_add; // explicit relocation addend - bool r_rela; // if true then the addend is part of the entry - // otherwise the addend is at the location specified - // by r_offset - public: - uint64_t getInfo(bool is64Bit) const { - if (is64Bit) - return ((uint64_t)r_symidx << 32) + ((uint64_t)r_type & 0xFFFFFFFFL); - else - return (r_symidx << 8) + (r_type & 0xFFL); - } - - uint64_t getOffset() const { return r_offset; } - int64_t getAddend() const { return r_add; } - - ELFRelocation(uint64_t off, uint32_t sym, uint32_t type, - bool rela = true, int64_t addend = 0) : - r_offset(off), r_symidx(sym), r_type(type), - r_add(addend), r_rela(rela) {} - }; - -} // end namespace llvm - -#endif diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp deleted file mode 100644 index 660424c3c141..000000000000 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ /dev/null @@ -1,205 +0,0 @@ -//===-- lib/CodeGen/ELFCodeEmitter.cpp ------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "elfce" - -#include "ELF.h" -#include "ELFWriter.h" -#include "ELFCodeEmitter.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/BinaryObject.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineRelocation.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetELFWriterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -//===----------------------------------------------------------------------===// -// ELFCodeEmitter Implementation -//===----------------------------------------------------------------------===// - -namespace llvm { - -/// startFunction - This callback is invoked when a new machine function is -/// about to be emitted. 
-void ELFCodeEmitter::startFunction(MachineFunction &MF) { - DEBUG(dbgs() << "processing function: " - << MF.getFunction()->getName() << "\n"); - - // Get the ELF Section that this function belongs in. - ES = &EW.getTextSection(MF.getFunction()); - - // Set the desired binary object to be used by the code emitters - setBinaryObject(ES); - - // Get the function alignment in bytes - unsigned Align = (1 << MF.getAlignment()); - - // The function must start on its required alignment - ES->emitAlignment(Align); - - // Update the section alignment if needed. - ES->Align = std::max(ES->Align, Align); - - // Record the function start offset - FnStartOff = ES->getCurrentPCOffset(); - - // Emit constant pool and jump tables to their appropriate sections. - // They need to be emitted before the function because in some targets - // the later may reference JT or CP entry address. - emitConstantPool(MF.getConstantPool()); - if (MF.getJumpTableInfo()) - emitJumpTables(MF.getJumpTableInfo()); -} - -/// finishFunction - This callback is invoked after the function is completely -/// finished. -bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { - // Add a symbol to represent the function. - const Function *F = MF.getFunction(); - ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC, - EW.getGlobalELFVisibility(F)); - FnSym->SectionIdx = ES->SectionIdx; - FnSym->Size = ES->getCurrentPCOffset()-FnStartOff; - EW.AddPendingGlobalSymbol(F, true); - - // Offset from start of Section - FnSym->Value = FnStartOff; - - if (!F->hasPrivateLinkage()) - EW.SymbolList.push_back(FnSym); - - // Patch up Jump Table Section relocations to use the real MBBs offsets - // now that the MBB label offsets inside the function are known. - if (MF.getJumpTableInfo()) { - ELFSection &JTSection = EW.getJumpTableSection(); - for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(), - MRE = JTRelocations.end(); MRI != MRE; ++MRI) { - MachineRelocation &MR = *MRI; - uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock()); - MR.setResultPointer((void*)MBBOffset); - MR.setConstantVal(ES->SectionIdx); - JTSection.addRelocation(MR); - } - } - - // If we have emitted any relocations to function-specific objects such as - // basic blocks, constant pools entries, or jump tables, record their - // addresses now so that we can rewrite them with the correct addresses later - for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { - MachineRelocation &MR = Relocations[i]; - intptr_t Addr; - if (MR.isGlobalValue()) { - EW.AddPendingGlobalSymbol(MR.getGlobalValue()); - } else if (MR.isExternalSymbol()) { - EW.AddPendingExternalSymbol(MR.getExternalSymbol()); - } else if (MR.isBasicBlock()) { - Addr = getMachineBasicBlockAddress(MR.getBasicBlock()); - MR.setConstantVal(ES->SectionIdx); - MR.setResultPointer((void*)Addr); - } else if (MR.isConstantPoolIndex()) { - Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex()); - MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]); - MR.setResultPointer((void*)Addr); - } else if (MR.isJumpTableIndex()) { - ELFSection &JTSection = EW.getJumpTableSection(); - Addr = getJumpTableEntryAddress(MR.getJumpTableIndex()); - MR.setConstantVal(JTSection.SectionIdx); - MR.setResultPointer((void*)Addr); - } else { - llvm_unreachable("Unhandled relocation type"); - } - ES->addRelocation(MR); - } - - // Clear per-function data structures. 
- JTRelocations.clear(); - Relocations.clear(); - CPLocations.clear(); - CPSections.clear(); - JTLocations.clear(); - MBBLocations.clear(); - return false; -} - -/// emitConstantPool - For each constant pool entry, figure out which section -/// the constant should live in and emit the constant -void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { - const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants(); - if (CP.empty()) return; - - // TODO: handle PIC codegen - assert(TM.getRelocationModel() != Reloc::PIC_ && - "PIC codegen not yet handled for elf constant pools!"); - - for (unsigned i = 0, e = CP.size(); i != e; ++i) { - MachineConstantPoolEntry CPE = CP[i]; - - // Record the constant pool location and the section index - ELFSection &CstPool = EW.getConstantPoolSection(CPE); - CPLocations.push_back(CstPool.size()); - CPSections.push_back(CstPool.SectionIdx); - - if (CPE.isMachineConstantPoolEntry()) - assert(0 && "CPE.isMachineConstantPoolEntry not supported yet"); - - // Emit the constant to constant pool section - EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool); - } -} - -/// emitJumpTables - Emit all the jump tables for a given jump table info -/// record to the appropriate section. -void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { - const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); - if (JT.empty()) return; - - // FIXME: handle PIC codegen - assert(TM.getRelocationModel() != Reloc::PIC_ && - "PIC codegen not yet handled for elf jump tables!"); - - const TargetELFWriterInfo *TEW = TM.getELFWriterInfo(); - unsigned EntrySize = 4; //MJTI->getEntrySize(); - - // Get the ELF Section to emit the jump table - ELFSection &JTSection = EW.getJumpTableSection(); - - // For each JT, record its offset from the start of the section - for (unsigned i = 0, e = JT.size(); i != e; ++i) { - const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs; - - // Record JT 'i' offset in the JT section - JTLocations.push_back(JTSection.size()); - - // Each MBB entry in the Jump table section has a relocation entry - // against the current text section. - for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { - unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy(); - MachineRelocation MR = - MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]); - - // Add the relocation to the Jump Table section - JTRelocations.push_back(MR); - - // Output placeholder for MBB in the JT section - for (unsigned s=0; s < EntrySize; ++s) - JTSection.emitByte(0); - } - } -} - -} // end namespace llvm diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h deleted file mode 100644 index 8671c674eecf..000000000000 --- a/lib/CodeGen/ELFCodeEmitter.h +++ /dev/null @@ -1,78 +0,0 @@ -//===-- lib/CodeGen/ELFCodeEmitter.h ----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef ELFCODEEMITTER_H -#define ELFCODEEMITTER_H - -#include "llvm/CodeGen/ObjectCodeEmitter.h" -#include <vector> - -namespace llvm { - class ELFWriter; - class ELFSection; - - /// ELFCodeEmitter - This class is used by the ELFWriter to - /// emit the code for functions to the ELF file. 
- class ELFCodeEmitter : public ObjectCodeEmitter { - ELFWriter &EW; - - /// Target machine description - TargetMachine &TM; - - /// Section containing code for functions - ELFSection *ES; - - /// Relocations - Record relocations needed by the current function - std::vector<MachineRelocation> Relocations; - - /// JTRelocations - Record relocations needed by the relocation - /// section. - std::vector<MachineRelocation> JTRelocations; - - /// FnStartPtr - Function offset from the beginning of ELFSection 'ES' - uintptr_t FnStartOff; - public: - explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {} - - /// addRelocation - Register new relocations for this function - void addRelocation(const MachineRelocation &MR) { - Relocations.push_back(MR); - } - - /// emitConstantPool - For each constant pool entry, figure out which - /// section the constant should live in and emit data to it - void emitConstantPool(MachineConstantPool *MCP); - - /// emitJumpTables - Emit all the jump tables for a given jump table - /// info and record them to the appropriate section. - void emitJumpTables(MachineJumpTableInfo *MJTI); - - void startFunction(MachineFunction &F); - bool finishFunction(MachineFunction &F); - - /// emitLabel - Emits a label - virtual void emitLabel(MCSymbol *Label) { - assert(0 && "emitLabel not implemented"); - } - - /// getLabelAddress - Return the address of the specified LabelID, - /// only usable after the LabelID has been emitted. - virtual uintptr_t getLabelAddress(MCSymbol *Label) const { - assert(0 && "getLabelAddress not implemented"); - return 0; - } - - virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {} - -}; // end class ELFCodeEmitter - -} // end namespace llvm - -#endif - diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp deleted file mode 100644 index f2c218565854..000000000000 --- a/lib/CodeGen/ELFWriter.cpp +++ /dev/null @@ -1,1105 +0,0 @@ -//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the target-independent ELF writer. This file writes out -// the ELF file in the following order: -// -// #1. ELF Header -// #2. '.text' section -// #3. '.data' section -// #4. '.bss' section (conceptual position in file) -// ... -// #X. '.shstrtab' section -// #Y. Section Table -// -// The entries in the section table are laid out as: -// #0. Null entry [required] -// #1. ".text" entry - the program code -// #2. ".data" entry - global variables with initializers. [ if needed ] -// #3. ".bss" entry - global variables without initializers. [ if needed ] -// ... -// #N. ".shstrtab" entry - String table for the section names. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "elfwriter" -#include "ELF.h" -#include "ELFWriter.h" -#include "ELFCodeEmitter.h" -#include "llvm/Constants.h" -#include "llvm/Module.h" -#include "llvm/PassManager.h" -#include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/BinaryObject.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetELFWriterInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallString.h" -using namespace llvm; - -char ELFWriter::ID = 0; - -//===----------------------------------------------------------------------===// -// ELFWriter Implementation -//===----------------------------------------------------------------------===// - -ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) - : MachineFunctionPass(ID), O(o), TM(tm), - OutContext(*new MCContext(*TM.getMCAsmInfo(), *TM.getRegisterInfo(), - &TM.getTargetLowering()->getObjFileLowering())), - TLOF(TM.getTargetLowering()->getObjFileLowering()), - is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64), - isLittleEndian(TM.getTargetData()->isLittleEndian()), - ElfHdr(isLittleEndian, is64Bit) { - - MAI = TM.getMCAsmInfo(); - TEW = TM.getELFWriterInfo(); - - // Create the object code emitter object for this target. - ElfCE = new ELFCodeEmitter(*this); - - // Initial number of sections - NumSections = 0; -} - -ELFWriter::~ELFWriter() { - delete ElfCE; - delete &OutContext; - - while(!SymbolList.empty()) { - delete SymbolList.back(); - SymbolList.pop_back(); - } - - while(!PrivateSyms.empty()) { - delete PrivateSyms.back(); - PrivateSyms.pop_back(); - } - - while(!SectionList.empty()) { - delete SectionList.back(); - SectionList.pop_back(); - } - - // Release the name mangler object. - delete Mang; Mang = 0; -} - -// doInitialization - Emit the file header and all of the global variables for -// the module to the ELF file. -bool ELFWriter::doInitialization(Module &M) { - // Initialize TargetLoweringObjectFile. - const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM); - - Mang = new Mangler(OutContext, *TM.getTargetData()); - - // ELF Header - // ---------- - // Fields e_shnum e_shstrndx are only known after all section have - // been emitted. They locations in the ouput buffer are recorded so - // to be patched up later. 
- // - // Note - // ---- - // emitWord method behaves differently for ELF32 and ELF64, writing - // 4 bytes in the former and 8 in the last for *_off and *_addr elf types - - ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0] - ElfHdr.emitByte('E'); // e_ident[EI_MAG1] - ElfHdr.emitByte('L'); // e_ident[EI_MAG2] - ElfHdr.emitByte('F'); // e_ident[EI_MAG3] - - ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS] - ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA] - ElfHdr.emitByte(ELF::EV_CURRENT); // e_ident[EI_VERSION] - ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD] - - ElfHdr.emitWord16(ELF::ET_REL); // e_type - ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target - ElfHdr.emitWord32(ELF::EV_CURRENT); // e_version - ElfHdr.emitWord(0); // e_entry, no entry point in .o file - ElfHdr.emitWord(0); // e_phoff, no program header for .o - ELFHdr_e_shoff_Offset = ElfHdr.size(); - ElfHdr.emitWord(0); // e_shoff = sec hdr table off in bytes - ElfHdr.emitWord32(TEW->getEFlags()); // e_flags = whatever the target wants - ElfHdr.emitWord16(TEW->getHdrSize()); // e_ehsize = ELF header size - ElfHdr.emitWord16(0); // e_phentsize = prog header entry size - ElfHdr.emitWord16(0); // e_phnum = # prog header entries = 0 - - // e_shentsize = Section header entry size - ElfHdr.emitWord16(TEW->getSHdrSize()); - - // e_shnum = # of section header ents - ELFHdr_e_shnum_Offset = ElfHdr.size(); - ElfHdr.emitWord16(0); // Placeholder - - // e_shstrndx = Section # of '.shstrtab' - ELFHdr_e_shstrndx_Offset = ElfHdr.size(); - ElfHdr.emitWord16(0); // Placeholder - - // Add the null section, which is required to be first in the file. - getNullSection(); - - // The first entry in the symtab is the null symbol and the second - // is a local symbol containing the module/file name - SymbolList.push_back(new ELFSym()); - SymbolList.push_back(ELFSym::getFileSym()); - - return false; -} - -// AddPendingGlobalSymbol - Add a global to be processed and to -// the global symbol lookup, use a zero index because the table -// index will be determined later. -void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV, - bool AddToLookup /* = false */) { - PendingGlobals.insert(GV); - if (AddToLookup) - GblSymLookup[GV] = 0; -} - -// AddPendingExternalSymbol - Add the external to be processed -// and to the external symbol lookup, use a zero index because -// the symbol table index will be determined later. 
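Aside (not part of the patch): the doInitialization() code above writes the ELF header byte by byte and records the offsets of e_shoff, e_shnum and e_shstrndx so they can be patched once the sections are laid out. For readers less familiar with the format, here is a minimal stand-alone C++ sketch of the same field order for the 32-bit little-endian case; every name below is invented for illustration, and only the field sizes, their order, and the placeholder-then-patch scheme mirror the deleted writer.

  #include <cstdint>
  #include <vector>

  struct Elf32HeaderBlob {
    std::vector<uint8_t> Bytes;
    size_t ShOffField = 0;    // byte offset of e_shoff, patched later
    size_t ShNumField = 0;    // byte offset of e_shnum, patched later
    size_t ShStrNdxField = 0; // byte offset of e_shstrndx, patched later

    void byte(uint8_t B) { Bytes.push_back(B); }
    void half(uint16_t H) { byte(H & 0xff); byte(H >> 8); }
    void word(uint32_t W) {
      for (int i = 0; i < 4; ++i)
        byte((W >> (8 * i)) & 0xff);
    }
  };

  Elf32HeaderBlob emitElf32Header(uint16_t Machine, uint32_t Flags) {
    Elf32HeaderBlob H;
    H.byte(0x7f); H.byte('E'); H.byte('L'); H.byte('F'); // e_ident magic
    H.byte(1);                  // EI_CLASS   = ELFCLASS32
    H.byte(1);                  // EI_DATA    = ELFDATA2LSB
    H.byte(1);                  // EI_VERSION = EV_CURRENT
    while (H.Bytes.size() < 16) // pad e_ident out to EI_NIDENT bytes
      H.byte(0);
    H.half(1);                  // e_type = ET_REL, a relocatable object
    H.half(Machine);            // e_machine
    H.word(1);                  // e_version = EV_CURRENT
    H.word(0);                  // e_entry: no entry point in a .o file
    H.word(0);                  // e_phoff: no program headers in a .o file
    H.ShOffField = H.Bytes.size();
    H.word(0);                  // e_shoff placeholder
    H.word(Flags);              // e_flags
    H.half(52);                 // e_ehsize: an ELF32 header is 52 bytes
    H.half(0);                  // e_phentsize
    H.half(0);                  // e_phnum
    H.half(40);                 // e_shentsize: an ELF32 section header is 40 bytes
    H.ShNumField = H.Bytes.size();
    H.half(0);                  // e_shnum placeholder
    H.ShStrNdxField = H.Bytes.size();
    H.half(0);                  // e_shstrndx placeholder
    return H;                   // H.Bytes.size() == 52 here
  }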
-void ELFWriter::AddPendingExternalSymbol(const char *External) { - PendingExternals.insert(External); - ExtSymLookup[External] = 0; -} - -ELFSection &ELFWriter::getDataSection() { - const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection(); - return getSection(Data->getSectionName(), Data->getType(), - Data->getFlags(), 4); -} - -ELFSection &ELFWriter::getBSSSection() { - const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection(); - return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4); -} - -// getCtorSection - Get the static constructor section -ELFSection &ELFWriter::getCtorSection() { - const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection(); - return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags()); -} - -// getDtorSection - Get the static destructor section -ELFSection &ELFWriter::getDtorSection() { - const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection(); - return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags()); -} - -// getTextSection - Get the text section for the specified function -ELFSection &ELFWriter::getTextSection(const Function *F) { - const MCSectionELF *Text = - (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM); - return getSection(Text->getSectionName(), Text->getType(), Text->getFlags()); -} - -// getJumpTableSection - Get a read only section for constants when -// emitting jump tables. TODO: add PIC support -ELFSection &ELFWriter::getJumpTableSection() { - const MCSectionELF *JT = - (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly()); - return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(), - TM.getTargetData()->getPointerABIAlignment()); -} - -// getConstantPoolSection - Get a constant pool section based on the machine -// constant pool entry type and relocation info. -ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) { - SectionKind Kind; - switch (CPE.getRelocationInfo()) { - default: llvm_unreachable("Unknown section kind"); - case 2: Kind = SectionKind::getReadOnlyWithRel(); break; - case 1: - Kind = SectionKind::getReadOnlyWithRelLocal(); - break; - case 0: - switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) { - case 4: Kind = SectionKind::getMergeableConst4(); break; - case 8: Kind = SectionKind::getMergeableConst8(); break; - case 16: Kind = SectionKind::getMergeableConst16(); break; - default: Kind = SectionKind::getMergeableConst(); break; - } - } - - const MCSectionELF *CPSect = - (const MCSectionELF *)TLOF.getSectionForConstant(Kind); - return getSection(CPSect->getSectionName(), CPSect->getType(), - CPSect->getFlags(), CPE.getAlignment()); -} - -// getRelocSection - Return the relocation section of section 'S'. 'RelA' -// is true if the relocation section contains entries with addends. -ELFSection &ELFWriter::getRelocSection(ELFSection &S) { - unsigned SectionType = TEW->hasRelocationAddend() ? 
- ELF::SHT_RELA : ELF::SHT_REL; - - std::string SectionName(".rel"); - if (TEW->hasRelocationAddend()) - SectionName.append("a"); - SectionName.append(S.getName()); - - return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment()); -} - -// getGlobalELFVisibility - Returns the ELF specific visibility type -unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) { - switch (GV->getVisibility()) { - default: - llvm_unreachable("unknown visibility type"); - case GlobalValue::DefaultVisibility: - return ELF::STV_DEFAULT; - case GlobalValue::HiddenVisibility: - return ELF::STV_HIDDEN; - case GlobalValue::ProtectedVisibility: - return ELF::STV_PROTECTED; - } - return 0; -} - -// getGlobalELFBinding - Returns the ELF specific binding type -unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) { - if (GV->hasInternalLinkage()) - return ELF::STB_LOCAL; - - if (GV->isWeakForLinker() && !GV->hasCommonLinkage()) - return ELF::STB_WEAK; - - return ELF::STB_GLOBAL; -} - -// getGlobalELFType - Returns the ELF specific type for a global -unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) { - if (GV->isDeclaration()) - return ELF::STT_NOTYPE; - - if (isa<Function>(GV)) - return ELF::STT_FUNC; - - return ELF::STT_OBJECT; -} - -// IsELFUndefSym - True if the global value must be marked as a symbol -// which points to a SHN_UNDEF section. This means that the symbol has -// no definition on the module. -static bool IsELFUndefSym(const GlobalValue *GV) { - return GV->isDeclaration() || (isa<Function>(GV)); -} - -// AddToSymbolList - Update the symbol lookup and If the symbol is -// private add it to PrivateSyms list, otherwise to SymbolList. -void ELFWriter::AddToSymbolList(ELFSym *GblSym) { - assert(GblSym->isGlobalValue() && "Symbol must be a global value"); - - const GlobalValue *GV = GblSym->getGlobalValue(); - if (GV->hasPrivateLinkage()) { - // For a private symbols, keep track of the index inside - // the private list since it will never go to the symbol - // table and won't be patched up later. - PrivateSyms.push_back(GblSym); - GblSymLookup[GV] = PrivateSyms.size()-1; - } else { - // Non private symbol are left with zero indices until - // they are patched up during the symbol table emition - // (where the indicies are created). - SymbolList.push_back(GblSym); - GblSymLookup[GV] = 0; - } -} - -/// HasCommonSymbols - True if this section holds common symbols, this is -/// indicated on the ELF object file by a symbol with SHN_COMMON section -/// header index. -static bool HasCommonSymbols(const MCSectionELF &S) { - // FIXME: this is wrong, a common symbol can be in .data for example. - if (StringRef(S.getSectionName()).startswith(".gnu.linkonce.")) - return true; - - return false; -} - - -// EmitGlobal - Choose the right section for global and emit it -void ELFWriter::EmitGlobal(const GlobalValue *GV) { - - // Check if the referenced symbol is already emitted - if (GblSymLookup.find(GV) != GblSymLookup.end()) - return; - - // Handle ELF Bind, Visibility and Type for the current symbol - unsigned SymBind = getGlobalELFBinding(GV); - unsigned SymType = getGlobalELFType(GV); - bool IsUndefSym = IsELFUndefSym(GV); - - ELFSym *GblSym = IsUndefSym ? 
ELFSym::getUndefGV(GV, SymBind) - : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV)); - - if (!IsUndefSym) { - assert(isa<GlobalVariable>(GV) && "GV not a global variable!"); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); - - // Handle special llvm globals - if (EmitSpecialLLVMGlobal(GVar)) - return; - - // Get the ELF section where this global belongs from TLOF - const MCSectionELF *S = - (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM); - ELFSection &ES = - getSection(S->getSectionName(), S->getType(), S->getFlags()); - SectionKind Kind = S->getKind(); - - // The symbol align should update the section alignment if needed - const TargetData *TD = TM.getTargetData(); - unsigned Align = TD->getPreferredAlignment(GVar); - unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType()); - GblSym->Size = Size; - - if (HasCommonSymbols(*S)) { // Symbol must go to a common section - GblSym->SectionIdx = ELF::SHN_COMMON; - - // A new linkonce section is created for each global in the - // common section, the default alignment is 1 and the symbol - // value contains its alignment. - ES.Align = 1; - GblSym->Value = Align; - - } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS. - GblSym->SectionIdx = ES.SectionIdx; - - // Update the size with alignment and the next object can - // start in the right offset in the section - if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1); - ES.Align = std::max(ES.Align, Align); - - // GblSym->Value should contain the virtual offset inside the section. - // Virtual because the BSS space is not allocated on ELF objects - GblSym->Value = ES.Size; - ES.Size += Size; - - } else { // The symbol must go to some kind of data section - GblSym->SectionIdx = ES.SectionIdx; - - // GblSym->Value should contain the symbol offset inside the section, - // and all symbols should start on their required alignment boundary - ES.Align = std::max(ES.Align, Align); - ES.emitAlignment(Align); - GblSym->Value = ES.size(); - - // Emit the global to the data section 'ES' - EmitGlobalConstant(GVar->getInitializer(), ES); - } - } - - AddToSymbolList(GblSym); -} - -void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS, - ELFSection &GblS) { - - // Print the fields in successive locations. Pad to align if needed! - const TargetData *TD = TM.getTargetData(); - unsigned Size = TD->getTypeAllocSize(CVS->getType()); - const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType()); - uint64_t sizeSoFar = 0; - for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) { - const Constant* field = CVS->getOperand(i); - - // Check if padding is needed and insert one or more 0s. - uint64_t fieldSize = TD->getTypeAllocSize(field->getType()); - uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1)) - - cvsLayout->getElementOffset(i)) - fieldSize; - sizeSoFar += fieldSize + padSize; - - // Now print the actual field value. - EmitGlobalConstant(field, GblS); - - // Insert padding - this may include padding to increase the size of the - // current field up to the ABI size (if the struct is not packed) as well - // as padding to ensure that the next field starts at the right offset. 
- GblS.emitZeros(padSize); - } - assert(sizeSoFar == cvsLayout->getSizeInBytes() && - "Layout of constant struct may be incorrect!"); -} - -void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) { - const TargetData *TD = TM.getTargetData(); - unsigned Size = TD->getTypeAllocSize(CV->getType()); - - if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) { - for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i) - EmitGlobalConstant(CVA->getOperand(i), GblS); - return; - } else if (isa<ConstantAggregateZero>(CV)) { - GblS.emitZeros(Size); - return; - } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) { - EmitGlobalConstantStruct(CVS, GblS); - return; - } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { - APInt Val = CFP->getValueAPF().bitcastToAPInt(); - if (CFP->getType()->isDoubleTy()) - GblS.emitWord64(Val.getZExtValue()); - else if (CFP->getType()->isFloatTy()) - GblS.emitWord32(Val.getZExtValue()); - else if (CFP->getType()->isX86_FP80Ty()) { - unsigned PadSize = TD->getTypeAllocSize(CFP->getType())- - TD->getTypeStoreSize(CFP->getType()); - GblS.emitWordFP80(Val.getRawData(), PadSize); - } else if (CFP->getType()->isPPC_FP128Ty()) - llvm_unreachable("PPC_FP128Ty global emission not implemented"); - return; - } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - if (Size == 1) - GblS.emitByte(CI->getZExtValue()); - else if (Size == 2) - GblS.emitWord16(CI->getZExtValue()); - else if (Size == 4) - GblS.emitWord32(CI->getZExtValue()); - else - EmitGlobalConstantLargeInt(CI, GblS); - return; - } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { - VectorType *PTy = CP->getType(); - for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I) - EmitGlobalConstant(CP->getOperand(I), GblS); - return; - } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { - // Resolve a constant expression which returns a (Constant, Offset) - // pair. If 'Res.first' is a GlobalValue, emit a relocation with - // the offset 'Res.second', otherwise emit a global constant like - // it is always done for not contant expression types. - CstExprResTy Res = ResolveConstantExpr(CE); - const Constant *Op = Res.first; - - if (isa<GlobalValue>(Op)) - EmitGlobalDataRelocation(cast<const GlobalValue>(Op), - TD->getTypeAllocSize(Op->getType()), - GblS, Res.second); - else - EmitGlobalConstant(Op, GblS); - - return; - } else if (CV->getType()->getTypeID() == Type::PointerTyID) { - // Fill the data entry with zeros or emit a relocation entry - if (isa<ConstantPointerNull>(CV)) - GblS.emitZeros(Size); - else - EmitGlobalDataRelocation(cast<const GlobalValue>(CV), - Size, GblS); - return; - } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { - // This is a constant address for a global variable or function and - // therefore must be referenced using a relocation entry. - EmitGlobalDataRelocation(GV, Size, GblS); - return; - } - - std::string msg; - raw_string_ostream ErrorMsg(msg); - ErrorMsg << "Constant unimp for type: " << *CV->getType(); - report_fatal_error(ErrorMsg.str()); -} - -// ResolveConstantExpr - Resolve the constant expression until it stop -// yielding other constant expressions. 
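Aside (not part of the patch): the padSize expression in EmitGlobalConstantStruct above pads each struct field out to the offset of the next field, or to the total struct size for the last field. A stand-alone sketch of that rule, with invented names:

  #include <cstdint>
  #include <vector>

  struct FieldInfo {
    uint64_t Offset; // offset of the field inside the struct
    uint64_t Size;   // allocation size of the field's type
  };

  // Number of zero bytes to emit after each field so that the next field (or
  // the end of the struct) starts at its proper offset.
  std::vector<uint64_t> trailingPadding(const std::vector<FieldInfo> &Fields,
                                        uint64_t StructSize) {
    std::vector<uint64_t> Pad(Fields.size());
    for (size_t i = 0; i != Fields.size(); ++i) {
      uint64_t NextOffset =
          i + 1 == Fields.size() ? StructSize : Fields[i + 1].Offset;
      Pad[i] = (NextOffset - Fields[i].Offset) - Fields[i].Size;
    }
    return Pad;
  }

  // Example: struct { char c; int i; } laid out as { c at offset 0, size 1;
  // i at offset 4, size 4 } with total size 8 yields {3, 0}.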
-CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) { - const TargetData *TD = TM.getTargetData(); - - // There ins't constant expression inside others anymore - if (!isa<ConstantExpr>(CV)) - return std::make_pair(CV, 0); - - const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); - switch (CE->getOpcode()) { - case Instruction::BitCast: - return ResolveConstantExpr(CE->getOperand(0)); - - case Instruction::GetElementPtr: { - const Constant *ptrVal = CE->getOperand(0); - SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end()); - int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), idxVec); - return std::make_pair(ptrVal, Offset); - } - case Instruction::IntToPtr: { - Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()), - false/*ZExt*/); - return ResolveConstantExpr(Op); - } - case Instruction::PtrToInt: { - Constant *Op = CE->getOperand(0); - Type *Ty = CE->getType(); - - // We can emit the pointer value into this slot if the slot is an - // integer slot greater or equal to the size of the pointer. - if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType())) - return ResolveConstantExpr(Op); - - llvm_unreachable("Integer size less then pointer size"); - } - case Instruction::Add: - case Instruction::Sub: { - // Only handle cases where there's a constant expression with GlobalValue - // as first operand and ConstantInt as second, which are the cases we can - // solve direclty using a relocation entry. GlobalValue=Op0, CstInt=Op1 - // 1) Instruction::Add => (global) + CstInt - // 2) Instruction::Sub => (global) + -CstInt - const Constant *Op0 = CE->getOperand(0); - const Constant *Op1 = CE->getOperand(1); - assert(isa<ConstantInt>(Op1) && "Op1 must be a ConstantInt"); - - CstExprResTy Res = ResolveConstantExpr(Op0); - assert(isa<GlobalValue>(Res.first) && "Op0 must be a GlobalValue"); - - const APInt &RHS = cast<ConstantInt>(Op1)->getValue(); - switch (CE->getOpcode()) { - case Instruction::Add: - return std::make_pair(Res.first, RHS.getSExtValue()); - case Instruction::Sub: - return std::make_pair(Res.first, (-RHS).getSExtValue()); - } - } - } - - report_fatal_error(CE->getOpcodeName() + - StringRef(": Unsupported ConstantExpr type")); - - return std::make_pair(CV, 0); // silence warning -} - -void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, - ELFSection &GblS, int64_t Offset) { - // Create the relocation entry for the global value - MachineRelocation MR = - MachineRelocation::getGV(GblS.getCurrentPCOffset(), - TEW->getAbsoluteLabelMachineRelTy(), - const_cast<GlobalValue*>(GV), - Offset); - - // Fill the data entry with zeros - GblS.emitZeros(Size); - - // Add the relocation entry for the current data section - GblS.addRelocation(MR); -} - -void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI, - ELFSection &S) { - const TargetData *TD = TM.getTargetData(); - unsigned BitWidth = CI->getBitWidth(); - assert(isPowerOf2_32(BitWidth) && - "Non-power-of-2-sized integers not handled!"); - - const uint64_t *RawData = CI->getValue().getRawData(); - uint64_t Val = 0; - for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { - Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i]; - S.emitWord64(Val); - } -} - -/// EmitSpecialLLVMGlobal - Check to see if the specified global is a -/// special global used by LLVM. If so, emit it and return true, otherwise -/// do nothing and return false. 
-bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { - if (GV->getName() == "llvm.used") - llvm_unreachable("not implemented yet"); - - // Ignore debug and non-emitted data. This handles llvm.compiler.used. - if (GV->getSection() == "llvm.metadata" || - GV->hasAvailableExternallyLinkage()) - return true; - - if (!GV->hasAppendingLinkage()) return false; - - assert(GV->hasInitializer() && "Not a special LLVM global!"); - - const TargetData *TD = TM.getTargetData(); - unsigned Align = TD->getPointerPrefAlignment(); - if (GV->getName() == "llvm.global_ctors") { - ELFSection &Ctor = getCtorSection(); - Ctor.emitAlignment(Align); - EmitXXStructorList(GV->getInitializer(), Ctor); - return true; - } - - if (GV->getName() == "llvm.global_dtors") { - ELFSection &Dtor = getDtorSection(); - Dtor.emitAlignment(Align); - EmitXXStructorList(GV->getInitializer(), Dtor); - return true; - } - - return false; -} - -/// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the -/// function pointers, ignoring the init priority. -void ELFWriter::EmitXXStructorList(const Constant *List, ELFSection &Xtor) { - // Should be an array of '{ i32, void ()* }' structs. The first value is the - // init priority, which we ignore. - if (List->isNullValue()) return; - const ConstantArray *InitList = cast<ConstantArray>(List); - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - if (InitList->getOperand(i)->isNullValue()) - continue; - ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i)); - - if (CS->getOperand(1)->isNullValue()) - continue; - - // Emit the function pointer. - EmitGlobalConstant(CS->getOperand(1), Xtor); - } -} - -bool ELFWriter::runOnMachineFunction(MachineFunction &MF) { - // Nothing to do here, this is all done through the ElfCE object above. - return false; -} - -/// doFinalization - Now that the module has been completely processed, emit -/// the ELF file to 'O'. -bool ELFWriter::doFinalization(Module &M) { - // Emit .data section placeholder - getDataSection(); - - // Emit .bss section placeholder - getBSSSection(); - - // Build and emit data, bss and "common" sections. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - EmitGlobal(I); - - // Emit all pending globals - for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end(); - I != E; ++I) - EmitGlobal(*I); - - // Emit all pending externals - for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end(); - I != E; ++I) - SymbolList.push_back(ELFSym::getExtSym(*I)); - - // Emit a symbol for each section created until now, skip null section - for (unsigned i = 1, e = SectionList.size(); i < e; ++i) { - ELFSection &ES = *SectionList[i]; - ELFSym *SectionSym = ELFSym::getSectionSym(); - SectionSym->SectionIdx = ES.SectionIdx; - SymbolList.push_back(SectionSym); - ES.Sym = SymbolList.back(); - } - - // Emit string table - EmitStringTable(M.getModuleIdentifier()); - - // Emit the symbol table now, if non-empty. - EmitSymbolTable(); - - // Emit the relocation sections. - EmitRelocations(); - - // Emit the sections string table. - EmitSectionTableStringTable(); - - // Dump the sections and section table to the .o file. 
- OutputSectionsAndSectionTable(); - - return false; -} - -// RelocateField - Patch relocatable field with 'Offset' in 'BO' -// using a 'Value' of known 'Size' -void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset, - int64_t Value, unsigned Size) { - if (Size == 32) - BO.fixWord32(Value, Offset); - else if (Size == 64) - BO.fixWord64(Value, Offset); - else - llvm_unreachable("don't know howto patch relocatable field"); -} - -/// EmitRelocations - Emit relocations -void ELFWriter::EmitRelocations() { - - // True if the target uses the relocation entry to hold the addend, - // otherwise the addend is written directly to the relocatable field. - bool HasRelA = TEW->hasRelocationAddend(); - - // Create Relocation sections for each section which needs it. - for (unsigned i=0, e=SectionList.size(); i != e; ++i) { - ELFSection &S = *SectionList[i]; - - // This section does not have relocations - if (!S.hasRelocations()) continue; - ELFSection &RelSec = getRelocSection(S); - - // 'Link' - Section hdr idx of the associated symbol table - // 'Info' - Section hdr idx of the section to which the relocation applies - ELFSection &SymTab = getSymbolTableSection(); - RelSec.Link = SymTab.SectionIdx; - RelSec.Info = S.SectionIdx; - RelSec.EntSize = TEW->getRelocationEntrySize(); - - // Get the relocations from Section - std::vector<MachineRelocation> Relos = S.getRelocations(); - for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(), - MRE = Relos.end(); MRI != MRE; ++MRI) { - MachineRelocation &MR = *MRI; - - // Relocatable field offset from the section start - unsigned RelOffset = MR.getMachineCodeOffset(); - - // Symbol index in the symbol table - unsigned SymIdx = 0; - - // Target specific relocation field type and size - unsigned RelType = TEW->getRelocationType(MR.getRelocationType()); - unsigned RelTySize = TEW->getRelocationTySize(RelType); - int64_t Addend = 0; - - // There are several machine relocations types, and each one of - // them needs a different approach to retrieve the symbol table index. - if (MR.isGlobalValue()) { - const GlobalValue *G = MR.getGlobalValue(); - int64_t GlobalOffset = MR.getConstantVal(); - SymIdx = GblSymLookup[G]; - if (G->hasPrivateLinkage()) { - // If the target uses a section offset in the relocation: - // SymIdx + Addend = section sym for global + section offset - unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx; - Addend = PrivateSyms[SymIdx]->Value + GlobalOffset; - SymIdx = SectionList[SectionIdx]->getSymbolTableIndex(); - } else { - Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset); - } - } else if (MR.isExternalSymbol()) { - const char *ExtSym = MR.getExternalSymbol(); - SymIdx = ExtSymLookup[ExtSym]; - Addend = TEW->getDefaultAddendForRelTy(RelType); - } else { - // Get the symbol index for the section symbol - unsigned SectionIdx = MR.getConstantVal(); - SymIdx = SectionList[SectionIdx]->getSymbolTableIndex(); - - // The symbol offset inside the section - int64_t SymOffset = (int64_t)MR.getResultPointer(); - - // For pc relative relocations where symbols are defined in the same - // section they are referenced, ignore the relocation entry and patch - // the relocatable field with the symbol offset directly. 
- if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) { - int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType); - RelocateField(S, RelOffset, Value, RelTySize); - continue; - } - - Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset); - } - - // The target without addend on the relocation symbol must be - // patched in the relocation place itself to contain the addend - // otherwise write zeros to make sure there is no garbage there - RelocateField(S, RelOffset, HasRelA ? 0 : Addend, RelTySize); - - // Get the relocation entry and emit to the relocation section - ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend); - EmitRelocation(RelSec, Rel, HasRelA); - } - } -} - -/// EmitRelocation - Write relocation 'Rel' to the relocation section 'Rel' -void ELFWriter::EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, - bool HasRelA) { - RelSec.emitWord(Rel.getOffset()); - RelSec.emitWord(Rel.getInfo(is64Bit)); - if (HasRelA) - RelSec.emitWord(Rel.getAddend()); -} - -/// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable' -void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) { - if (is64Bit) { - SymbolTable.emitWord32(Sym.NameIdx); - SymbolTable.emitByte(Sym.Info); - SymbolTable.emitByte(Sym.Other); - SymbolTable.emitWord16(Sym.SectionIdx); - SymbolTable.emitWord64(Sym.Value); - SymbolTable.emitWord64(Sym.Size); - } else { - SymbolTable.emitWord32(Sym.NameIdx); - SymbolTable.emitWord32(Sym.Value); - SymbolTable.emitWord32(Sym.Size); - SymbolTable.emitByte(Sym.Info); - SymbolTable.emitByte(Sym.Other); - SymbolTable.emitWord16(Sym.SectionIdx); - } -} - -/// EmitSectionHeader - Write section 'Section' header in 'SHdrTab' -/// Section Header Table -void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, - const ELFSection &SHdr) { - SHdrTab.emitWord32(SHdr.NameIdx); - SHdrTab.emitWord32(SHdr.Type); - if (is64Bit) { - SHdrTab.emitWord64(SHdr.Flags); - SHdrTab.emitWord(SHdr.Addr); - SHdrTab.emitWord(SHdr.Offset); - SHdrTab.emitWord64(SHdr.Size); - SHdrTab.emitWord32(SHdr.Link); - SHdrTab.emitWord32(SHdr.Info); - SHdrTab.emitWord64(SHdr.Align); - SHdrTab.emitWord64(SHdr.EntSize); - } else { - SHdrTab.emitWord32(SHdr.Flags); - SHdrTab.emitWord(SHdr.Addr); - SHdrTab.emitWord(SHdr.Offset); - SHdrTab.emitWord32(SHdr.Size); - SHdrTab.emitWord32(SHdr.Link); - SHdrTab.emitWord32(SHdr.Info); - SHdrTab.emitWord32(SHdr.Align); - SHdrTab.emitWord32(SHdr.EntSize); - } -} - -/// EmitStringTable - If the current symbol table is non-empty, emit the string -/// table for it -void ELFWriter::EmitStringTable(const std::string &ModuleName) { - if (!SymbolList.size()) return; // Empty symbol table. - ELFSection &StrTab = getStringTableSection(); - - // Set the zero'th symbol to a null byte, as required. - StrTab.emitByte(0); - - // Walk on the symbol list and write symbol names into the string table. - unsigned Index = 1; - for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) { - ELFSym &Sym = *(*I); - - std::string Name; - if (Sym.isGlobalValue()) { - SmallString<40> NameStr; - Mang->getNameWithPrefix(NameStr, Sym.getGlobalValue(), false); - Name.append(NameStr.begin(), NameStr.end()); - } else if (Sym.isExternalSym()) - Name.append(Sym.getExternalSymbol()); - else if (Sym.isFileType()) - Name.append(ModuleName); - - if (Name.empty()) { - Sym.NameIdx = 0; - } else { - Sym.NameIdx = Index; - StrTab.emitString(Name); - - // Keep track of the number of bytes emitted to this section. 
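Aside (not part of the patch): EmitRelocation() above writes each entry as r_offset, then r_info, then r_addend when the target uses SHT_RELA sections. The r_info word packs the symbol-table index together with the relocation type; the helpers below follow the standard ELF32_R_INFO / ELF64_R_INFO packing and are only meant to make Rel.getInfo(is64Bit) concrete.

  #include <cstdint>

  // ELF32: upper 24 bits hold the symbol index, lower 8 bits the type.
  uint32_t elf32RInfo(uint32_t SymIdx, uint8_t Type) {
    return (SymIdx << 8) | Type;
  }

  // ELF64: upper 32 bits hold the symbol index, lower 32 bits the type.
  uint64_t elf64RInfo(uint32_t SymIdx, uint32_t Type) {
    return (uint64_t(SymIdx) << 32) | Type;
  }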
- Index += Name.size()+1; - } - } - assert(Index == StrTab.size()); - StrTab.Size = Index; -} - -// SortSymbols - On the symbol table local symbols must come before -// all other symbols with non-local bindings. The return value is -// the position of the first non local symbol. -unsigned ELFWriter::SortSymbols() { - unsigned FirstNonLocalSymbol; - std::vector<ELFSym*> LocalSyms, OtherSyms; - - for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) { - if ((*I)->isLocalBind()) - LocalSyms.push_back(*I); - else - OtherSyms.push_back(*I); - } - SymbolList.clear(); - FirstNonLocalSymbol = LocalSyms.size(); - - for (unsigned i = 0; i < FirstNonLocalSymbol; ++i) - SymbolList.push_back(LocalSyms[i]); - - for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I) - SymbolList.push_back(*I); - - LocalSyms.clear(); - OtherSyms.clear(); - - return FirstNonLocalSymbol; -} - -/// EmitSymbolTable - Emit the symbol table itself. -void ELFWriter::EmitSymbolTable() { - if (!SymbolList.size()) return; // Empty symbol table. - - // Now that we have emitted the string table and know the offset into the - // string table of each symbol, emit the symbol table itself. - ELFSection &SymTab = getSymbolTableSection(); - SymTab.Align = TEW->getPrefELFAlignment(); - - // Section Index of .strtab. - SymTab.Link = getStringTableSection().SectionIdx; - - // Size of each symtab entry. - SymTab.EntSize = TEW->getSymTabEntrySize(); - - // Reorder the symbol table with local symbols first! - unsigned FirstNonLocalSymbol = SortSymbols(); - - // Emit all the symbols to the symbol table. - for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) { - ELFSym &Sym = *SymbolList[i]; - - // Emit symbol to the symbol table - EmitSymbol(SymTab, Sym); - - // Record the symbol table index for each symbol - if (Sym.isGlobalValue()) - GblSymLookup[Sym.getGlobalValue()] = i; - else if (Sym.isExternalSym()) - ExtSymLookup[Sym.getExternalSymbol()] = i; - - // Keep track on the symbol index into the symbol table - Sym.SymTabIdx = i; - } - - // One greater than the symbol table index of the last local symbol - SymTab.Info = FirstNonLocalSymbol; - SymTab.Size = SymTab.size(); -} - -/// EmitSectionTableStringTable - This method adds and emits a section for the -/// ELF Section Table string table: the string table that holds all of the -/// section names. -void ELFWriter::EmitSectionTableStringTable() { - // First step: add the section for the string table to the list of sections: - ELFSection &SHStrTab = getSectionHeaderStringTableSection(); - - // Now that we know which section number is the .shstrtab section, update the - // e_shstrndx entry in the ELF header. - ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset); - - // Set the NameIdx of each section in the string table and emit the bytes for - // the string table. - unsigned Index = 0; - - for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) { - ELFSection &S = *(*I); - // Set the index into the table. Note if we have lots of entries with - // common suffixes, we could memoize them here if we cared. - S.NameIdx = Index; - SHStrTab.emitString(S.getName()); - - // Keep track of the number of bytes emitted to this section. - Index += S.getName().size()+1; - } - - // Set the size of .shstrtab now that we know what it is. 
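Aside (not part of the patch): both EmitStringTable() and EmitSectionTableStringTable() above use the usual ELF convention in which a "name index" is simply the byte offset of a NUL-terminated string inside the table, with offset 0 reserved for the empty name. A stand-alone sketch, with invented names:

  #include <cstdint>
  #include <string>

  class NameTable {
    std::string Blob;
  public:
    NameTable() { Blob.push_back('\0'); } // offset 0: the empty name

    // Appends Name plus a terminating NUL and returns its offset, i.e. the
    // value an ELF writer would store in st_name or sh_name.
    uint32_t add(const std::string &Name) {
      if (Name.empty())
        return 0;                         // reuse the leading NUL
      uint32_t Offset = static_cast<uint32_t>(Blob.size());
      Blob += Name;
      Blob.push_back('\0');
      return Offset;
    }

    const std::string &data() const { return Blob; }
  };

  // On a fresh table, add(".text") returns 1 and add(".data") then returns 7.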
- assert(Index == SHStrTab.size()); - SHStrTab.Size = Index; -} - -/// OutputSectionsAndSectionTable - Now that we have constructed the file header -/// and all of the sections, emit these to the ostream destination and emit the -/// SectionTable. -void ELFWriter::OutputSectionsAndSectionTable() { - // Pass #1: Compute the file offset for each section. - size_t FileOff = ElfHdr.size(); // File header first. - - // Adjust alignment of all section if needed, skip the null section. - for (unsigned i=1, e=SectionList.size(); i < e; ++i) { - ELFSection &ES = *SectionList[i]; - if (!ES.size()) { - ES.Offset = FileOff; - continue; - } - - // Update Section size - if (!ES.Size) - ES.Size = ES.size(); - - // Align FileOff to whatever the alignment restrictions of the section are. - if (ES.Align) - FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1); - - ES.Offset = FileOff; - FileOff += ES.Size; - } - - // Align Section Header. - unsigned TableAlign = TEW->getPrefELFAlignment(); - FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1); - - // Now that we know where all of the sections will be emitted, set the e_shnum - // entry in the ELF header. - ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset); - - // Now that we know the offset in the file of the section table, update the - // e_shoff address in the ELF header. - ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset); - - // Now that we know all of the data in the file header, emit it and all of the - // sections! - O.write((char *)&ElfHdr.getData()[0], ElfHdr.size()); - FileOff = ElfHdr.size(); - - // Section Header Table blob - BinaryObject SHdrTable(isLittleEndian, is64Bit); - - // Emit all of sections to the file and build the section header table. - for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) { - ELFSection &S = *(*I); - DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName() - << ", Size: " << S.Size << ", Offset: " << S.Offset - << ", SectionData Size: " << S.size() << "\n"); - - // Align FileOff to whatever the alignment restrictions of the section are. - if (S.size()) { - if (S.Align) { - for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1); - FileOff != NewFileOff; ++FileOff) - O << (char)0xAB; - } - O.write((char *)&S.getData()[0], S.Size); - FileOff += S.Size; - } - - EmitSectionHeader(SHdrTable, S); - } - - // Align output for the section table. - for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1); - FileOff != NewFileOff; ++FileOff) - O << (char)0xAB; - - // Emit the section table itself. - O.write((char *)&SHdrTable.getData()[0], SHdrTable.size()); -} diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h deleted file mode 100644 index 6f7fbace8aba..000000000000 --- a/lib/CodeGen/ELFWriter.h +++ /dev/null @@ -1,251 +0,0 @@ -//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the ELFWriter class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ELFWRITER_H -#define ELFWRITER_H - -#include "llvm/ADT/SetVector.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include <map> - -namespace llvm { - class BinaryObject; - class Constant; - class ConstantInt; - class ConstantStruct; - class ELFCodeEmitter; - class ELFRelocation; - class ELFSection; - struct ELFSym; - class GlobalVariable; - class JITDebugRegisterer; - class Mangler; - class MachineCodeEmitter; - class MachineConstantPoolEntry; - class ObjectCodeEmitter; - class MCAsmInfo; - class TargetELFWriterInfo; - class TargetLoweringObjectFile; - class raw_ostream; - class SectionKind; - class MCContext; - class TargetMachine; - - typedef std::vector<ELFSym*>::iterator ELFSymIter; - typedef std::vector<ELFSection*>::iterator ELFSectionIter; - typedef SetVector<const GlobalValue*>::const_iterator PendingGblsIter; - typedef SetVector<const char *>::const_iterator PendingExtsIter; - typedef std::pair<const Constant *, int64_t> CstExprResTy; - - /// ELFWriter - This class implements the common target-independent code for - /// writing ELF files. Targets should derive a class from this to - /// parameterize the output format. - /// - class ELFWriter : public MachineFunctionPass { - friend class ELFCodeEmitter; - friend class JITDebugRegisterer; - public: - static char ID; - - /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter - ObjectCodeEmitter *getObjectCodeEmitter() { - return reinterpret_cast<ObjectCodeEmitter*>(ElfCE); - } - - ELFWriter(raw_ostream &O, TargetMachine &TM); - ~ELFWriter(); - - protected: - /// Output stream to send the resultant object file to. - raw_ostream &O; - - /// Target machine description. - TargetMachine &TM; - - /// Context object for machine code objects. - MCContext &OutContext; - - /// Target Elf Writer description. - const TargetELFWriterInfo *TEW; - - /// Mang - The object used to perform name mangling for this module. - Mangler *Mang; - - /// MCE - The MachineCodeEmitter object that we are exposing to emit machine - /// code for functions to the .o file. - ELFCodeEmitter *ElfCE; - - /// TLOF - Target Lowering Object File, provide section names for globals - /// and other object file specific stuff - const TargetLoweringObjectFile &TLOF; - - /// MAI - Target Asm Info, provide information about section names for - /// globals and other target specific stuff. - const MCAsmInfo *MAI; - - //===------------------------------------------------------------------===// - // Properties inferred automatically from the target machine. - //===------------------------------------------------------------------===// - - /// is64Bit/isLittleEndian - This information is inferred from the target - /// machine directly, indicating whether to emit a 32- or 64-bit ELF file. - bool is64Bit, isLittleEndian; - - /// doInitialization - Emit the file header and all of the global variables - /// for the module to the ELF file. - bool doInitialization(Module &M); - bool runOnMachineFunction(MachineFunction &MF); - - /// doFinalization - Now that the module has been completely processed, emit - /// the ELF file to 'O'. - bool doFinalization(Module &M); - - private: - /// Blob containing the Elf header - BinaryObject ElfHdr; - - /// SectionList - This is the list of sections that we have emitted to the - /// file. Once the file has been completely built, the section header table - /// is constructed from this info. 
- std::vector<ELFSection*> SectionList; - unsigned NumSections; // Always = SectionList.size() - - /// SectionLookup - This is a mapping from section name to section number in - /// the SectionList. Used to quickly gather the Section Index from MAI names - std::map<std::string, ELFSection*> SectionLookup; - - /// PendingGlobals - Globals not processed as symbols yet. - SetVector<const GlobalValue*> PendingGlobals; - - /// GblSymLookup - This is a mapping from global value to a symbol index - /// in the symbol table or private symbols list. This is useful since reloc - /// symbol references must be quickly mapped to their indices on the lists. - std::map<const GlobalValue*, uint32_t> GblSymLookup; - - /// PendingExternals - Externals not processed as symbols yet. - SetVector<const char *> PendingExternals; - - /// ExtSymLookup - This is a mapping from externals to a symbol index - /// in the symbol table list. This is useful since reloc symbol references - /// must be quickly mapped to their symbol table indices. - std::map<const char *, uint32_t> ExtSymLookup; - - /// SymbolList - This is the list of symbols emitted to the symbol table. - /// When the SymbolList is finally built, local symbols must be placed in - /// the beginning while non-locals at the end. - std::vector<ELFSym*> SymbolList; - - /// PrivateSyms - Record private symbols, every symbol here must never be - /// present in the SymbolList. - std::vector<ELFSym*> PrivateSyms; - - /// getSection - Return the section with the specified name, creating a new - /// section if one does not already exist. - ELFSection &getSection(const std::string &Name, unsigned Type, - unsigned Flags = 0, unsigned Align = 0) { - ELFSection *&SN = SectionLookup[Name]; - if (SN) return *SN; - - SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit)); - SN = SectionList.back(); - SN->SectionIdx = NumSections++; - SN->Type = Type; - SN->Flags = Flags; - SN->Link = ELF::SHN_UNDEF; - SN->Align = Align; - return *SN; - } - - ELFSection &getNonExecStackSection() { - return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1); - } - - ELFSection &getSymbolTableSection() { - return getSection(".symtab", ELF::SHT_SYMTAB, 0); - } - - ELFSection &getStringTableSection() { - return getSection(".strtab", ELF::SHT_STRTAB, 0, 1); - } - - ELFSection &getSectionHeaderStringTableSection() { - return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1); - } - - ELFSection &getNullSection() { - return getSection("", ELF::SHT_NULL, 0); - } - - ELFSection &getDataSection(); - ELFSection &getBSSSection(); - ELFSection &getCtorSection(); - ELFSection &getDtorSection(); - ELFSection &getJumpTableSection(); - ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE); - ELFSection &getTextSection(const Function *F); - ELFSection &getRelocSection(ELFSection &S); - - // Helpers for obtaining ELF specific info. - unsigned getGlobalELFBinding(const GlobalValue *GV); - unsigned getGlobalELFType(const GlobalValue *GV); - unsigned getGlobalELFVisibility(const GlobalValue *GV); - - // AddPendingGlobalSymbol - Add a global to be processed and to - // the global symbol lookup, use a zero index because the table - // index will be determined later. - void AddPendingGlobalSymbol(const GlobalValue *GV, - bool AddToLookup = false); - - // AddPendingExternalSymbol - Add the external to be processed - // and to the external symbol lookup, use a zero index because - // the symbol table index will be determined later. 
- void AddPendingExternalSymbol(const char *External); - - // AddToSymbolList - Update the symbol lookup and If the symbol is - // private add it to PrivateSyms list, otherwise to SymbolList. - void AddToSymbolList(ELFSym *GblSym); - - // As we complete the ELF file, we need to update fields in the ELF header - // (e.g. the location of the section table). These members keep track of - // the offset in ELFHeader of these various pieces to update and other - // locations in the file. - unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header. - unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header. - unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header. - - private: - void EmitGlobal(const GlobalValue *GV); - void EmitGlobalConstant(const Constant *C, ELFSection &GblS); - void EmitGlobalConstantStruct(const ConstantStruct *CVS, - ELFSection &GblS); - void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S); - void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, - ELFSection &GblS, int64_t Offset = 0); - bool EmitSpecialLLVMGlobal(const GlobalVariable *GV); - void EmitXXStructorList(const Constant *List, ELFSection &Xtor); - void EmitRelocations(); - void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA); - void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr); - void EmitSectionTableStringTable(); - void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym); - void EmitSymbolTable(); - void EmitStringTable(const std::string &ModuleName); - void OutputSectionsAndSectionTable(); - void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value, - unsigned Size); - unsigned SortSymbols(); - CstExprResTy ResolveConstantExpr(const Constant *CV); - }; -} - -#endif diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp index a7aba89b87f3..3bb04657b58a 100644 --- a/lib/CodeGen/EdgeBundles.cpp +++ b/lib/CodeGen/EdgeBundles.cpp @@ -77,7 +77,7 @@ void EdgeBundles::view() const { /// Specialize WriteGraph, the standard implementation won't work. raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, bool ShortNames, - const std::string &Title) { + const Twine &Title) { const MachineFunction *MF = G.getMachineFunction(); O << "digraph {\n"; diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 01dccdb71e4b..a48c5400abcb 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -26,7 +26,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -45,7 +45,7 @@ using namespace llvm; /// DomainValue for each register, but it may contain multiple execution /// domains. A register value is initially created in a single execution /// domain, but if we were forced to pay the penalty of a domain crossing, we -/// keep track of the fact the the register is now available in multiple +/// keep track of the fact that the register is now available in multiple /// domains. namespace { struct DomainValue { @@ -57,8 +57,10 @@ struct DomainValue { // domains where the register is available for free. unsigned AvailableDomains; - // Position of the last defining instruction. - unsigned Dist; + // Pointer to the next DomainValue in a chain. 
When two DomainValues are + // merged, Victim.Next is set to point to Victor, so old DomainValue + // references can be updated by folowing the chain. + DomainValue *Next; // Twiddleable instructions using or defining these registers. SmallVector<MachineInstr*, 8> Instrs; @@ -92,16 +94,33 @@ struct DomainValue { return CountTrailingZeros_32(AvailableDomains); } - DomainValue() { clear(); } + DomainValue() : Refs(0) { clear(); } + // Clear this DomainValue and point to next which has all its data. void clear() { - Refs = AvailableDomains = Dist = 0; + AvailableDomains = 0; + Next = 0; Instrs.clear(); } }; } namespace { +/// LiveReg - Information about a live register. +struct LiveReg { + /// Value currently in this register, or NULL when no value is being tracked. + /// This counts as a DomainValue reference. + DomainValue *Value; + + /// Instruction that defined this register, relative to the beginning of the + /// current basic block. When a LiveReg is used to represent a live-out + /// register, this value is relative to the end of the basic block, so it + /// will be a negative number. + int Def; +}; +} // anonynous namespace + +namespace { class ExeDepsFix : public MachineFunctionPass { static char ID; SpecificBumpPtrAllocator<DomainValue> Allocator; @@ -111,13 +130,19 @@ class ExeDepsFix : public MachineFunctionPass { MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - MachineBasicBlock *MBB; std::vector<int> AliasMap; const unsigned NumRegs; - DomainValue **LiveRegs; - typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap; + LiveReg *LiveRegs; + typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap; LiveOutMap LiveOuts; - unsigned Distance; + + /// Current instruction number. + /// The first instruction in each basic block is 0. + int CurInstr; + + /// True when the current block has a predecessor that hasn't been visited + /// yet. + bool SeenUnknownBackEdge; public: ExeDepsFix(const TargetRegisterClass *rc) @@ -131,26 +156,33 @@ public: virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { - return "SSE execution domain fixup"; + return "Execution dependency fix"; } private: // Register mapping. - int RegIndex(unsigned Reg); + int regIndex(unsigned Reg); // DomainValue allocation. - DomainValue *Alloc(int domain = -1); - void Recycle(DomainValue*); + DomainValue *alloc(int domain = -1); + DomainValue *retain(DomainValue *DV) { + if (DV) ++DV->Refs; + return DV; + } + void release(DomainValue*); + DomainValue *resolve(DomainValue*&); // LiveRegs manipulations. - void SetLiveReg(int rx, DomainValue *DV); - void Kill(int rx); - void Force(int rx, unsigned domain); - void Collapse(DomainValue *dv, unsigned domain); - bool Merge(DomainValue *A, DomainValue *B); - - void enterBasicBlock(); - void visitGenericInstr(MachineInstr*); + void setLiveReg(int rx, DomainValue *DV); + void kill(int rx); + void force(int rx, unsigned domain); + void collapse(DomainValue *dv, unsigned domain); + bool merge(DomainValue *A, DomainValue *B); + + void enterBasicBlock(MachineBasicBlock*); + void leaveBasicBlock(MachineBasicBlock*); + void visitInstr(MachineInstr*); + void processDefs(MachineInstr*, bool Kill); void visitSoftInstr(MachineInstr*, unsigned mask); void visitHardInstr(MachineInstr*, unsigned domain); }; @@ -160,83 +192,108 @@ char ExeDepsFix::ID = 0; /// Translate TRI register number to an index into our smaller tables of /// interesting registers. Return -1 for boring registers. 
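Aside (not part of the patch): the DomainValue::Next field added above turns merged values into a reference-counted forwarding chain, so stale pointers kept in the per-block live-out maps can be redirected lazily to the surviving value, much like union-find with path compression. A generic stand-alone sketch of that pattern, with invented names (the real pass recycles nodes into a bump allocator rather than deleting them):

  #include <cassert>

  struct Node {
    unsigned Refs = 0;
    Node *Next = nullptr; // set once this node has been merged into another
  };

  Node *retain(Node *N) {
    if (N) ++N->Refs;
    return N;
  }

  void release(Node *N) {
    while (N && --N->Refs == 0) {
      Node *Next = N->Next;
      delete N;  // dropping N also drops the reference it held on Next
      N = Next;
    }
  }

  // Merge: every reference to Victim is redirected to Victor lazily.
  void merge(Node *Victor, Node *Victim) {
    assert(!Victim->Next && "Victim was already merged");
    Victim->Next = retain(Victor);
  }

  // Follow the chain from a stored reference to the live representative and
  // update the reference in place (retain the tail before releasing the dead
  // head so the tail cannot be freed by the cascade).
  Node *resolve(Node *&Ref) {
    Node *N = Ref;
    if (!N || !N->Next)
      return N;
    while (N->Next)
      N = N->Next;
    retain(N);
    release(Ref);
    Ref = N;
    return N;
  }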
-int ExeDepsFix::RegIndex(unsigned Reg) { +int ExeDepsFix::regIndex(unsigned Reg) { assert(Reg < AliasMap.size() && "Invalid register"); return AliasMap[Reg]; } -DomainValue *ExeDepsFix::Alloc(int domain) { +DomainValue *ExeDepsFix::alloc(int domain) { DomainValue *dv = Avail.empty() ? new(Allocator.Allocate()) DomainValue : Avail.pop_back_val(); - dv->Dist = Distance; if (domain >= 0) dv->addDomain(domain); + assert(dv->Refs == 0 && "Reference count wasn't cleared"); + assert(!dv->Next && "Chained DomainValue shouldn't have been recycled"); return dv; } -void ExeDepsFix::Recycle(DomainValue *dv) { - assert(dv && "Cannot recycle NULL"); - dv->clear(); - Avail.push_back(dv); +/// release - Release a reference to DV. When the last reference is released, +/// collapse if needed. +void ExeDepsFix::release(DomainValue *DV) { + while (DV) { + assert(DV->Refs && "Bad DomainValue"); + if (--DV->Refs) + return; + + // There are no more DV references. Collapse any contained instructions. + if (DV->AvailableDomains && !DV->isCollapsed()) + collapse(DV, DV->getFirstDomain()); + + DomainValue *Next = DV->Next; + DV->clear(); + Avail.push_back(DV); + // Also release the next DomainValue in the chain. + DV = Next; + } +} + +/// resolve - Follow the chain of dead DomainValues until a live DomainValue is +/// reached. Update the referenced pointer when necessary. +DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) { + DomainValue *DV = DVRef; + if (!DV || !DV->Next) + return DV; + + // DV has a chain. Find the end. + do DV = DV->Next; + while (DV->Next); + + // Update DVRef to point to DV. + retain(DV); + release(DVRef); + DVRef = DV; + return DV; } /// Set LiveRegs[rx] = dv, updating reference counts. -void ExeDepsFix::SetLiveReg(int rx, DomainValue *dv) { +void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) { assert(unsigned(rx) < NumRegs && "Invalid index"); - if (!LiveRegs) { - LiveRegs = new DomainValue*[NumRegs]; - std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0); - } + assert(LiveRegs && "Must enter basic block first."); - if (LiveRegs[rx] == dv) + if (LiveRegs[rx].Value == dv) return; - if (LiveRegs[rx]) { - assert(LiveRegs[rx]->Refs && "Bad refcount"); - if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]); - } - LiveRegs[rx] = dv; - if (dv) ++dv->Refs; + if (LiveRegs[rx].Value) + release(LiveRegs[rx].Value); + LiveRegs[rx].Value = retain(dv); } // Kill register rx, recycle or collapse any DomainValue. -void ExeDepsFix::Kill(int rx) { +void ExeDepsFix::kill(int rx) { assert(unsigned(rx) < NumRegs && "Invalid index"); - if (!LiveRegs || !LiveRegs[rx]) return; - - // Before killing the last reference to an open DomainValue, collapse it to - // the first available domain. - if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed()) - Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain()); - else - SetLiveReg(rx, 0); + assert(LiveRegs && "Must enter basic block first."); + if (!LiveRegs[rx].Value) + return; + + release(LiveRegs[rx].Value); + LiveRegs[rx].Value = 0; } /// Force register rx into domain. -void ExeDepsFix::Force(int rx, unsigned domain) { +void ExeDepsFix::force(int rx, unsigned domain) { assert(unsigned(rx) < NumRegs && "Invalid index"); - DomainValue *dv; - if (LiveRegs && (dv = LiveRegs[rx])) { + assert(LiveRegs && "Must enter basic block first."); + if (DomainValue *dv = LiveRegs[rx].Value) { if (dv->isCollapsed()) dv->addDomain(domain); else if (dv->hasDomain(domain)) - Collapse(dv, domain); + collapse(dv, domain); else { // This is an incompatible open DomainValue. 
Collapse it to whatever and // force the new value into domain. This costs a domain crossing. - Collapse(dv, dv->getFirstDomain()); - assert(LiveRegs[rx] && "Not live after collapse?"); - LiveRegs[rx]->addDomain(domain); + collapse(dv, dv->getFirstDomain()); + assert(LiveRegs[rx].Value && "Not live after collapse?"); + LiveRegs[rx].Value->addDomain(domain); } } else { // Set up basic collapsed DomainValue. - SetLiveReg(rx, Alloc(domain)); + setLiveReg(rx, alloc(domain)); } } /// Collapse open DomainValue into given domain. If there are multiple /// registers using dv, they each get a unique collapsed DomainValue. -void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) { +void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { assert(dv->hasDomain(domain) && "Cannot collapse"); // Collapse all the instructions. @@ -247,13 +304,13 @@ void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) { // If there are multiple users, give them new, unique DomainValues. if (LiveRegs && dv->Refs > 1) for (unsigned rx = 0; rx != NumRegs; ++rx) - if (LiveRegs[rx] == dv) - SetLiveReg(rx, Alloc(domain)); + if (LiveRegs[rx].Value == dv) + setLiveReg(rx, alloc(domain)); } /// Merge - All instructions and registers in B are moved to A, and B is /// released. -bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) { +bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { assert(!A->isCollapsed() && "Cannot merge into collapsed"); assert(!B->isCollapsed() && "Cannot merge from collapsed"); if (A == B) @@ -263,47 +320,188 @@ bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) { if (!common) return false; A->AvailableDomains = common; - A->Dist = std::max(A->Dist, B->Dist); A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); + + // Clear the old DomainValue so we won't try to swizzle instructions twice. + B->clear(); + // All uses of B are referred to A. + B->Next = retain(A); + for (unsigned rx = 0; rx != NumRegs; ++rx) - if (LiveRegs[rx] == B) - SetLiveReg(rx, A); + if (LiveRegs[rx].Value == B) + setLiveReg(rx, A); return true; } -void ExeDepsFix::enterBasicBlock() { - // Try to coalesce live-out registers from predecessors. - for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), +// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values. +void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { + // Detect back-edges from predecessors we haven't processed yet. + SeenUnknownBackEdge = false; + + // Reset instruction counter in each basic block. + CurInstr = 0; + + // Set up LiveRegs to represent registers entering MBB. + if (!LiveRegs) + LiveRegs = new LiveReg[NumRegs]; + + // Default values are 'nothing happened a long time ago'. + for (unsigned rx = 0; rx != NumRegs; ++rx) { + LiveRegs[rx].Value = 0; + LiveRegs[rx].Def = -(1 << 20); + } + + // This is the entry block. + if (MBB->pred_empty()) { + for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { - int rx = RegIndex(*i); - if (rx < 0) continue; - for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), - pe = MBB->pred_end(); pi != pe; ++pi) { - LiveOutMap::const_iterator fi = LiveOuts.find(*pi); - if (fi == LiveOuts.end()) continue; - DomainValue *pdv = fi->second[rx]; - if (!pdv) continue; - if (!LiveRegs || !LiveRegs[rx]) { - SetLiveReg(rx, pdv); + int rx = regIndex(*i); + if (rx < 0) + continue; + // Treat function live-ins as if they were defined just before the first + // instruction. 
Usually, function arguments are set up immediately + // before the call. + LiveRegs[rx].Def = -1; + } + DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n"); + return; + } + + // Try to coalesce live-out registers from predecessors. + for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), + pe = MBB->pred_end(); pi != pe; ++pi) { + LiveOutMap::const_iterator fi = LiveOuts.find(*pi); + if (fi == LiveOuts.end()) { + SeenUnknownBackEdge = true; + continue; + } + assert(fi->second && "Can't have NULL entries"); + + for (unsigned rx = 0; rx != NumRegs; ++rx) { + // Use the most recent predecessor def for each register. + LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def); + + DomainValue *pdv = resolve(fi->second[rx].Value); + if (!pdv) + continue; + if (!LiveRegs[rx].Value) { + setLiveReg(rx, pdv); continue; } // We have a live DomainValue from more than one predecessor. - if (LiveRegs[rx]->isCollapsed()) { + if (LiveRegs[rx].Value->isCollapsed()) { // We are already collapsed, but predecessor is not. Force him. - unsigned domain = LiveRegs[rx]->getFirstDomain(); - if (!pdv->isCollapsed() && pdv->hasDomain(domain)) - Collapse(pdv, domain); + unsigned Domain = LiveRegs[rx].Value->getFirstDomain(); + if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) + collapse(pdv, Domain); continue; } // Currently open, merge in predecessor. if (!pdv->isCollapsed()) - Merge(LiveRegs[rx], pdv); + merge(LiveRegs[rx].Value, pdv); else - Force(rx, pdv->getFirstDomain()); + force(rx, pdv->getFirstDomain()); + } + } + DEBUG(dbgs() << "BB#" << MBB->getNumber() + << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n")); +} + +void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { + assert(LiveRegs && "Must enter basic block first."); + // Save live registers at end of MBB - used by enterBasicBlock(). + // Also use LiveOuts as a visited set to detect back-edges. + bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second; + + if (First) { + // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to + // the end of this block instead of the beginning. + for (unsigned i = 0, e = NumRegs; i != e; ++i) + LiveRegs[i].Def -= CurInstr; + } else { + // Insertion failed, this must be the second pass. + // Release all the DomainValues instead of keeping them. + for (unsigned i = 0, e = NumRegs; i != e; ++i) + release(LiveRegs[i].Value); + delete[] LiveRegs; + } + LiveRegs = 0; +} + +void ExeDepsFix::visitInstr(MachineInstr *MI) { + if (MI->isDebugValue()) + return; + + // Update instructions with explicit execution domains. + std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI); + if (DomP.first) { + if (DomP.second) + visitSoftInstr(MI, DomP.second); + else + visitHardInstr(MI, DomP.first); + } + + // Process defs to track register ages, and kill values clobbered by generic + // instructions. + processDefs(MI, !DomP.first); +} + +// Update def-ages for registers defined by MI. +// If Kill is set, also kill off DomainValues clobbered by the defs. +void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { + assert(!MI->isDebugValue() && "Won't process debug values"); + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MI->isVariadic() ? 
MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isImplicit()) + break; + if (MO.isUse()) + continue; + int rx = regIndex(MO.getReg()); + if (rx < 0) + continue; + + // This instruction explicitly defines rx. + DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr + << '\t' << *MI); + + // How many instructions since rx was last written? + unsigned Clearance = CurInstr - LiveRegs[rx].Def; + LiveRegs[rx].Def = CurInstr; + + // Kill off domains redefined by generic instructions. + if (Kill) + kill(rx); + + // Verify clearance before partial register updates. + unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); + if (!Pref) + continue; + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + TII->breakPartialRegDependency(MI, i, TRI); + continue; + } + + // The current clearance seems OK, but we may be ignoring a def from a + // back-edge. + if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { + DEBUG(dbgs() << ": OK.\n"); + continue; } + + // A def from an unprocessed back-edge may make us break this dependency. + DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); } + + ++CurInstr; } // A hard instruction only works in one domain. All input registers will be @@ -314,19 +512,19 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - Force(rx, domain); + force(rx, domain); } // Kill all defs and force them. for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - Kill(rx); - Force(rx, domain); + kill(rx); + force(rx, domain); } } @@ -343,9 +541,9 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - if (DomainValue *dv = LiveRegs[rx]) { + if (DomainValue *dv = LiveRegs[rx].Value) { // Bitmask of domains that dv and available have in common. unsigned common = dv->getCommonDomains(available); // Is it possible to use this collapsed register for free? @@ -360,7 +558,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { else // Open DomainValue is not compatible with instruction. It is useless // now. - Kill(rx); + kill(rx); } } @@ -374,94 +572,89 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. - SmallVector<DomainValue*,4> doms; + SmallVector<LiveReg, 4> Regs; for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { int rx = *i; - DomainValue *dv = LiveRegs[rx]; + const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. - if (!dv->getCommonDomains(available)) { - Kill(*i); + if (!LR.Value->getCommonDomains(available)) { + kill(rx); continue; } - // sorted, uniqued insert. 
- bool inserted = false; - for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end(); - i != e && !inserted; ++i) { - if (dv == *i) - inserted = true; - else if (dv->Dist < (*i)->Dist) { - inserted = true; - doms.insert(i, dv); + // Sorted insertion. + bool Inserted = false; + for (SmallVector<LiveReg, 4>::iterator i = Regs.begin(), e = Regs.end(); + i != e && !Inserted; ++i) { + if (LR.Def < i->Def) { + Inserted = true; + Regs.insert(i, LR); } } - if (!inserted) - doms.push_back(dv); + if (!Inserted) + Regs.push_back(LR); } // doms are now sorted in order of appearance. Try to merge them all, giving // priority to the latest ones. DomainValue *dv = 0; - while (!doms.empty()) { + while (!Regs.empty()) { if (!dv) { - dv = doms.pop_back_val(); + dv = Regs.pop_back_val().Value; + // Force the first dv to match the current instruction. + dv->AvailableDomains = dv->getCommonDomains(available); + assert(dv->AvailableDomains && "Domain should have been filtered"); continue; } - DomainValue *latest = doms.pop_back_val(); - if (Merge(dv, latest)) continue; + DomainValue *Latest = Regs.pop_back_val().Value; + // Skip already merged values. + if (Latest == dv || Latest->Next) + continue; + if (merge(dv, Latest)) + continue; // If latest didn't merge, it is useless now. Kill all registers using it. for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i) - if (LiveRegs[*i] == latest) - Kill(*i); + if (LiveRegs[*i].Value == Latest) + kill(*i); } // dv is the DomainValue we are going to use for this instruction. - if (!dv) - dv = Alloc(); - dv->Dist = Distance; - dv->AvailableDomains = available; + if (!dv) { + dv = alloc(); + dv->AvailableDomains = available; + } dv->Instrs.push_back(mi); // Finally set all defs and non-collapsed uses to dv. for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { - Kill(rx); - SetLiveReg(rx, dv); + if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { + kill(rx); + setLiveReg(rx, dv); } } } -void ExeDepsFix::visitGenericInstr(MachineInstr *mi) { - // Process explicit defs, kill any relevant registers redefined. - for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); - if (rx < 0) continue; - Kill(rx); - } -} - bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MF = &mf; TII = MF->getTarget().getInstrInfo(); TRI = MF->getTarget().getRegisterInfo(); - MBB = 0; LiveRegs = 0; - Distance = 0; assert(NumRegs == RC->getNumRegs() && "Bad regclass"); + DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: " + << RC->getName() << " **********\n"); + // If no relevant registers are used in the function, we can skip it // completely. bool anyregs = false; for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); I != E; ++I) - if (MF->getRegInfo().isPhysRegUsed(*I)) { + if (MF->getRegInfo().isPhysRegOrOverlapUsed(*I)) { anyregs = true; break; } @@ -473,43 +666,48 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { // or -1. 
AliasMap.resize(TRI->getNumRegs(), -1); for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) - for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI) + for (const uint16_t *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI) AliasMap[*AI] = i; } MachineBasicBlock *Entry = MF->begin(); - SmallPtrSet<MachineBasicBlock*, 16> Visited; - for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> > - DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited); - DFI != DFE; ++DFI) { - MBB = *DFI; - enterBasicBlock(); + ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry); + SmallVector<MachineBasicBlock*, 16> Loops; + for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator + MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { + MachineBasicBlock *MBB = *MBBI; + enterBasicBlock(MBB); + if (SeenUnknownBackEdge) + Loops.push_back(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { - MachineInstr *mi = I; - if (mi->isDebugValue()) continue; - ++Distance; - std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(mi); - if (domp.first) - if (domp.second) - visitSoftInstr(mi, domp.second); - else - visitHardInstr(mi, domp.first); - else if (LiveRegs) - visitGenericInstr(mi); - } + ++I) + visitInstr(I); + leaveBasicBlock(MBB); + } - // Save live registers at end of MBB - used by enterBasicBlock(). - if (LiveRegs) - LiveOuts.insert(std::make_pair(MBB, LiveRegs)); - LiveRegs = 0; + // Visit all the loop blocks again in order to merge DomainValues from + // back-edges. + for (unsigned i = 0, e = Loops.size(); i != e; ++i) { + MachineBasicBlock *MBB = Loops[i]; + enterBasicBlock(MBB); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) + if (!I->isDebugValue()) + processDefs(I, false); + leaveBasicBlock(MBB); } - // Clear the LiveOuts vectors. Should we also collapse any remaining - // DomainValues? - for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end(); - i != e; ++i) - delete[] i->second; + // Clear the LiveOuts vectors and collapse any remaining DomainValues. 
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator + MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { + LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI); + if (FI == LiveOuts.end() || !FI->second) + continue; + for (unsigned i = 0, e = NumRegs; i != e; ++i) + if (FI->second[i].Value) + release(FI->second[i].Value); + delete[] FI->second; + } LiveOuts.clear(); Avail.clear(); Allocator.DestroyAll(); diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index a67140ece4a5..2c4a93543cc3 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -32,10 +32,6 @@ namespace { private: virtual bool runOnMachineFunction(MachineFunction &MF); - const char *getPassName() const { - return "Expand ISel Pseudo-instructions"; - } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } @@ -43,12 +39,9 @@ namespace { } // end anonymous namespace char ExpandISelPseudos::ID = 0; +char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID; INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos", - "Expand CodeGen Pseudo-instructions", false, false) - -FunctionPass *llvm::createExpandISelPseudosPass() { - return new ExpandISelPseudos(); -} + "Expand ISel Pseudo-instructions", false, false) bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; @@ -62,8 +55,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MI = MBBI++; // If MI is a pseudo, expand it. - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.usesCustomInsertionHook()) { + if (MI->usesCustomInsertionHook()) { Changed = true; MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB); diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index e2a14a8dfd97..b14afc286d49 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -36,10 +36,6 @@ public: static char ID; // Pass identification, replacement for typeid ExpandPostRA() : MachineFunctionPass(ID) {} - const char *getPassName() const { - return "Post-RA pseudo instruction expansion pass"; - } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); @@ -61,10 +57,10 @@ private: } // end anonymous namespace char ExpandPostRA::ID = 0; +char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID; -FunctionPass *llvm::createExpandPostRAPseudosPass() { - return new ExpandPostRA(); -} +INITIALIZE_PASS(ExpandPostRA, "postrapseudos", + "Post-RA pseudo instruction expansion pass", false, false) /// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead, /// and the lowered replacement instructions immediately precede it. @@ -207,7 +203,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { ++mi; // Only expand pseudos. - if (!MI->getDesc().isPseudo()) + if (!MI->isPseudo()) continue; // Give targets a chance to expand even standard pseudos. 
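(The ExecutionDepsFix changes above track, per register, the instruction index of its most recent def in LiveRegs[rx].Def and compare the resulting clearance against the clearance the target prefers before a partial register update. A minimal standalone sketch of just that decision, using hypothetical stand-in types instead of the real MachineInstr/TargetInstrInfo interfaces:)

#include <cstdio>

// Hypothetical stand-in for the pass state: one record per tracked register,
// holding the instruction number of its most recent def (a large negative
// value models "written a long time ago", as in enterBasicBlock()).
struct LiveRegModel {
  int Def;
};

// Returns true when a dependency-breaking instruction should be inserted:
// the preferred clearance exceeds the number of instructions since the
// register was last written.
bool shouldBreakPartialRegDependency(const LiveRegModel &LR, int CurInstr,
                                     unsigned PreferredClearance) {
  unsigned Clearance = static_cast<unsigned>(CurInstr - LR.Def);
  return PreferredClearance > Clearance;
}

int main() {
  // A register written 3 instructions ago, with a target that wants 16
  // instructions of clearance before a partial update (numbers are made up).
  LiveRegModel LR = { /*Def=*/7 };
  int CurInstr = 10;
  std::printf("break dependency: %s\n",
              shouldBreakPartialRegDependency(LR, CurInstr, 16) ? "yes" : "no");
  return 0;
}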
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index d757cf409d50..1caf8c233976 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -143,12 +143,12 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const { static const char *DescKind(GC::PointKind Kind) { switch (Kind) { - default: llvm_unreachable("Unknown GC point kind"); case GC::Loop: return "loop"; case GC::Return: return "return"; case GC::PreCall: return "pre-call"; case GC::PostCall: return "post-call"; } + llvm_unreachable("Invalid point kind"); } bool Printer::runOnFunction(Function &F) { @@ -156,12 +156,12 @@ bool Printer::runOnFunction(Function &F) { GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F); - OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n"; + OS << "GC roots for " << FD->getFunction().getName() << ":\n"; for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(), RE = FD->roots_end(); RI != RE; ++RI) OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n"; - OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n"; + OS << "GC safe points for " << FD->getFunction().getName() << ":\n"; for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE; ++PI) { diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 766c6ee542a9..506b5cf09457 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -10,8 +10,8 @@ // This file implements target- and collector-independent garbage collection // infrastructure. // -// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots -// are identified in SelectionDAGISel. +// GCMachineCodeAnalysis identifies the GC safe points in the machine code. +// Roots are identified in SelectionDAGISel. // //===----------------------------------------------------------------------===// @@ -35,9 +35,9 @@ using namespace llvm; namespace { - + /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or - /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as + /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as /// directed by the GCStrategy. It also performs automatic root initialization /// and custom intrinsic lowering. class LowerIntrinsics : public FunctionPass { @@ -47,47 +47,46 @@ namespace { bool PerformDefaultLowering(Function &F, GCStrategy &Coll); static bool InsertRootInitializers(Function &F, AllocaInst **Roots, unsigned Count); - + public: static char ID; - + LowerIntrinsics(); const char *getPassName() const; void getAnalysisUsage(AnalysisUsage &AU) const; - + bool doInitialization(Module &M); bool runOnFunction(Function &F); }; - - - /// MachineCodeAnalysis - This is a target-independent pass over the machine + + + /// GCMachineCodeAnalysis - This is a target-independent pass over the machine /// function representation to identify safe points for the garbage collector /// in the machine code. It inserts labels at safe points and populates a /// GCMetadata record for each function. 
- class MachineCodeAnalysis : public MachineFunctionPass { + class GCMachineCodeAnalysis : public MachineFunctionPass { const TargetMachine *TM; GCFunctionInfo *FI; MachineModuleInfo *MMI; const TargetInstrInfo *TII; - + void FindSafePoints(MachineFunction &MF); void VisitCallPoint(MachineBasicBlock::iterator MI); - MCSymbol *InsertLabel(MachineBasicBlock &MBB, + MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL) const; - + void FindStackOffsets(MachineFunction &MF); - + public: static char ID; - - MachineCodeAnalysis(); - const char *getPassName() const; + + GCMachineCodeAnalysis(); void getAnalysisUsage(AnalysisUsage &AU) const; - + bool runOnMachineFunction(MachineFunction &MF); }; - + } // ----------------------------------------------------------------------------- @@ -97,6 +96,7 @@ GCStrategy::GCStrategy() : CustomReadBarriers(false), CustomWriteBarriers(false), CustomRoots(false), + CustomSafePoints(false), InitRoots(true), UsesMetadata(false) {} @@ -104,18 +104,24 @@ GCStrategy::GCStrategy() : GCStrategy::~GCStrategy() { for (iterator I = begin(), E = end(); I != E; ++I) delete *I; - + Functions.clear(); } - + bool GCStrategy::initializeCustomLowering(Module &M) { return false; } - + bool GCStrategy::performCustomLowering(Function &F) { dbgs() << "gc " << getName() << " must override performCustomLowering.\n"; + llvm_unreachable("must override performCustomLowering"); +} + + +bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) { + dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n"; llvm_unreachable(0); - return 0; } + GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { GCFunctionInfo *FI = new GCFunctionInfo(F, *this); Functions.push_back(FI); @@ -132,7 +138,7 @@ INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false) FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); } - + char LowerIntrinsics::ID = 0; LowerIntrinsics::LowerIntrinsics() @@ -143,7 +149,7 @@ LowerIntrinsics::LowerIntrinsics() const char *LowerIntrinsics::getPassName() const { return "Lower Garbage Collection Instructions"; } - + void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { FunctionPass::getAnalysisUsage(AU); AU.addRequired<GCModuleInfo>(); @@ -161,22 +167,22 @@ bool LowerIntrinsics::doInitialization(Module &M) { for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (!I->isDeclaration() && I->hasGC()) MI->getFunctionInfo(*I); // Instantiate the GC strategy. - + bool MadeChange = false; for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) if (NeedsCustomLoweringPass(**I)) if ((*I)->initializeCustomLowering(M)) MadeChange = true; - + return MadeChange; } -bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, +bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, unsigned Count) { // Scroll past alloca instructions. BasicBlock::iterator IP = F.getEntryBlock().begin(); while (isa<AllocaInst>(IP)) ++IP; - + // Search for initializers in the initial BB. SmallPtrSet<AllocaInst*,16> InitedRoots; for (; !CouldBecomeSafePoint(IP); ++IP) @@ -184,10 +190,10 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, if (AllocaInst *AI = dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts())) InitedRoots.insert(AI); - + // Add root initializers. 
bool MadeChange = false; - + for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) if (!InitedRoots.count(*I)) { StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>( @@ -196,7 +202,7 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, SI->insertAfter(*I); MadeChange = true; } - + return MadeChange; } @@ -220,26 +226,26 @@ bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) { bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) { // The natural definition of instructions which could introduce safe points // are: - // + // // - call, invoke (AfterCall, BeforeCall) // - phis (Loops) // - invoke, ret, unwind (Exit) - // + // // However, instructions as seemingly inoccuous as arithmetic can become // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead // it is necessary to take a conservative approach. - + if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) || isa<StoreInst>(I) || isa<LoadInst>(I)) return false; - + // llvm.gcroot is safe because it doesn't do anything at runtime. if (CallInst *CI = dyn_cast<CallInst>(I)) if (Function *F = CI->getCalledFunction()) if (unsigned IID = F->getIntrinsicID()) if (IID == Intrinsic::gcroot) return false; - + return true; } @@ -249,15 +255,15 @@ bool LowerIntrinsics::runOnFunction(Function &F) { // Quick exit for functions that do not use GC. if (!F.hasGC()) return false; - + GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F); GCStrategy &S = FI.getStrategy(); - + bool MadeChange = false; - + if (NeedsDefaultLoweringPass(S)) MadeChange |= PerformDefaultLowering(F, S); - + bool UseCustomLoweringPass = NeedsCustomLoweringPass(S); if (UseCustomLoweringPass) MadeChange |= S.performCustomLowering(F); @@ -275,9 +281,9 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { bool LowerWr = !S.customWriteBarrier(); bool LowerRd = !S.customReadBarrier(); bool InitRoots = S.initializeRoots(); - + SmallVector<AllocaInst*, 32> Roots; - + bool MadeChange = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { @@ -313,104 +319,104 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { default: continue; } - + MadeChange = true; } } } - + if (Roots.size()) MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); - + return MadeChange; } // ----------------------------------------------------------------------------- -FunctionPass *llvm::createGCMachineCodeAnalysisPass() { - return new MachineCodeAnalysis(); -} +char GCMachineCodeAnalysis::ID = 0; +char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID; -char MachineCodeAnalysis::ID = 0; +INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis", + "Analyze Machine Code For Garbage Collection", false, false) -MachineCodeAnalysis::MachineCodeAnalysis() +GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {} -const char *MachineCodeAnalysis::getPassName() const { - return "Analyze Machine Code For Garbage Collection"; -} - -void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { +void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); AU.setPreservesAll(); AU.addRequired<MachineModuleInfo>(); AU.addRequired<GCModuleInfo>(); } -MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const { +MCSymbol 
*GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const { MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); return Label; } -void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { +void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { // Find the return address (next instruction), too, so as to bracket the call // instruction. - MachineBasicBlock::iterator RAI = CI; - ++RAI; - + MachineBasicBlock::iterator RAI = CI; + ++RAI; + if (FI->getStrategy().needsSafePoint(GC::PreCall)) { MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc()); FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc()); } - + if (FI->getStrategy().needsSafePoint(GC::PostCall)) { MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc()); FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc()); } } -void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { +void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE; ++BBI) for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); MI != ME; ++MI) - if (MI->getDesc().isCall()) + if (MI->isCall()) VisitCallPoint(MI); } -void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { +void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { const TargetFrameLowering *TFI = TM->getFrameLowering(); assert(TFI && "TargetRegisterInfo not available!"); - + for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(), RE = FI->roots_end(); RI != RE; ++RI) RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); } -bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { +bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { // Quick exit for functions that do not use GC. if (!MF.getFunction()->hasGC()) return false; - + FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction()); if (!FI->getStrategy().needsSafePoints()) return false; - + TM = &MF.getTarget(); MMI = &getAnalysis<MachineModuleInfo>(); TII = TM->getInstrInfo(); - + // Find the size of the stack frame. FI->setFrameSize(MF.getFrameInfo()->getStackSize()); - + // Find all safe points. - FindSafePoints(MF); - + if (FI->getStrategy().customSafePoints()) { + FI->getStrategy().findCustomSafePoints(*FI, MF); + } else { + FindSafePoints(MF); + } + // Find the stack offsets for all roots. 
FindStackOffsets(MF); - + return false; } diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index ce7ed293daac..75ae5b9c2c27 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -62,6 +62,7 @@ STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed"); STATISTIC(NumDiamonds, "Number of diamond if-conversions performed"); STATISTIC(NumIfConvBBs, "Number of if-converted blocks"); STATISTIC(NumDupBBs, "Number of duplicated blocks"); +STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated"); namespace { class IfConverter : public MachineFunctionPass { @@ -169,7 +170,6 @@ namespace { } virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "If Converter"; } private: bool ReverseBranchCondition(BBInfo &BBI); @@ -195,7 +195,8 @@ namespace { void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs); + SmallSet<unsigned, 4> &Redefs, + SmallSet<unsigned, 4> *LaterRedefs = 0); void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl<MachineOperand> &Cond, SmallSet<unsigned, 4> &Redefs, @@ -251,12 +252,12 @@ namespace { char IfConverter::ID = 0; } +char &llvm::IfConverterID = IfConverter::ID; + INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) -FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } - bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getTarget().getTargetLowering(); TII = MF.getTarget().getInstrInfo(); @@ -313,8 +314,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { bool RetVal = false; switch (Kind) { - default: assert(false && "Unexpected!"); - break; + default: llvm_unreachable("Unexpected!"); case ICSimple: case ICSimpleFalse: { bool isFalse = Kind == ICSimpleFalse; @@ -573,12 +573,12 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, // blocks, move the end iterators up past any branch instructions. 
while (TIE != TIB) { --TIE; - if (!TIE->getDesc().isBranch()) + if (!TIE->isBranch()) break; } while (FIE != FIB) { --FIE; - if (!FIE->getDesc().isBranch()) + if (!FIE->isBranch()) break; } @@ -651,12 +651,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (I->isDebugValue()) continue; - const MCInstrDesc &MCID = I->getDesc(); - if (MCID.isNotDuplicable()) + if (I->isNotDuplicable()) BBI.CannotBeCopied = true; bool isPredicated = TII->isPredicated(I); - bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch(); + bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch(); if (!isCondBr) { if (!isPredicated) { @@ -963,7 +962,7 @@ static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs, E = BB->livein_end(); I != E; ++I) { unsigned Reg = *I; Redefs.insert(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) Redefs.insert(*Subreg); } @@ -984,7 +983,7 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs, Defs.push_back(Reg); else if (MO.isKill()) { Redefs.erase(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) Redefs.erase(*SR); } } @@ -997,7 +996,7 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs, true/*IsImp*/,false/*IsKill*/)); } else { Redefs.insert(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) Redefs.insert(*SR); } } @@ -1035,7 +1034,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { if (Kind == ICSimpleFalse) if (TII->ReverseBranchCondition(Cond)) - assert(false && "Unable to reverse branch condition!"); + llvm_unreachable("Unable to reverse branch condition!"); // Initialize liveins to the first BB. These are potentiall redefined by // predicated instructions. @@ -1108,7 +1107,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) if (TII->ReverseBranchCondition(Cond)) - assert(false && "Unable to reverse branch condition!"); + llvm_unreachable("Unable to reverse branch condition!"); if (Kind == ICTriangleRev || Kind == ICTriangleFRev) { if (ReverseBranchCondition(*CvtBBI)) { @@ -1155,7 +1154,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(), CvtBBI->BrCond.end()); if (TII->ReverseBranchCondition(RevCond)) - assert(false && "Unable to reverse branch condition!"); + llvm_unreachable("Unable to reverse branch condition!"); TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl); BBI.BB->addSuccessor(CvtBBI->FalseBB); } @@ -1227,7 +1226,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, BBInfo *BBI2 = &FalseBBI; SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); if (TII->ReverseBranchCondition(RevCond)) - assert(false && "Unable to reverse branch condition!"); + llvm_unreachable("Unable to reverse branch condition!"); SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond; SmallVector<MachineOperand, 4> *Cond2 = &RevCond; @@ -1281,7 +1280,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); - // Predicate the 'true' block after removing its branch. 
+ // Remove branch from 'true' block and remove duplicated instructions. BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); DI1 = BBI1->BB->end(); for (unsigned i = 0; i != NumDups2; ) { @@ -1294,9 +1293,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, ++i; } BBI1->BB->erase(DI1, BBI1->BB->end()); - PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs); - // Predicate the 'false' block. + // Remove 'false' block branch and find the last instruction to predicate. BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); while (NumDups2 != 0) { @@ -1308,6 +1306,55 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, if (!DI2->isDebugValue()) --NumDups2; } + + // Remember which registers would later be defined by the false block. + // This allows us not to predicate instructions in the true block that would + // later be re-defined. That is, rather than + // subeq r0, r1, #1 + // addne r0, r1, #1 + // generate: + // sub r0, r1, #1 + // addne r0, r1, #1 + SmallSet<unsigned, 4> RedefsByFalse; + SmallSet<unsigned, 4> ExtUses; + if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) { + for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) { + if (FI->isDebugValue()) + continue; + SmallVector<unsigned, 4> Defs; + for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = FI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) { + Defs.push_back(Reg); + } else if (!RedefsByFalse.count(Reg)) { + // These are defined before ctrl flow reach the 'false' instructions. + // They cannot be modified by the 'true' instructions. + ExtUses.insert(Reg); + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + ExtUses.insert(*SR); + } + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Reg = Defs[i]; + if (!ExtUses.count(Reg)) { + RedefsByFalse.insert(Reg); + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + RedefsByFalse.insert(*SR); + } + } + } + } + + // Predicate the 'true' block. + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs, &RedefsByFalse); + + // Predicate the 'false' block. PredicateBlock(*BBI2, DI2, *Cond2, Redefs); // Merge the true block into the entry of the diamond. @@ -1319,7 +1366,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // fold the tail block in as well. Otherwise, unless it falls through to the // tail, add a unconditional branch to it. if (TailBB) { - BBInfo TailBBI = BBAnalysis[TailBB->getNumber()]; + BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; bool CanMergeTail = !TailBBI.HasFallThrough; // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; // check if there are any other predecessors besides those. @@ -1356,15 +1403,49 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, return true; } +static bool MaySpeculate(const MachineInstr *MI, + SmallSet<unsigned, 4> &LaterRedefs, + const TargetInstrInfo *TII) { + bool SawStore = true; + if (!MI->isSafeToMove(TII, 0, SawStore)) + return false; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef() && !LaterRedefs.count(Reg)) + return false; + } + + return true; +} + /// PredicateBlock - Predicate instructions from the start of the block to the /// specified end with the specified condition. 
void IfConverter::PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs) { + SmallSet<unsigned, 4> &Redefs, + SmallSet<unsigned, 4> *LaterRedefs) { + bool AnyUnpred = false; + bool MaySpec = LaterRedefs != 0; for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { if (I->isDebugValue() || TII->isPredicated(I)) continue; + // It may be possible not to predicate an instruction if it's the 'true' + // side of a diamond and the 'false' side may re-define the instruction's + // defs. + if (MaySpec && MaySpeculate(I, *LaterRedefs, TII)) { + AnyUnpred = true; + continue; + } + // If any instruction is predicated, then every instruction after it must + // be predicated. + MaySpec = false; if (!TII->PredicateInstruction(I, Cond)) { #ifndef NDEBUG dbgs() << "Unable to predicate " << *I << "!\n"; @@ -1383,6 +1464,8 @@ void IfConverter::PredicateBlock(BBInfo &BBI, BBI.NonPredSize = 0; ++NumIfConvBBs; + if (AnyUnpred) + ++NumUnpred; } /// CopyAndPredicateBlock - Copy and predicate instructions from source BB to @@ -1395,9 +1478,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), E = FromBBI.BB->end(); I != E; ++I) { - const MCInstrDesc &MCID = I->getDesc(); // Do not copy the end of the block branches. - if (IgnoreBr && MCID.isBranch()) + if (IgnoreBr && I->isBranch()) break; MachineInstr *MI = MF.CloneMachineInstr(I); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 726af4696578..d5ea666e4a17 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -14,14 +14,15 @@ #define DEBUG_TYPE "regalloc" #include "Spiller.h" -#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -173,8 +174,7 @@ private: void reMaterializeAll(); bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); - bool foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops, + bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >, MachineInstr *LoadMI = 0); void insertReload(LiveInterval &NewLI, SlotIndex, MachineBasicBlock::iterator MI); @@ -578,7 +578,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { if (isSibling(SrcReg)) { LiveInterval &SrcLI = LIS.getInterval(SrcReg); - LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getUseIndex()); + LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getRegSlot(true)); assert(SrcLR && "Copy from non-existing value"); // Check if this COPY kills its source. SVI->second.KillsSource = (SrcLR->end == VNI->def); @@ -644,16 +644,18 @@ void InlineSpiller::analyzeSiblingValues() { if (VNI->isUnused()) continue; MachineInstr *DefMI = 0; + if (!VNI->isPHIDef()) { + DefMI = LIS.getInstructionFromIndex(VNI->def); + assert(DefMI && "No defining instruction"); + } // Check possible sibling copies. 
- if (VNI->isPHIDef() || VNI->getCopy()) { + if (VNI->isPHIDef() || DefMI->isCopy()) { VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); assert(OrigVNI && "Def outside original live range"); if (OrigVNI->def != VNI->def) DefMI = traceSiblingValue(Reg, VNI, OrigVNI); } - if (!DefMI && !VNI->isPHIDef()) - DefMI = LIS.getInstructionFromIndex(VNI->def); - if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) { + if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) { DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def << " may remat from " << *DefMI); } @@ -665,8 +667,8 @@ void InlineSpiller::analyzeSiblingValues() { /// a spill at a better location. bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { SlotIndex Idx = LIS.getInstructionIndex(CopyMI); - VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex()); - assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy"); + VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot()); + assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy"); SibValueMap::iterator I = SibValues.find(VNI); if (I == SibValues.end()) return false; @@ -726,7 +728,6 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { MRI.getRegClass(SVI.SpillReg), &TRI); --MII; // Point to store instruction. LIS.InsertMachineInstrInMaps(MII); - VRM.addSpillSlotUse(StackSlot, MII); DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); ++NumSpills; @@ -760,7 +761,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { // Find all spills and copies of VNI. for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg); MachineInstr *MI = UI.skipInstruction();) { - if (!MI->isCopy() && !MI->getDesc().mayStore()) + if (!MI->isCopy() && !MI->mayStore()) continue; SlotIndex Idx = LIS.getInstructionIndex(MI); if (LI->getVNInfoAt(Idx) != VNI) @@ -770,9 +771,9 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { if (unsigned DstReg = isFullCopyOf(MI, Reg)) { if (isSibling(DstReg)) { LiveInterval &DstLI = LIS.getInterval(DstReg); - VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex()); + VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot()); assert(DstVNI && "Missing defined value"); - assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot"); + assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot"); WorkList.push_back(std::make_pair(&DstLI, DstVNI)); } continue; @@ -811,7 +812,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { - VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()); + VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)); if (PVNI) WorkList.push_back(std::make_pair(LI, PVNI)); } @@ -824,7 +825,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { continue; LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg()); assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy"); - VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex()); + VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true)); assert(SnipVNI && "Snippet undefined before copy"); WorkList.push_back(std::make_pair(&SnipLI, SnipVNI)); } while (!WorkList.empty()); @@ -833,7 +834,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { /// reMaterializeFor - Attempt 
to rematerialize before MI instead of reloading. bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineBasicBlock::iterator MI) { - SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex(); + SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true); VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex()); if (!ParentVNI) { @@ -855,7 +856,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, SibValueMap::const_iterator SibI = SibValues.find(ParentVNI); if (SibI != SibValues.end()) RM.OrigMI = SibI->second.DefMI; - if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) { + if (!Edit->canRematerializeAt(RM, UseIdx, false)) { markValueUsed(&VirtReg, ParentVNI); DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI); return false; @@ -863,42 +864,37 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, // If the instruction also writes VirtReg.reg, it had better not require the // same register for uses and defs. - bool Reads, Writes; - SmallVector<unsigned, 8> Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(VirtReg.reg, &Ops); - if (Writes) { - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(Ops[i]); - if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) { - markValueUsed(&VirtReg, ParentVNI); - DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI); - return false; - } - } + SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; + MIBundleOperands::RegInfo RI = + MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); + if (RI.Tied) { + markValueUsed(&VirtReg, ParentVNI); + DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI); + return false; } // Before rematerializing into a register for a single instruction, try to // fold a load into the instruction. That avoids allocating a new register. - if (RM.OrigMI->getDesc().canFoldAsLoad() && - foldMemoryOperand(MI, Ops, RM.OrigMI)) { + if (RM.OrigMI->canFoldAsLoad() && + foldMemoryOperand(Ops, RM.OrigMI)) { Edit->markRematerialized(RM.ParentVNI); ++NumFoldedLoads; return true; } // Alocate a new register for the remat. - LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM); + LiveInterval &NewLI = Edit->createFrom(Original); NewLI.markNotSpillable(); // Finally we can rematerialize OrigMI before MI. SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, - LIS, TII, TRI); + TRI); DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *LIS.getInstructionFromIndex(DefIdx)); // Replace operands for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(Ops[i]); + MachineOperand &MO = MI->getOperand(Ops[i].second); if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { MO.setReg(NewLI.reg); MO.setIsKill(); @@ -906,8 +902,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, } DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); - VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); + VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator()); + NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI)); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); ++NumRemats; return true; @@ -917,7 +913,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, /// and trim the live ranges after. void InlineSpiller::reMaterializeAll() { // analyzeSiblingValues has already tested all relevant defining instructions. 
- if (!Edit->anyRematerializable(LIS, TII, AA)) + if (!Edit->anyRematerializable(AA)) return; UsedValues.clear(); @@ -929,7 +925,7 @@ void InlineSpiller::reMaterializeAll() { LiveInterval &LI = LIS.getInterval(Reg); for (MachineRegisterInfo::use_nodbg_iterator RI = MRI.use_nodbg_begin(Reg); - MachineInstr *MI = RI.skipInstruction();) + MachineInstr *MI = RI.skipBundle();) anyRemat |= reMaterializeFor(LI, MI); } if (!anyRemat) @@ -958,7 +954,7 @@ void InlineSpiller::reMaterializeAll() { if (DeadDefs.empty()) return; DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); - Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); // Get rid of deleted and empty intervals. for (unsigned i = RegsToSpill.size(); i != 0; --i) { @@ -970,7 +966,7 @@ void InlineSpiller::reMaterializeAll() { LiveInterval &LI = LIS.getInterval(Reg); if (!LI.empty()) continue; - Edit->eraseVirtReg(Reg, LIS); + Edit->eraseVirtReg(Reg); RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); } DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); @@ -1008,23 +1004,35 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { return true; } -/// foldMemoryOperand - Try folding stack slot references in Ops into MI. -/// @param MI Instruction using or defining the current register. -/// @param Ops Operand indices from readsWritesVirtualRegister(). +/// foldMemoryOperand - Try folding stack slot references in Ops into their +/// instructions. +/// +/// @param Ops Operand indices from analyzeVirtReg(). /// @param LoadMI Load instruction to use instead of stack slot when non-null. -/// @return True on success, and MI will be erased. -bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr *LoadMI) { +/// @return True on success. +bool InlineSpiller:: +foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, + MachineInstr *LoadMI) { + if (Ops.empty()) + return false; + // Don't attempt folding in bundles. + MachineInstr *MI = Ops.front().first; + if (Ops.back().first != MI || MI->isBundled()) + return false; + bool WasCopy = MI->isCopy(); + unsigned ImpReg = 0; + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector<unsigned, 8> FoldOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - unsigned Idx = Ops[i]; + unsigned Idx = Ops[i].second; MachineOperand &MO = MI->getOperand(Idx); - if (MO.isImplicit()) + if (MO.isImplicit()) { + ImpReg = MO.getReg(); continue; + } // FIXME: Teach targets to deal with subregs. if (MO.getSubReg()) return false; @@ -1042,13 +1050,24 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, if (!FoldMI) return false; LIS.ReplaceMachineInstrInMaps(MI, FoldMI); - if (!LoadMI) - VRM.addSpillSlotUse(StackSlot, FoldMI); MI->eraseFromParent(); - DEBUG(dbgs() << "\tfolded: " << *FoldMI); + + // TII.foldMemoryOperand may have left some implicit operands on the + // instruction. Strip them. 
+ if (ImpReg) + for (unsigned i = FoldMI->getNumOperands(); i; --i) { + MachineOperand &MO = FoldMI->getOperand(i - 1); + if (!MO.isReg() || !MO.isImplicit()) + break; + if (MO.getReg() == ImpReg) + FoldMI->RemoveOperand(i - 1); + } + + DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t' + << *FoldMI); if (!WasCopy) ++NumFolded; - else if (Ops.front() == 0) + else if (Ops.front().second == 0) ++NumSpills; else ++NumReloads; @@ -1063,11 +1082,9 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to load instruction. - SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); - VRM.addSpillSlotUse(StackSlot, MI); + SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); - VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, - LIS.getVNInfoAllocator()); + VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); ++NumReloads; } @@ -1079,10 +1096,9 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to store instruction. - SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); - VRM.addSpillSlotUse(StackSlot, MI); + SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); - VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); + VNInfo *StoreVNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); ++NumSpills; } @@ -1093,8 +1109,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { LiveInterval &OldLI = LIS.getInterval(Reg); // Iterate over instructions using Reg. - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg); - MachineInstr *MI = RI.skipInstruction();) { + for (MachineRegisterInfo::reg_iterator RegI = MRI.reg_begin(Reg); + MachineInstr *MI = RegI.skipBundle();) { // Debug values are not allowed to affect codegen. if (MI->isDebugValue()) { @@ -1123,14 +1139,14 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { continue; // Analyze instruction. - bool Reads, Writes; - SmallVector<unsigned, 8> Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops); + SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; + MIBundleOperands::RegInfo RI = + MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops); // Find the slot index where this instruction reads and writes OldLI. // This is usually the def slot, except for tied early clobbers. - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); - if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex())) + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true))) if (SlotIndex::isSameInstr(Idx, VNI->def)) Idx = VNI->def; @@ -1143,7 +1159,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { SnippetCopies.insert(MI); continue; } - if (Writes) { + if (RI.Writes) { // Hoist the spill of a sib-reg copy. if (hoistSpill(OldLI, MI)) { // This COPY is now dead, the value is already in the stack slot. @@ -1160,24 +1176,24 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { } // Attempt to fold memory ops. 
- if (foldMemoryOperand(MI, Ops)) + if (foldMemoryOperand(Ops)) continue; // Allocate interval around instruction. // FIXME: Infer regclass from instruction alone. - LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM); + LiveInterval &NewLI = Edit->createFrom(Reg); NewLI.markNotSpillable(); - if (Reads) + if (RI.Reads) insertReload(NewLI, Idx, MI); // Rewrite instruction operands. bool hasLiveDef = false; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(Ops[i]); + MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); MO.setReg(NewLI.reg); if (MO.isUse()) { - if (!MI->isRegTiedToDefOperand(Ops[i])) + if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second)) MO.setIsKill(); } else { if (!MO.isDead()) @@ -1187,15 +1203,15 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI); // FIXME: Use a second vreg if instruction has no tied ops. - if (Writes) { - if (hasLiveDef) - insertSpill(NewLI, OldLI, Idx, MI); - else { - // This instruction defines a dead value. We don't need to spill it, - // but do create a live range for the dead value. - VNInfo *VNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(Idx, Idx.getNextSlot(), VNI)); - } + if (RI.Writes) { + if (hasLiveDef) + insertSpill(NewLI, OldLI, Idx, MI); + else { + // This instruction defines a dead value. We don't need to spill it, + // but do create a live range for the dead value. + VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); + NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI)); + } } DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); @@ -1208,7 +1224,7 @@ void InlineSpiller::spillAll() { if (StackSlot == VirtRegMap::NO_STACK_SLOT) { StackSlot = VRM.assignVirt2StackSlot(Original); StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original)); - StackInt->getNextValue(SlotIndex(), 0, LSS.getVNInfoAllocator()); + StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator()); } else StackInt = &LSS.getInterval(StackSlot); @@ -1228,7 +1244,7 @@ void InlineSpiller::spillAll() { // Hoisted spills may cause dead code. if (!DeadDefs.empty()) { DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); - Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); } // Finally delete the SnippetCopies. @@ -1237,7 +1253,6 @@ void InlineSpiller::spillAll() { MachineInstr *MI = RI.skipInstruction();) { assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); // FIXME: Do this with a LiveRangeEdit callback. - VRM.RemoveMachineInstrFromMaps(MI); LIS.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); } @@ -1245,7 +1260,7 @@ void InlineSpiller::spillAll() { // Delete all spilled registers. 
for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - Edit->eraseVirtReg(RegsToSpill[i], LIS); + Edit->eraseVirtReg(RegsToSpill[i]); } void InlineSpiller::spill(LiveRangeEdit &edit) { @@ -1274,5 +1289,5 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { if (!RegsToSpill.empty()) spillAll(); - Edit->calculateRegClassAndHint(MF, LIS, Loops); + Edit->calculateRegClassAndHint(MF, Loops); } diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index 29b47bd67ece..8368b58880a3 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -1,4 +1,4 @@ -//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===// +//===-- InterferenceCache.cpp - Caching per-block interference ---------*--===// // // The LLVM Compiler Infrastructure // @@ -15,6 +15,7 @@ #include "InterferenceCache.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" using namespace llvm; @@ -24,13 +25,14 @@ InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference; void InterferenceCache::init(MachineFunction *mf, LiveIntervalUnion *liuarray, SlotIndexes *indexes, + LiveIntervals *lis, const TargetRegisterInfo *tri) { MF = mf; LIUArray = liuarray; TRI = tri; PhysRegEntries.assign(TRI->getNumRegs(), 0); for (unsigned i = 0; i != CacheEntries; ++i) - Entries[i].clear(mf, indexes); + Entries[i].clear(mf, indexes, lis); } InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { @@ -78,7 +80,7 @@ void InterferenceCache::Entry::reset(unsigned physReg, PhysReg = physReg; Blocks.resize(MF->getNumBlockIDs()); Aliases.clear(); - for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) { + for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) { LiveIntervalUnion *LIU = LIUArray + *AS; Aliases.push_back(std::make_pair(LIU, LIU->getTag())); } @@ -94,7 +96,7 @@ void InterferenceCache::Entry::reset(unsigned physReg, bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI) { unsigned i = 0, e = Aliases.size(); - for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) { + for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) { LiveIntervalUnion *LIU = LIUArray + *AS; if (i == e || Aliases[i].first != LIU) return false; @@ -121,6 +123,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum); BlockInterference *BI = &Blocks[MBBNum]; + ArrayRef<SlotIndex> RegMaskSlots; + ArrayRef<const uint32_t*> RegMaskBits; for (;;) { BI->Tag = Tag; BI->First = BI->Last = SlotIndex(); @@ -137,6 +141,18 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { BI->First = StartI; } + // Also check for register mask interference. + RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum); + RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum); + SlotIndex Limit = BI->First.isValid() ? BI->First : Stop; + for (unsigned i = 0, e = RegMaskSlots.size(); + i != e && RegMaskSlots[i] < Limit; ++i) + if (MachineOperand::clobbersPhysReg(RegMaskBits[i], PhysReg)) { + // Register mask i clobbers PhysReg before the LIU interference. + BI->First = RegMaskSlots[i]; + break; + } + PrevPos = Stop; if (BI->First.isValid()) break; @@ -166,4 +182,15 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { if (Backup) ++I; } + + // Also check for register mask interference. + SlotIndex Limit = BI->Last.isValid() ? 
BI->Last : Start; + for (unsigned i = RegMaskSlots.size(); + i && RegMaskSlots[i-1].getDeadSlot() > Limit; --i) + if (MachineOperand::clobbersPhysReg(RegMaskBits[i-1], PhysReg)) { + // Register mask i-1 clobbers PhysReg after the LIU interference. + // Model the regmask clobber as a dead def. + BI->Last = RegMaskSlots[i-1].getDeadSlot(); + break; + } } diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 4df0a9e5c393..485a325aa146 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -18,10 +18,11 @@ namespace llvm { +class LiveIntervals; + class InterferenceCache { const TargetRegisterInfo *TRI; LiveIntervalUnion *LIUArray; - SlotIndexes *Indexes; MachineFunction *MF; /// BlockInterference - information about the interference in a single basic @@ -52,6 +53,9 @@ class InterferenceCache { /// Indexes - Mapping block numbers to SlotIndex ranges. SlotIndexes *Indexes; + /// LIS - Used for accessing register mask interference maps. + LiveIntervals *LIS; + /// PrevPos - The previous position the iterators were moved to. SlotIndex PrevPos; @@ -71,13 +75,14 @@ class InterferenceCache { void update(unsigned MBBNum); public: - Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0) {} + Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0), LIS(0) {} - void clear(MachineFunction *mf, SlotIndexes *indexes) { + void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) { assert(!hasRefs() && "Cannot clear cache entry with references"); PhysReg = 0; MF = mf; Indexes = indexes; + LIS = lis; } unsigned getPhysReg() const { return PhysReg; } @@ -124,10 +129,10 @@ class InterferenceCache { Entry *get(unsigned PhysReg); public: - InterferenceCache() : TRI(0), LIUArray(0), Indexes(0), MF(0), RoundRobin(0) {} + InterferenceCache() : TRI(0), LIUArray(0), MF(0), RoundRobin(0) {} /// init - Prepare cache for a new function. - void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, + void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*, const TargetRegisterInfo *); /// getMaxCursors - Return the maximum number of concurrent cursors that can diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 0f92c2d06bdd..a9ca42f69b97 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -448,11 +448,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::dbg_declare: break; // Simply strip out debugging intrinsics - case Intrinsic::eh_exception: - case Intrinsic::eh_selector: - CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); - break; - case Intrinsic::eh_typeid_for: // Return something different to eh_selector. CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); diff --git a/lib/CodeGen/JITCodeEmitter.cpp b/lib/CodeGen/JITCodeEmitter.cpp new file mode 100644 index 000000000000..96a53892f6d3 --- /dev/null +++ b/lib/CodeGen/JITCodeEmitter.cpp @@ -0,0 +1,14 @@ +//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/JITCodeEmitter.h" + +using namespace llvm; + +void JITCodeEmitter::anchor() { } diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt new file mode 100644 index 000000000000..fee0347ea659 --- /dev/null +++ b/lib/CodeGen/LLVMBuild.txt @@ -0,0 +1,25 @@ +;===- ./lib/CodeGen/LLVMBuild.txt ------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = AsmPrinter SelectionDAG + +[component_0] +type = Library +name = CodeGen +parent = Libraries +required_libraries = Analysis Core MC Scalar Support Target TransformUtils diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 187147a3e252..a1f479a4275f 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -11,82 +11,42 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/PassManager.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; -namespace llvm { - bool EnableFastISel; -} +// Enable or disable FastISel. Both options are needed, because +// FastISel is enabled by default with -fast, and we wish to be +// able to enable or disable fast-isel independently from -O0. 
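Aside, not part of the patch: the tri-state pattern described in the comment above (and used for the EnableFastISelOption declaration that follows) can be sketched in isolation. The option name and helper below are invented for illustration; only cl::opt, cl::boolOrDefault and the BOU_* values are real LLVM API, as used elsewhere in this file.

  #include "llvm/Support/CommandLine.h"
  using namespace llvm;

  // Hypothetical tri-state flag: leaving it unset means "derive the answer
  // from something else", here a default computed from the -O level.
  static cl::opt<cl::boolOrDefault>
  EnableWidgetOption("enable-widget", cl::Hidden,
                     cl::desc("Force the widget on or off"));

  static bool shouldEnableWidget(bool DefaultOn) {
    switch (EnableWidgetOption) {
    case cl::BOU_UNSET: return DefaultOn;  // user said nothing; use default
    case cl::BOU_TRUE:  return true;       // explicitly requested
    case cl::BOU_FALSE: return false;      // explicitly suppressed
    }
    return DefaultOn;
  }

A plain cl::opt<bool> could not distinguish "not given" from "explicitly off", which is exactly the distinction needed when -O0 is allowed to pick the default.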
+static cl::opt<cl::boolOrDefault> +EnableFastISelOption("fast-isel", cl::Hidden, + cl::desc("Enable the \"fast\" instruction selector")); -static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden, - cl::desc("Disable Post Regalloc")); -static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, - cl::desc("Disable branch folding")); -static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, - cl::desc("Disable tail duplication")); -static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden, - cl::desc("Disable pre-register allocation tail duplication")); -static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden, - cl::desc("Disable code placement")); -static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden, - cl::desc("Disable Stack Slot Coloring")); -static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden, - cl::desc("Disable Machine Dead Code Elimination")); -static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, - cl::desc("Disable Machine LICM")); -static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, - cl::desc("Disable Machine Common Subexpression Elimination")); -static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", - cl::Hidden, - cl::desc("Disable Machine LICM")); -static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden, - cl::desc("Disable Machine Sinking")); -static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden, - cl::desc("Disable Loop Strength Reduction Pass")); -static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden, - cl::desc("Disable Codegen Prepare")); -static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden, - cl::desc("Print LLVM IR produced by the loop-reduce pass")); -static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, - cl::desc("Print LLVM IR input to isel pass")); -static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, - cl::desc("Dump garbage collector data")); static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden, cl::desc("Show encoding in .s output")); static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden, cl::desc("Show instruction structure in .s output")); -static cl::opt<bool> EnableMCLogging("enable-mc-api-logging", cl::Hidden, - cl::desc("Enable MC API logging")); -static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, - cl::desc("Verify generated machine code"), - cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); static cl::opt<cl::boolOrDefault> AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), @@ -94,25 +54,20 @@ AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), static bool getVerboseAsm() { switch (AsmVerbose) { - default: case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault(); case cl::BOU_TRUE: return true; case cl::BOU_FALSE: return false; } + llvm_unreachable("Invalid verbose asm state"); } -// Enable or disable FastISel. Both options are needed, because -// FastISel is enabled by default with -fast, and we wish to be -// able to enable or disable fast-isel independently from -O0. 
-static cl::opt<cl::boolOrDefault> -EnableFastISelOption("fast-isel", cl::Hidden, - cl::desc("Enable the \"fast\" instruction selector")); - LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : TargetMachine(T, Triple, CPU, FS) { - CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM); + TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : TargetMachine(T, Triple, CPU, FS, Options) { + CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); AsmInfo = T.createMCAsmInfo(Triple); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and @@ -123,16 +78,88 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, "and that InitializeAllTargetMCs() is being invoked!"); } +/// Turn exception handling constructs into something the code generators can +/// handle. +static void addPassesToHandleExceptions(TargetMachine *TM, + PassManagerBase &PM) { + switch (TM->getMCAsmInfo()->getExceptionHandlingType()) { + case ExceptionHandling::SjLj: + // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both + // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, + // catch info can get misplaced when a selector ends up more than one block + // removed from the parent invoke(s). This could happen when a landing + // pad is shared by multiple invokes and is also a target of a normal + // edge from elsewhere. + PM.add(createSjLjEHPreparePass(TM->getTargetLowering())); + // FALLTHROUGH + case ExceptionHandling::DwarfCFI: + case ExceptionHandling::ARM: + case ExceptionHandling::Win64: + PM.add(createDwarfEHPass(TM)); + break; + case ExceptionHandling::None: + PM.add(createLowerInvokePass(TM->getTargetLowering())); + + // The lower invoke pass may create unreachable code. Remove it. + PM.add(createUnreachableBlockEliminationPass()); + break; + } +} + +/// addPassesToX helper drives creation and initialization of TargetPassConfig. +static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, + PassManagerBase &PM, + bool DisableVerify) { + // Targets may override createPassConfig to provide a target-specific sublass. + TargetPassConfig *PassConfig = TM->createPassConfig(PM); + + // Set PassConfig options provided by TargetMachine. + PassConfig->setDisableVerify(DisableVerify); + + PM.add(PassConfig); + + PassConfig->addIRPasses(); + + addPassesToHandleExceptions(TM, PM); + + PassConfig->addISelPrepare(); + + // Install a MachineModuleInfo class, which is an immutable pass that holds + // all the per-module stuff we're generating, including MCContext. + MachineModuleInfo *MMI = + new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(), + &TM->getTargetLowering()->getObjFileLowering()); + PM.add(MMI); + MCContext *Context = &MMI->getContext(); // Return the MCContext by-ref. + + // Set up a MachineFunction for the rest of CodeGen to work on. + PM.add(new MachineFunctionAnalysis(*TM)); + + // Enable FastISel with -fast, but allow that to be overridden. + if (EnableFastISelOption == cl::BOU_TRUE || + (TM->getOptLevel() == CodeGenOpt::None && + EnableFastISelOption != cl::BOU_FALSE)) + TM->setFastISel(true); + + // Ask the target for an isel. 
+ if (PassConfig->addInstSelector()) + return NULL; + + PassConfig->addMachinePasses(); + + PassConfig->setInitialized(); + + return Context; +} + bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. - MCContext *Context = 0; - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context)) + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify); + if (!Context) return true; - assert(Context != 0 && "Failed to get MCContext"); if (hasMCSaveTempLabels()) Context->setAllowTemporaryLabels(false); @@ -142,10 +169,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, OwningPtr<MCStreamer> AsmStreamer; switch (FileType) { - default: return true; case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI); + getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, + *getInstrInfo(), + Context->getRegisterInfo(), STI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = 0; @@ -160,6 +188,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, getVerboseAsm(), hasMCUseLoc(), hasMCUseCFI(), + hasMCUseDwarfDirectory(), InstPrinter, MCE, MAB, ShowMCInst); @@ -189,9 +218,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, break; } - if (EnableMCLogging) - AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())); - // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); if (Printer == 0) @@ -214,14 +240,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, /// bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, JITCodeEmitter &JCE, - CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. - MCContext *Ctx = 0; - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify); + if (!Context) return true; - addCodeEmitter(PM, OptLevel, JCE); + addCodeEmitter(PM, JCE); PM.add(createGCInfoDeleter()); return false; // success! @@ -235,10 +260,10 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_ostream &Out, - CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) + Ctx = addPassesToGenerateCode(this, PM, DisableVerify); + if (!Ctx) return true; if (hasMCSaveTempLabels()) @@ -247,7 +272,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, // Create the code emitter for the target if it exists. If not, .o file // emission fails. const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI, *Ctx); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI, + *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple()); if (MCE == 0 || MAB == 0) return true; @@ -271,227 +297,3 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, return false; // success! 
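Aside (illustrative, not from the patch): with the CodeGenOpt::Level parameter dropped from these entry points, a driver along the lines of llc now invokes them roughly as follows. The helper name is invented, the module and output stream come from the caller, and error handling is elided.

  #include "llvm/Module.h"
  #include "llvm/PassManager.h"
  #include "llvm/Support/FormattedStream.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Target/TargetMachine.h"
  using namespace llvm;

  // Run the codegen pipeline over M and write an object file to OS.
  // Returns true on failure, mirroring addPassesToEmitFile's convention.
  static bool emitObjectFile(Module &M, TargetMachine &TM, raw_ostream &OS) {
    PassManager PM;
    formatted_raw_ostream FOS(OS);
    // The optimization level is no longer passed here; it now travels with
    // the TargetMachine (see the new CodeGenOpt::Level constructor argument).
    if (TM.addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_ObjectFile,
                               /*DisableVerify=*/false))
      return true; // this target cannot emit object files
    PM.run(M);
    return false;
  }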
} - -static void printNoVerify(PassManagerBase &PM, const char *Banner) { - if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); -} - -static void printAndVerify(PassManagerBase &PM, - const char *Banner) { - if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); - - if (VerifyMachineCode) - PM.add(createMachineVerifierPass(Banner)); -} - -/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both -/// emitting to assembly files or machine code output. -/// -bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool DisableVerify, - MCContext *&OutContext) { - // Standard LLVM-Level Passes. - - // Basic AliasAnalysis support. - // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that - // BasicAliasAnalysis wins if they disagree. This is intended to help - // support "obvious" type-punning idioms. - PM.add(createTypeBasedAliasAnalysisPass()); - PM.add(createBasicAliasAnalysisPass()); - - // Before running any passes, run the verifier to determine if the input - // coming from the front-end and/or optimizer is valid. - if (!DisableVerify) - PM.add(createVerifierPass()); - - // Run loop strength reduction before anything else. - if (OptLevel != CodeGenOpt::None && !DisableLSR) { - PM.add(createLoopStrengthReducePass(getTargetLowering())); - if (PrintLSR) - PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); - } - - PM.add(createGCLoweringPass()); - - // Make sure that no unreachable blocks are instruction selected. - PM.add(createUnreachableBlockEliminationPass()); - - // Turn exception handling constructs into something the code generators can - // handle. - switch (getMCAsmInfo()->getExceptionHandlingType()) { - case ExceptionHandling::SjLj: - // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both - // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, - // catch info can get misplaced when a selector ends up more than one block - // removed from the parent invoke(s). This could happen when a landing - // pad is shared by multiple invokes and is also a target of a normal - // edge from elsewhere. - PM.add(createSjLjEHPass(getTargetLowering())); - // FALLTHROUGH - case ExceptionHandling::DwarfCFI: - case ExceptionHandling::ARM: - case ExceptionHandling::Win64: - PM.add(createDwarfEHPass(this)); - break; - case ExceptionHandling::None: - PM.add(createLowerInvokePass(getTargetLowering())); - - // The lower invoke pass may create unreachable code. Remove it. - PM.add(createUnreachableBlockEliminationPass()); - break; - } - - if (OptLevel != CodeGenOpt::None && !DisableCGP) - PM.add(createCodeGenPreparePass(getTargetLowering())); - - PM.add(createStackProtectorPass(getTargetLowering())); - - addPreISel(PM, OptLevel); - - if (PrintISelInput) - PM.add(createPrintFunctionPass("\n\n" - "*** Final LLVM Code input to ISel ***\n", - &dbgs())); - - // All passes which modify the LLVM IR are now complete; run the verifier - // to ensure that the IR is valid. - if (!DisableVerify) - PM.add(createVerifierPass()); - - // Standard Lower-Level Passes. - - // Install a MachineModuleInfo class, which is an immutable pass that holds - // all the per-module stuff we're generating, including MCContext. - MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), - *getRegisterInfo(), - &getTargetLowering()->getObjFileLowering()); - PM.add(MMI); - OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. 
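For context on the MachineModuleInfo / MCContext plumbing above (present in both the old and the new pipeline code): MMI is installed as an immutable pass so that later machine passes can reach the per-module MC state. A minimal, hypothetical consumer is sketched below; the pass is invented for illustration and its registration boilerplate is omitted.

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineFunctionPass.h"
  #include "llvm/CodeGen/MachineModuleInfo.h"
  #include "llvm/MC/MCContext.h"
  using namespace llvm;

  namespace {
  // Demonstration only: shows how a codegen pass reaches the MCContext that
  // the MachineModuleInfo immutable pass owns.
  struct MCContextUserDemo : public MachineFunctionPass {
    static char ID;
    MCContextUserDemo() : MachineFunctionPass(ID) {}

    virtual bool runOnMachineFunction(MachineFunction &MF) {
      MachineModuleInfo &MMI = MF.getMMI(); // installed by the code above
      MCContext &Ctx = MMI.getContext();
      (void)Ctx; // labels, sections, etc. for MF are created through Ctx
      return false; // nothing modified
    }
  };
  }
  char MCContextUserDemo::ID = 0;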
- - // Set up a MachineFunction for the rest of CodeGen to work on. - PM.add(new MachineFunctionAnalysis(*this, OptLevel)); - - // Enable FastISel with -fast, but allow that to be overridden. - if (EnableFastISelOption == cl::BOU_TRUE || - (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE)) - EnableFastISel = true; - - // Ask the target for an isel. - if (addInstSelector(PM, OptLevel)) - return true; - - // Print the instruction selected machine code... - printAndVerify(PM, "After Instruction Selection"); - - // Expand pseudo-instructions emitted by ISel. - PM.add(createExpandISelPseudosPass()); - - // Pre-ra tail duplication. - if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) { - PM.add(createTailDuplicatePass(true)); - printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); - } - - // Optimize PHIs before DCE: removing dead PHI cycles may make more - // instructions dead. - if (OptLevel != CodeGenOpt::None) - PM.add(createOptimizePHIsPass()); - - // If the target requests it, assign local variables to stack slots relative - // to one another and simplify frame index references where possible. - PM.add(createLocalStackSlotAllocationPass()); - - if (OptLevel != CodeGenOpt::None) { - // With optimization, dead code should already be eliminated. However - // there is one known exception: lowered code for arguments that are only - // used by tail calls, where the tail calls reuse the incoming stack - // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). - if (!DisableMachineDCE) - PM.add(createDeadMachineInstructionElimPass()); - printAndVerify(PM, "After codegen DCE pass"); - - if (!DisableMachineLICM) - PM.add(createMachineLICMPass()); - if (!DisableMachineCSE) - PM.add(createMachineCSEPass()); - if (!DisableMachineSink) - PM.add(createMachineSinkingPass()); - printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); - - PM.add(createPeepholeOptimizerPass()); - printAndVerify(PM, "After codegen peephole optimization pass"); - } - - // Run pre-ra passes. - if (addPreRegAlloc(PM, OptLevel)) - printAndVerify(PM, "After PreRegAlloc passes"); - - // Perform register allocation. - PM.add(createRegisterAllocator(OptLevel)); - printAndVerify(PM, "After Register Allocation"); - - // Perform stack slot coloring and post-ra machine LICM. - if (OptLevel != CodeGenOpt::None) { - // FIXME: Re-enable coloring with register when it's capable of adding - // kill markers. - if (!DisableSSC) - PM.add(createStackSlotColoringPass(false)); - - // Run post-ra machine LICM to hoist reloads / remats. - if (!DisablePostRAMachineLICM) - PM.add(createMachineLICMPass(false)); - - printAndVerify(PM, "After StackSlotColoring and postra Machine LICM"); - } - - // Run post-ra passes. - if (addPostRegAlloc(PM, OptLevel)) - printAndVerify(PM, "After PostRegAlloc passes"); - - PM.add(createExpandPostRAPseudosPass()); - printAndVerify(PM, "After ExpandPostRAPseudos"); - - // Insert prolog/epilog code. Eliminate abstract frame index references... - PM.add(createPrologEpilogCodeInserter()); - printAndVerify(PM, "After PrologEpilogCodeInserter"); - - // Run pre-sched2 passes. - if (addPreSched2(PM, OptLevel)) - printAndVerify(PM, "After PreSched2 passes"); - - // Second pass scheduler. - if (OptLevel != CodeGenOpt::None && !DisablePostRA) { - PM.add(createPostRAScheduler(OptLevel)); - printAndVerify(PM, "After PostRAScheduler"); - } - - // Branch folding must be run after regalloc and prolog/epilog insertion. 
- if (OptLevel != CodeGenOpt::None && !DisableBranchFold) { - PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); - printNoVerify(PM, "After BranchFolding"); - } - - // Tail duplication. - if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) { - PM.add(createTailDuplicatePass(false)); - printNoVerify(PM, "After TailDuplicate"); - } - - PM.add(createGCMachineCodeAnalysisPass()); - - if (PrintGCInfo) - PM.add(createGCInfoPrinter(dbgs())); - - if (OptLevel != CodeGenOpt::None && !DisableCodePlace) { - PM.add(createCodePlacementOptPass()); - printNoVerify(PM, "After CodePlacementOpt"); - } - - if (addPreEmitPass(PM, OptLevel)) - printNoVerify(PM, "After PreEmit passes"); - - return false; -} diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 0eb009ddac29..deab05a412c9 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -46,7 +46,7 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { // Finally, just to provide a stable ordering, use the node number as a // deciding factor. - return LHSNum < RHSNum; + return RHSNum < LHSNum; } @@ -84,11 +84,11 @@ void LatencyPriorityQueue::push(SUnit *SU) { } -// ScheduledNode - As nodes are scheduled, we look to see if there are any +// scheduledNode - As nodes are scheduled, we look to see if there are any // successor nodes that have a single unscheduled predecessor. If so, that // single predecessor has a higher priority, since scheduling it will make // the node available. -void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { +void LatencyPriorityQueue::scheduledNode(SUnit *SU) { for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { AdjustPriorityOfUnscheduledPreds(I->getSUnit()); diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index a12e1a36d113..f1abcbb1dd5c 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -311,6 +311,8 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { return Result; } +void LexicalScope::anchor() { } + /// dump - Print data structures. void LexicalScope::dump() const { #ifndef NDEBUG diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 3dfe4c0e8cfa..2187833031ee 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -226,7 +226,7 @@ public: LiveInterval *LI, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS); + UserValueScopes &UVS); /// addDefsFromCopies - The value in LI/LocNo may be copies to other /// registers. Determine if any of the copies are available at the kill @@ -468,7 +468,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { // DBG_VALUE has no slot index, use the previous instruction instead. SlotIndex Idx = MBBI == MBB->begin() ? LIS->getMBBStartIdx(MBB) : - LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex(); + LIS->getInstructionIndex(llvm::prior(MBBI)).getRegSlot(); // Handle consecutive DBG_VALUE instructions with the same slot index. 
do { if (handleDebugValue(MBBI, Idx)) { @@ -486,7 +486,7 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveInterval *LI, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS) { + UserValueScopes &UVS) { SmallVector<SlotIndex, 16> Todo; Todo.push_back(Idx); do { @@ -575,15 +575,15 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, // Is LocNo extended to reach this copy? If not, another def may be blocking // it, or we are looking at a wrong value of LI. SlotIndex Idx = LIS.getInstructionIndex(MI); - LocMap::iterator I = locInts.find(Idx.getUseIndex()); + LocMap::iterator I = locInts.find(Idx.getRegSlot(true)); if (!I.valid() || I.value() != LocNo) continue; if (!LIS.hasInterval(DstReg)) continue; LiveInterval *DstLI = &LIS.getInterval(DstReg); - const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex()); - assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value"); + const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot()); + assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value"); CopyValues.push_back(std::make_pair(DstLI, DstVNI)); } @@ -620,7 +620,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS) { + UserValueScopes &UVS) { SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs; // Collect all defs to be extended (Skipping undefs). @@ -841,7 +841,7 @@ bool UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { bool DidChange = false; // Split locations referring to OldReg. Iterate backwards so splitLocation can - // safely erase unuused locations. + // safely erase unused locations. for (unsigned i = locations.size(); i ; --i) { unsigned LocNo = i-1; const MachineOperand *Loc = &locations[LocNo]; @@ -889,8 +889,7 @@ UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { // index is no longer available. That means the user value is in a // non-existent sub-register, and %noreg is exactly what we want. Loc.substPhysReg(VRM.getPhys(VirtReg), TRI); - } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT && - VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) { + } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) { // FIXME: Translate SubIdx to a stackslot offset. Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg)); } else { @@ -921,8 +920,8 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, } // Don't insert anything after the first terminator, though. - return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() : - llvm::next(MachineBasicBlock::iterator(MI)); + return MI->isTerminator() ? MBB->getFirstTerminator() : + llvm::next(MachineBasicBlock::iterator(MI)); } DebugLoc UserValue::findDebugLoc() { diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index b69945aea98f..ac18843ac30d 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -381,37 +381,40 @@ void LiveInterval::join(LiveInterval &Other, for (unsigned i = 0; i != NumVals; ++i) { unsigned LHSValID = LHSValNoAssignments[i]; if (i != LHSValID || - (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) + (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) { MustMapCurValNos = true; + break; + } } // If we have to apply a mapping to our base interval assignment, rewrite it // now. if (MustMapCurValNos) { // Map the first live range. 
+ iterator OutIt = begin(); OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]]; - ++OutIt; - for (iterator I = OutIt, E = end(); I != E; ++I) { - OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]]; + for (iterator I = next(OutIt), E = end(); I != E; ++I) { + VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]]; + assert(nextValNo != 0 && "Huh?"); // If this live range has the same value # as its immediate predecessor, // and if they are neighbors, remove one LiveRange. This happens when we - // have [0,3:0)[4,7:1) and map 0/1 onto the same value #. - if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) { - (OutIt-1)->end = OutIt->end; + // have [0,4:0)[4,7:1) and map 0/1 onto the same value #. + if (OutIt->valno == nextValNo && OutIt->end == I->start) { + OutIt->end = I->end; } else { - if (I != OutIt) { + // Didn't merge. Move OutIt to the next interval, + ++OutIt; + OutIt->valno = nextValNo; + if (OutIt != I) { OutIt->start = I->start; OutIt->end = I->end; } - - // Didn't merge, on to the next one. - ++OutIt; } } - // If we merge some live ranges, chop off the end. + ++OutIt; ranges.erase(OutIt, end()); } @@ -639,8 +642,6 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { OS << "-phidef"; if (vni->hasPHIKill()) OS << "-phikill"; - if (vni->hasRedefByEC()) - OS << "-ec"; } } } @@ -680,15 +681,14 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Connect to values live out of predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) - if (const VNInfo *PVNI = - LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot())) + if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI))) EqClass.join(VNI->id, PVNI->id); } else { // Normal value defined by an instruction. Check for two-addr redef. // FIXME: This could be coincidental. Should we really check for a tied // operand constraint? // Note that VNI->def may be a use slot for an early clobber def. - if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot())) + if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def)) EqClass.join(VNI->id, UVNI->id); } } @@ -716,7 +716,7 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], continue; // DBG_VALUE instructions should have been eliminated earlier. SlotIndex Idx = LIS.getInstructionIndex(MI); - Idx = MO.isUse() ? 
Idx.getUseIndex() : Idx.getDefIndex(); + Idx = Idx.getRegSlot(MO.isUse()); const VNInfo *VNI = LI.getVNInfoAt(Idx); assert(VNI && "Interval not live at use."); MO.setReg(LIV[getEqClass(VNI)]->reg); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index b1e202a273d3..3ade66097cbd 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -15,31 +15,22 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "liveintervals" +#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "VirtRegMap.h" #include "llvm/Value.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveVariables.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ProcessImplicitDefs.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include <algorithm> @@ -52,19 +43,14 @@ static cl::opt<bool> DisableReMat("disable-rematerialization", cl::init(false), cl::Hidden); STATISTIC(numIntervals , "Number of original intervals"); -STATISTIC(numFolds , "Number of loads/stores folded into instructions"); -STATISTIC(numSplits , "Number of intervals split"); char LiveIntervals::ID = 0; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_DEPENDENCY(LiveVariables) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(PHIElimination) -INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) -INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) @@ -74,18 +60,8 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<LiveVariables>(); AU.addPreserved<LiveVariables>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); + AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); - - if (!StrongPHIElim) { - AU.addPreservedID(PHIEliminationID); - AU.addRequiredID(PHIEliminationID); - } - - AU.addRequiredID(TwoAddressInstructionPassID); - AU.addPreserved<ProcessImplicitDefs>(); - AU.addRequired<ProcessImplicitDefs>(); AU.addPreserved<SlotIndexes>(); AU.addRequiredTransitive<SlotIndexes>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -98,14 +74,12 @@ void LiveIntervals::releaseMemory() { delete I->second; r2iMap_.clear(); + RegMaskSlots.clear(); + RegMaskBits.clear(); + RegMaskBlocks.clear(); // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. 
VNInfoAllocator.Reset(); - while (!CloneMIs.empty()) { - MachineInstr *MI = CloneMIs.back(); - CloneMIs.pop_back(); - mf_->DeleteMachineInstr(MI); - } } /// runOnMachineFunction - Register allocate the whole function @@ -120,6 +94,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { lv_ = &getAnalysis<LiveVariables>(); indexes_ = &getAnalysis<SlotIndexes>(); allocatableRegs_ = tri_->getAllocatableSet(fn); + reservedRegs_ = tri_->getReservedRegs(fn); computeIntervals(); @@ -132,10 +107,21 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { /// print - Implement the dump method. void LiveIntervals::print(raw_ostream &OS, const Module* ) const { OS << "********** INTERVALS **********\n"; - for (const_iterator I = begin(), E = end(); I != E; ++I) { - I->second->print(OS, tri_); - OS << "\n"; - } + + // Dump the physregs. + for (unsigned Reg = 1, RegE = tri_->getNumRegs(); Reg != RegE; ++Reg) + if (const LiveInterval *LI = r2iMap_.lookup(Reg)) { + LI->print(OS, tri_); + OS << '\n'; + } + + // Dump the virtregs. + for (unsigned Reg = 0, RegE = mri_->getNumVirtRegs(); Reg != RegE; ++Reg) + if (const LiveInterval *LI = + r2iMap_.lookup(TargetRegisterInfo::index2VirtReg(Reg))) { + LI->print(OS, tri_); + OS << '\n'; + } printInstrs(OS); } @@ -149,103 +135,6 @@ void LiveIntervals::dumpInstrs() const { printInstrs(dbgs()); } -bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, - VirtRegMap &vrm, unsigned reg) { - // We don't handle fancy stuff crossing basic block boundaries - if (li.ranges.size() != 1) - return true; - const LiveRange &range = li.ranges.front(); - SlotIndex idx = range.start.getBaseIndex(); - SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex(); - - // Skip deleted instructions - MachineInstr *firstMI = getInstructionFromIndex(idx); - while (!firstMI && idx != end) { - idx = idx.getNextIndex(); - firstMI = getInstructionFromIndex(idx); - } - if (!firstMI) - return false; - - // Find last instruction in range - SlotIndex lastIdx = end.getPrevIndex(); - MachineInstr *lastMI = getInstructionFromIndex(lastIdx); - while (!lastMI && lastIdx != idx) { - lastIdx = lastIdx.getPrevIndex(); - lastMI = getInstructionFromIndex(lastIdx); - } - if (!lastMI) - return false; - - // Range cannot cross basic block boundaries or terminators - MachineBasicBlock *MBB = firstMI->getParent(); - if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator()) - return true; - - MachineBasicBlock::const_iterator E = lastMI; - ++E; - for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) { - const MachineInstr &MI = *I; - - // Allow copies to and from li.reg - if (MI.isCopy()) - if (MI.getOperand(0).getReg() == li.reg || - MI.getOperand(1).getReg() == li.reg) - continue; - - // Check for operands using reg - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand& mop = MI.getOperand(i); - if (!mop.isReg()) - continue; - unsigned PhysReg = mop.getReg(); - if (PhysReg == 0 || PhysReg == li.reg) - continue; - if (TargetRegisterInfo::isVirtualRegister(PhysReg)) { - if (!vrm.hasPhys(PhysReg)) - continue; - PhysReg = vrm.getPhys(PhysReg); - } - if (PhysReg && tri_->regsOverlap(PhysReg, reg)) - return true; - } - } - - // No conflicts found. 
- return false; -} - -bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg, - SmallPtrSet<MachineInstr*,32> &JoinedCopies) { - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - for (SlotIndex index = I->start.getBaseIndex(), - end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); - index != end; - index = index.getNextIndex()) { - MachineInstr *MI = getInstructionFromIndex(index); - if (!MI) - continue; // skip deleted instructions - - if (JoinedCopies.count(MI)) - continue; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned PhysReg = MO.getReg(); - if (PhysReg == 0 || PhysReg == Reg || - TargetRegisterInfo::isVirtualRegister(PhysReg)) - continue; - if (tri_->regsOverlap(Reg, PhysReg)) - return true; - } - } - } - - return false; -} - static bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { unsigned Reg = MI.getOperand(MOIdx).getReg(); @@ -271,9 +160,9 @@ bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, if (!MO.getSubReg() || MO.isEarlyClobber()) return false; - SlotIndex RedefIndex = MIIdx.getDefIndex(); + SlotIndex RedefIndex = MIIdx.getRegSlot(); const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getUseIndex()); + interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def); if (DefMI != 0) { return DefMI->findRegisterDefOperandIdx(interval.reg) != -1; @@ -296,34 +185,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); if (interval.empty()) { // Get the Idx of the defining instructions. - SlotIndex defIndex = MIIdx.getDefIndex(); - // Earlyclobbers move back one, so that they overlap the live range - // of inputs. - if (MO.isEarlyClobber()) - defIndex = MIIdx.getUseIndex(); - - // Make sure the first definition is not a partial redefinition. Add an - // <imp-def> of the full register. - // FIXME: LiveIntervals shouldn't modify the code like this. Whoever - // created the machine instruction should annotate it with <undef> flags - // as needed. Then we can simply assert here. The REG_SEQUENCE lowering - // is the main suspect. - if (MO.getSubReg()) { - mi->addRegisterDefined(interval.reg); - // Mark all defs of interval.reg on this instruction as reading <undef>. - for (unsigned i = MOIdx, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO2 = mi->getOperand(i); - if (MO2.isReg() && MO2.getReg() == interval.reg && MO2.getSubReg()) - MO2.setIsUndef(); - } - } + SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); - MachineInstr *CopyMI = NULL; - if (mi->isCopyLike()) { - CopyMI = mi; - } + // Make sure the first definition is not a partial redefinition. + assert(!MO.readsReg() && "First def cannot also read virtual register " + "missing <undef> flag?"); - VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); + VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator); assert(ValNo->id == 0 && "First value in interval is not 0?"); // Loop over all of the blocks that the vreg is defined in. There are @@ -334,9 +202,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // FIXME: what about dead vars? 
SlotIndex killIdx; if (vi.Kills[0] != mi) - killIdx = getInstructionIndex(vi.Kills[0]).getDefIndex(); + killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot(); else - killIdx = defIndex.getStoreIndex(); + killIdx = defIndex.getDeadSlot(); // If the kill happens after the definition, we have an intra-block // live range. @@ -384,14 +252,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) { MachineInstr *Kill = vi.Kills[i]; SlotIndex Start = getMBBStartIdx(Kill->getParent()); - SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex(); + SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot(); // Create interval with one of a NEW value number. Note that this value // number isn't actually defined by an instruction, weird huh? :) if (PHIJoin) { assert(getInstructionFromIndex(Start) == 0 && "PHI def index points at actual instruction."); - ValNo = interval.getNextValue(Start, 0, VNInfoAllocator); + ValNo = interval.getNextValue(Start, VNInfoAllocator); ValNo->setIsPHIDef(true); } LiveRange LR(Start, killIdx, ValNo); @@ -422,14 +290,12 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // are actually two values in the live interval. Because of this we // need to take the LiveRegion that defines this register and split it // into two values. - SlotIndex RedefIndex = MIIdx.getDefIndex(); - if (MO.isEarlyClobber()) - RedefIndex = MIIdx.getUseIndex(); + SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getUseIndex()); + interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); VNInfo *OldValNo = OldLR->valno; - SlotIndex DefIndex = OldValNo->def.getDefIndex(); + SlotIndex DefIndex = OldValNo->def.getRegSlot(); // Delete the previous value, which should be short and continuous, // because the 2-addr copy must be in the same MBB as the redef. @@ -440,12 +306,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator); // Value#0 is now defined by the 2-addr instruction. - OldValNo->def = RedefIndex; - OldValNo->setCopy(0); - - // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ... - if (PartReDef && mi->isCopyLike()) - OldValNo->setCopy(&*mi); + OldValNo->def = RedefIndex; // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); @@ -455,7 +316,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // If this redefinition is dead, we need to add a dummy unit live // range covering the def slot. if (MO.isDead()) - interval.addRange(LiveRange(RedefIndex, RedefIndex.getStoreIndex(), + interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(), OldValNo)); DEBUG({ @@ -467,15 +328,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // live until the end of the block. We've already taken care of the // rest of the live range. 
- SlotIndex defIndex = MIIdx.getDefIndex(); + SlotIndex defIndex = MIIdx.getRegSlot(); if (MO.isEarlyClobber()) - defIndex = MIIdx.getUseIndex(); + defIndex = MIIdx.getRegSlot(true); - VNInfo *ValNo; - MachineInstr *CopyMI = NULL; - if (mi->isCopyLike()) - CopyMI = mi; - ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); + VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator); SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); @@ -490,21 +347,26 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, DEBUG(dbgs() << '\n'); } +static bool isRegLiveIntoSuccessor(const MachineBasicBlock *MBB, unsigned Reg) { + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); + SI != SE; ++SI) { + const MachineBasicBlock* succ = *SI; + if (succ->isLiveIn(Reg)) + return true; + } + return false; +} + void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, MachineBasicBlock::iterator mi, SlotIndex MIIdx, MachineOperand& MO, - LiveInterval &interval, - MachineInstr *CopyMI) { - // A physical register cannot be live across basic block, so its - // lifetime must end somewhere in its defining basic block. + LiveInterval &interval) { DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_)); SlotIndex baseIndex = MIIdx; - SlotIndex start = baseIndex.getDefIndex(); - // Earlyclobbers move back one. - if (MO.isEarlyClobber()) - start = MIIdx.getUseIndex(); + SlotIndex start = baseIndex.getRegSlot(MO.isEarlyClobber()); SlotIndex end = start; // If it is not used after definition, it is considered dead at @@ -514,7 +376,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, // advance below compensates. if (MO.isDead()) { DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); goto exit; } @@ -531,21 +393,21 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, if (mi->killsRegister(interval.reg, tri_)) { DEBUG(dbgs() << " killed"); - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(); goto exit; } else { int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_); if (DefIdx != -1) { if (mi->isRegTiedToUseOperand(DefIdx)) { // Two-address instruction. - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(mi->getOperand(DefIdx).isEarlyClobber()); } else { // Another instruction redefines the register before it is ever read. // Then the register is essentially dead at the instruction that // defines it. Hence its interval is: // [defSlot(def), defSlot(def)+1) DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); } goto exit; } @@ -554,12 +416,19 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, baseIndex = baseIndex.getNextIndex(); } - // The only case we should have a dead physreg here without a killing or - // instruction where we know it's dead is if it is live-in to the function - // and never used. Another possible case is the implicit use of the - // physical register has been deleted by two-address pass. - end = start.getStoreIndex(); + // If we get here the register *should* be live out. + assert(!isAllocatable(interval.reg) && "Physregs shouldn't be live out!"); + // FIXME: We need saner rules for reserved regs. + if (isReserved(interval.reg)) { + end = start.getDeadSlot(); + } else { + // Unreserved, unallocable registers like EFLAGS can be live across basic + // block boundaries. 
+ assert(isRegLiveIntoSuccessor(MBB, interval.reg) && + "Unreserved reg not live-out?"); + end = getMBBEndIdx(MBB); + } exit: assert(start < end && "did not find end of interval?"); @@ -567,9 +436,7 @@ exit: VNInfo *ValNo = interval.getVNInfoAt(start); bool Extend = ValNo != 0; if (!Extend) - ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator); - if (Extend && MO.isEarlyClobber()) - ValNo->setHasRedefByEC(true); + ValNo = interval.getNextValue(start, VNInfoAllocator); LiveRange LR(start, end, ValNo); interval.addRange(LR); DEBUG(dbgs() << " +" << LR << '\n'); @@ -583,18 +450,20 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, getOrCreateInterval(MO.getReg())); - else { - MachineInstr *CopyMI = NULL; - if (MI->isCopyLike()) - CopyMI = MI; + else handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, - getOrCreateInterval(MO.getReg()), CopyMI); - } + getOrCreateInterval(MO.getReg())); } void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, SlotIndex MIIdx, - LiveInterval &interval, bool isAlias) { + LiveInterval &interval) { + assert(TargetRegisterInfo::isPhysicalRegister(interval.reg) && + "Only physical registers can be live in."); + assert((!isAllocatable(interval.reg) || MBB->getParent()->begin() || + MBB->isLandingPad()) && + "Allocatable live-ins only valid for entry blocks and landing pads."); + DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_)); // Look for kills, if it reaches a def before it's killed, then it shouldn't @@ -621,16 +490,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, while (mi != E) { if (mi->killsRegister(interval.reg, tri_)) { DEBUG(dbgs() << " killed"); - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(); SeenDefUse = true; break; - } else if (mi->definesRegister(interval.reg, tri_)) { + } else if (mi->modifiesRegister(interval.reg, tri_)) { // Another instruction redefines the register before it is ever read. // Then the register is essentially dead at the instruction that defines // it. Hence its interval is: // [defSlot(def), defSlot(def)+1) DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); SeenDefUse = true; break; } @@ -644,10 +513,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, // Live-in register might not be used at all. if (!SeenDefUse) { - if (isAlias) { + if (isAllocatable(interval.reg) || + !isRegLiveIntoSuccessor(MBB, interval.reg)) { + // Allocatable registers are never live through. + // Non-allocatable registers that aren't live into any successors also + // aren't live through. DEBUG(dbgs() << " dead"); - end = MIIdx.getStoreIndex(); + return; } else { + // If we get here the register is non-allocatable and live into some + // successor. We'll conservatively assume it's live-through. 
DEBUG(dbgs() << " live through"); end = getMBBEndIdx(MBB); } @@ -656,8 +531,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, SlotIndex defIdx = getMBBStartIdx(MBB); assert(getInstructionFromIndex(defIdx) == 0 && "PHI def index points at actual instruction."); - VNInfo *vni = - interval.getNextValue(defIdx, 0, VNInfoAllocator); + VNInfo *vni = interval.getNextValue(defIdx, VNInfoAllocator); vni->setIsPHIDef(true); LiveRange LR(start, end, vni); @@ -674,10 +548,14 @@ void LiveIntervals::computeIntervals() { << "********** Function: " << ((Value*)mf_->getFunction())->getName() << '\n'); + RegMaskBlocks.resize(mf_->getNumBlockIDs()); + SmallVector<unsigned, 8> UndefUses; for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; + RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size(); + if (MBB->empty()) continue; @@ -690,11 +568,6 @@ void LiveIntervals::computeIntervals() { for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), LE = MBB->livein_end(); LI != LE; ++LI) { handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI)); - // Multiple live-ins can alias the same register. - for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS) - if (!hasInterval(*AS)) - handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS), - true); } // Skip over empty initial indices. @@ -706,10 +579,20 @@ void LiveIntervals::computeIntervals() { DEBUG(dbgs() << MIIndex << "\t" << *MI); if (MI->isDebugValue()) continue; + assert(indexes_->getInstructionFromIndex(MIIndex) == MI && + "Lost SlotIndex synchronization"); // Handle defs. for (int i = MI->getNumOperands() - 1; i >= 0; --i) { MachineOperand &MO = MI->getOperand(i); + + // Collect register masks. + if (MO.isRegMask()) { + RegMaskSlots.push_back(MIIndex.getRegSlot()); + RegMaskBits.push_back(MO.getRegMask()); + continue; + } + if (!MO.isReg() || !MO.getReg()) continue; @@ -723,6 +606,10 @@ void LiveIntervals::computeIntervals() { // Move to the next instr slot. MIIndex = indexes_->getNextNonNullIndex(MIIndex); } + + // Compute the number of register mask instructions in this block. + std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()]; + RMB.second = RegMaskSlots.size() - RMB.first;; } // Create empty intervals for registers defined by implicit_def's (except @@ -754,7 +641,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallVectorImpl<MachineInstr*> *dead) { DEBUG(dbgs() << "Shrink: " << *li << '\n'); assert(TargetRegisterInfo::isVirtualRegister(li->reg) - && "Can't only shrink physical registers"); + && "Can only shrink virtual registers"); // Find all the values used, including PHI kills. SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList; @@ -766,8 +653,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, MachineInstr *UseMI = I.skipInstruction();) { if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; - SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex(); - VNInfo *VNI = li->getVNInfoAt(Idx); + SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); + // Note: This intentionally picks up the wrong VNI in case of an EC redef. + // See below. + VNInfo *VNI = li->getVNInfoBefore(Idx); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is // no live value. 
It is likely caused by a target getting <undef> flags @@ -777,11 +666,12 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, << *li << '\n'); continue; } - if (VNI->def == Idx) { - // Special case: An early-clobber tied operand reads and writes the - // register one slot early. - Idx = Idx.getPrevSlot(); - VNI = li->getVNInfoAt(Idx); + // Special case: An early-clobber tied operand reads and writes the + // register one slot early. The getVNInfoBefore call above would have + // picked up the value defined by UseMI. Adjust the kill slot and value. + if (SlotIndex::isSameInstr(VNI->def, Idx)) { + Idx = VNI->def; + VNI = li->getVNInfoBefore(Idx); assert(VNI && "Early-clobber tied value not available"); } WorkList.push_back(std::make_pair(Idx, VNI)); @@ -794,14 +684,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, VNInfo *VNI = *I; if (VNI->isUnused()) continue; - NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI)); - - // A use tied to an early-clobber def ends at the load slot and isn't caught - // above. Catch it here instead. This probably only ever happens for inline - // assembly. - if (VNI->def.isUse()) - if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex())) - WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI)); + NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI)); } // Keep track of the PHIs that are in use. @@ -812,11 +695,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SlotIndex Idx = WorkList.back().first; VNInfo *VNI = WorkList.back().second; WorkList.pop_back(); - const MachineBasicBlock *MBB = getMBBFromIndex(Idx); + const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot()); SlotIndex BlockStart = getMBBStartIdx(MBB); // Extend the live range for VNI to be live at Idx. - if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx.getNextSlot())) { + if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) { (void)ExtVNI; assert(ExtVNI == VNI && "Unexpected existing value number"); // Is this a PHIDef we haven't seen before? @@ -827,9 +710,9 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, PE = MBB->pred_end(); PI != PE; ++PI) { if (!LiveOut.insert(*PI)) continue; - SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); + SlotIndex Stop = getMBBEndIdx(*PI); // A predecessor is not required to have a live-out value for a PHI. - if (VNInfo *PVNI = li->getVNInfoAt(Stop)) + if (VNInfo *PVNI = li->getVNInfoBefore(Stop)) WorkList.push_back(std::make_pair(Stop, PVNI)); } continue; @@ -837,15 +720,16 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // VNI is live-in to MBB. DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); - NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI)); + NewLI.addRange(LiveRange(BlockStart, Idx, VNI)); // Make sure VNI is live-out from the predecessors. 
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { if (!LiveOut.insert(*PI)) continue; - SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); - assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor"); + SlotIndex Stop = getMBBEndIdx(*PI); + assert(li->getVNInfoBefore(Stop) == VNI && + "Wrong value out of predecessor"); WorkList.push_back(std::make_pair(Stop, VNI)); } } @@ -859,7 +743,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, continue; LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def); assert(LII != NewLI.end() && "Missing live range for PHI"); - if (LII->end != VNI->def.getNextSlot()) + if (LII->end != VNI->def.getDeadSlot()) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. @@ -890,28 +774,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Register allocator hooks. // -MachineBasicBlock::iterator -LiveIntervals::getLastSplitPoint(const LiveInterval &li, - MachineBasicBlock *mbb) const { - const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor(); - - // If li is not live into a landing pad, we can insert spill code before the - // first terminator. - if (!lpad || !isLiveInToMBB(li, lpad)) - return mbb->getFirstTerminator(); - - // When there is a landing pad, spill code must go before the call instruction - // that can throw. - MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin(); - while (I != B) { - --I; - if (I->getDesc().isCall()) - return I; - } - // The block contains no calls that can throw, so use the first terminator. - return mbb->getFirstTerminator(); -} - void LiveIntervals::addKillFlags() { for (iterator I = begin(), E = end(); I != E; ++I) { unsigned Reg = I->first; @@ -924,8 +786,8 @@ void LiveIntervals::addKillFlags() { // Every instruction that kills Reg corresponds to a live range end point. for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; ++RI) { - // A LOAD index indicates an MBB edge. - if (RI->end.isLoad()) + // A block index indicates an MBB edge. + if (RI->end.isBlock()) continue; MachineInstr *MI = getInstructionFromIndex(RI->end); if (!MI) @@ -949,16 +811,10 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li, if (Reg == 0 || Reg == li.reg) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg) && - !allocatableRegs_[Reg]) + if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isAllocatable(Reg)) continue; - // FIXME: For now, only remat MI with at most one register operand. - assert(!RegOp && - "Can't rematerialize instruction with multiple register operand!"); RegOp = MO.getReg(); -#ifndef NDEBUG - break; -#endif + break; // Found vreg operand - leave the loop. } return RegOp; } @@ -1011,14 +867,6 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, return true; } -/// isReMaterializable - Returns true if the definition MI of the specified -/// val# of the specified interval is re-materializable. -bool LiveIntervals::isReMaterializable(const LiveInterval &li, - const VNInfo *ValNo, MachineInstr *MI) { - bool Dummy2; - return isReMaterializable(li, ValNo, MI, 0, Dummy2); -} - /// isReMaterializable - Returns true if every definition of MI of every /// val# of the specified interval is re-materializable. bool @@ -1044,1141 +892,653 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, return true; } -/// FilterFoldedOps - Filter out two-address use operands. Return -/// true if it finds any issue with the operands that ought to prevent -/// folding. 
-static bool FilterFoldedOps(MachineInstr *MI,
- SmallVector<unsigned, 2> &Ops,
- unsigned &MRInfo,
- SmallVector<unsigned, 2> &FoldOps) {
- MRInfo = 0;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- unsigned OpIdx = Ops[i];
- MachineOperand &MO = MI->getOperand(OpIdx);
- // FIXME: fold subreg use.
- if (MO.getSubReg())
- return true;
- if (MO.isDef())
- MRInfo |= (unsigned)VirtRegMap::isMod;
- else {
- // Filter out two-address use operand(s).
- if (MI->isRegTiedToDefOperand(OpIdx)) {
- MRInfo = VirtRegMap::isModRef;
- continue;
- }
- MRInfo |= (unsigned)VirtRegMap::isRef;
- }
- FoldOps.push_back(OpIdx);
- }
- return false;
+MachineBasicBlock*
+LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
+ // A local live range must be fully contained inside the block, meaning it is
+ // defined and killed at instructions, not at block boundaries. It is not
+ // live in or out of any block.
+ //
+ // It is technically possible to have a PHI-defined live range identical to a
+ // single block, but we are going to return false in that case.
+
+ SlotIndex Start = LI.beginIndex();
+ if (Start.isBlock())
+ return NULL;
+
+ SlotIndex Stop = LI.endIndex();
+ if (Stop.isBlock())
+ return NULL;
+
+ // getMBBFromIndex doesn't need to search the MBB table when both indexes
+ // belong to proper instructions.
+ MachineBasicBlock *MBB1 = indexes_->getMBBFromIndex(Start);
+ MachineBasicBlock *MBB2 = indexes_->getMBBFromIndex(Stop);
+ return MBB1 == MBB2 ? MBB1 : NULL;
 }

+float
+LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
+ // Limit the loop depth ridiculousness.
+ if (loopDepth > 200)
+ loopDepth = 200;

-/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
-/// slot / to reg or any rematerialized load into ith operand of specified
-/// MI. If it is successul, MI is updated with the newly created MI and
-/// returns true.
-bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
- VirtRegMap &vrm, MachineInstr *DefMI,
- SlotIndex InstrIdx,
- SmallVector<unsigned, 2> &Ops,
- bool isSS, int Slot, unsigned Reg) {
- // If it is an implicit def instruction, just delete it.
- if (MI->isImplicitDef()) {
- RemoveMachineInstrFromMaps(MI);
- vrm.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- ++numFolds;
- return true;
- }
+ // The loop depth is used to roughly estimate the number of times the
+ // instruction is executed. Something like 10^d is simple, but will quickly
+ // overflow a float. This expression behaves like 10^d for small d, but is
+ // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
+ // headroom before overflow.
+ // By the way, powf() might be unavailable here. For consistency, we use
+ // pow(double, double) instead.
+ float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);

- // Filter the list of operand indexes that are to be folded. Abort if
- // any operand will prevent folding.
- unsigned MRInfo = 0;
- SmallVector<unsigned, 2> FoldOps;
- if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
- return false;
+ return (isDef + isUse) * lc;
+}

- // The only time it's safe to fold into a two address instruction is when
- // it's folding reload and spill from / into a spill stack slot.
- if (DefMI && (MRInfo & VirtRegMap::isMod))
- return false;
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+ MachineInstr* startInst) {
+ LiveInterval& Interval = getOrCreateInterval(reg);
+ VNInfo* VN = Interval.getNextValue(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getVNInfoAllocator());
+ VN->setHasPHIKill(true);
+ LiveRange LR(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getMBBEndIdx(startInst->getParent()), VN);
+ Interval.addRange(LR);

- MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot)
- : tii_->foldMemoryOperand(MI, FoldOps, DefMI);
- if (fmi) {
- // Remember this instruction uses the spill slot.
- if (isSS) vrm.addSpillSlotUse(Slot, fmi);
-
- // Attempt to fold the memory reference into the instruction. If
- // we can do this, we don't need to insert spill code.
- if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
- vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
- vrm.transferSpillPts(MI, fmi);
- vrm.transferRestorePts(MI, fmi);
- vrm.transferEmergencySpills(MI, fmi);
- ReplaceMachineInstrInMaps(MI, fmi);
- MI->eraseFromParent();
- MI = fmi;
- ++numFolds;
- return true;
- }
- return false;
+ return LR;
 }

-/// canFoldMemoryOperand - Returns true if the specified load / store
-/// folding is possible.
-bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
- SmallVector<unsigned, 2> &Ops,
- bool ReMat) const {
- // Filter the list of operand indexes that are to be folded. Abort if
- // any operand will prevent folding.
- unsigned MRInfo = 0;
- SmallVector<unsigned, 2> FoldOps;
- if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+
+//===----------------------------------------------------------------------===//
+// Register mask functions
+//===----------------------------------------------------------------------===//
+
+bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
+ BitVector &UsableRegs) {
+ if (LI.empty())
 return false;
+ LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end();
+
+ // Use smaller arrays for local live ranges.
+ ArrayRef<SlotIndex> Slots;
+ ArrayRef<const uint32_t*> Bits;
+ if (MachineBasicBlock *MBB = intervalIsInOneMBB(LI)) {
+ Slots = getRegMaskSlotsInBlock(MBB->getNumber());
+ Bits = getRegMaskBitsInBlock(MBB->getNumber());
+ } else {
+ Slots = getRegMaskSlots();
+ Bits = getRegMaskBits();
+ }

- // It's only legal to remat for a use, not a def.
- if (ReMat && (MRInfo & VirtRegMap::isMod))
+ // We are going to enumerate all the register mask slots contained in LI.
+ // Start with a binary search of RegMaskSlots to find a starting point.
+ ArrayRef<SlotIndex>::iterator SlotI =
+ std::lower_bound(Slots.begin(), Slots.end(), LiveI->start);
+ ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
+
+ // No slots in range, LI begins after the last call.
+ if (SlotI == SlotE)
 return false;

- return tii_->canFoldMemoryOperand(MI, FoldOps);
+ bool Found = false;
+ for (;;) {
+ assert(*SlotI >= LiveI->start);
+ // Loop over all slots overlapping this segment.
+ while (*SlotI < LiveI->end) {
+ // *SlotI overlaps LI. Collect mask bits.
+ if (!Found) {
+ // This is the first overlap. Initialize UsableRegs to all ones.
+ UsableRegs.clear();
+ UsableRegs.resize(tri_->getNumRegs(), true);
+ Found = true;
+ }
+ // Remove usable registers clobbered by this mask.
+ UsableRegs.clearBitsNotInMask(Bits[SlotI-Slots.begin()]);
+ if (++SlotI == SlotE)
+ return Found;
+ }
+ // *SlotI is beyond the current LI segment.
+ LiveI = LI.advanceTo(LiveI, *SlotI);
+ if (LiveI == LiveE)
+ return Found;
+ // Advance SlotI until it overlaps.
+ while (*SlotI < LiveI->start)
+ if (++SlotI == SlotE)
+ return Found;
+ }
}

-bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
- LiveInterval::Ranges::const_iterator itr = li.ranges.begin();
+//===----------------------------------------------------------------------===//
+// IntervalUpdate class.
+//===----------------------------------------------------------------------===//

- MachineBasicBlock *mbb = indexes_->getMBBCoveringRange(itr->start, itr->end);
+// HMEditor is a toolkit used by handleMove to trim or extend live intervals.
+class LiveIntervals::HMEditor {
+private:
+ LiveIntervals& LIS;
+ const MachineRegisterInfo& MRI;
+ const TargetRegisterInfo& TRI;
+ SlotIndex NewIdx;
+
+ typedef std::pair<LiveInterval*, LiveRange*> IntRangePair;
+ typedef DenseSet<IntRangePair> RangeSet;
+
+ struct RegRanges {
+ LiveRange* Use;
+ LiveRange* EC;
+ LiveRange* Dead;
+ LiveRange* Def;
+ RegRanges() : Use(0), EC(0), Dead(0), Def(0) {}
+ };
+ typedef DenseMap<unsigned, RegRanges> BundleRanges;
+
+public:
+ HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
+ const TargetRegisterInfo& TRI, SlotIndex NewIdx)
+ : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {}
+
+ // Update intervals for all operands of MI from OldIdx to NewIdx.
+ // This assumes that MI used to be at OldIdx, and now resides at
+ // NewIdx.
+ void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) {
+ assert(NewIdx != OldIdx && "No-op move? That's a bit strange.");
+
+ // Collect the operands.
+ RangeSet Entering, Internal, Exiting;
+ bool hasRegMaskOp = false;
+ collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
+
+ // To keep the LiveRanges valid within an interval, move the ranges closest
+ // to the destination first. This prevents ranges from overlapping, so that
+ // APIs like removeRange still work.
+ if (NewIdx < OldIdx) {
+ moveAllEnteringFrom(OldIdx, Entering);
+ moveAllInternalFrom(OldIdx, Internal);
+ moveAllExitingFrom(OldIdx, Exiting);
+ }
+ else {
+ moveAllExitingFrom(OldIdx, Exiting);
+ moveAllInternalFrom(OldIdx, Internal);
+ moveAllEnteringFrom(OldIdx, Entering);
+ }

- if (mbb == 0)
- return false;
+ if (hasRegMaskOp)
+ updateRegMaskSlots(OldIdx);

- for (++itr; itr != li.ranges.end(); ++itr) {
- MachineBasicBlock *mbb2 =
- indexes_->getMBBCoveringRange(itr->start, itr->end);
+#ifndef NDEBUG
+ LIValidator validator;
+ std::for_each(Entering.begin(), Entering.end(), validator);
+ std::for_each(Internal.begin(), Internal.end(), validator);
+ std::for_each(Exiting.begin(), Exiting.end(), validator);
+ assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness.");
+#endif

- if (mbb2 != mbb)
- return false;
+ }

- return true;
-}
+ // Update intervals for all operands of MI to refer to BundleStart's
+ // SlotIndex.
+ void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) {
+ if (MI == BundleStart)
+ return; // Bundling instr with itself - nothing to do.
+
+ SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI);
+ assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI &&
+ "SlotIndex <-> Instruction mapping broken for MI");
+
+ // Collect all ranges already in the bundle.
+ MachineBasicBlock::instr_iterator BII(BundleStart); + RangeSet Entering, Internal, Exiting; + bool hasRegMaskOp = false; + collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx); + assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); + for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) { + if (&*BII == MI) + continue; + collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx); + assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); + } -/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of -/// interval on to-be re-materialized operands of MI) with new register. -void LiveIntervals::rewriteImplicitOps(const LiveInterval &li, - MachineInstr *MI, unsigned NewVReg, - VirtRegMap &vrm) { - // There is an implicit use. That means one of the other operand is - // being remat'ed and the remat'ed instruction has li.reg as an - // use operand. Make sure we rewrite that as well. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (!vrm.isReMaterialized(Reg)) - continue; - MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg); - MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg); - if (UseMO) - UseMO->setReg(NewVReg); - } -} + BundleRanges BR = createBundleRanges(Entering, Internal, Exiting); -/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions -/// for addIntervalsForSpills to rewrite uses / defs for the given live range. -bool LiveIntervals:: -rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, - bool TrySplit, SlotIndex index, SlotIndex end, - MachineInstr *MI, - MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, - unsigned Slot, int LdSlot, - bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, - VirtRegMap &vrm, - const TargetRegisterClass* rc, - SmallVector<int, 4> &ReMatIds, - const MachineLoopInfo *loopInfo, - unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse, - DenseMap<unsigned,unsigned> &MBBVRegsMap, - std::vector<LiveInterval*> &NewLIs) { - bool CanFold = false; - RestartInstruction: - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand& mop = MI->getOperand(i); - if (!mop.isReg()) - continue; - unsigned Reg = mop.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (Reg != li.reg) - continue; + collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx); + assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); - bool TryFold = !DefIsReMat; - bool FoldSS = true; // Default behavior unless it's a remat. - int FoldSlot = Slot; - if (DefIsReMat) { - // If this is the rematerializable definition MI itself and - // all of its uses are rematerialized, simply delete it. - if (MI == ReMatOrigDefMI && CanDelete) { - DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: " - << *MI << '\n'); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - break; - } + DEBUG(dbgs() << "Entering: " << Entering.size() << "\n"); + DEBUG(dbgs() << "Internal: " << Internal.size() << "\n"); + DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n"); - // If def for this use can't be rematerialized, then try folding. - // If def is rematerializable and it's a load, also try folding. 
- TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad)); - if (isLoad) { - // Try fold loads (from stack slot, constant pool, etc.) into uses. - FoldSS = isLoadSS; - FoldSlot = LdSlot; - } - } + moveAllEnteringFromInto(OldIdx, Entering, BR); + moveAllInternalFromInto(OldIdx, Internal, BR); + moveAllExitingFromInto(OldIdx, Exiting, BR); - // Scan all of the operands of this instruction rewriting operands - // to use NewVReg instead of li.reg as appropriate. We do this for - // two reasons: - // - // 1. If the instr reads the same spilled vreg multiple times, we - // want to reuse the NewVReg. - // 2. If the instr is a two-addr instruction, we are required to - // keep the src/dst regs pinned. - // - // Keep track of whether we replace a use and/or def so that we can - // create the spill interval with the appropriate range. - SmallVector<unsigned, 2> Ops; - tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops); - - // Create a new virtual register for the spill interval. - // Create the new register now so we can map the fold instruction - // to the new register so when it is unfolded we get the correct - // answer. - bool CreatedNewVReg = false; - if (NewVReg == 0) { - NewVReg = mri_->createVirtualRegister(rc); - vrm.grow(); - CreatedNewVReg = true; - - // The new virtual register should get the same allocation hints as the - // old one. - std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(Reg); - if (Hint.first || Hint.second) - mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second); - } - if (!TryFold) - CanFold = false; - else { - // Do not fold load / store here if we are splitting. We'll find an - // optimal point to insert a load / store later. - if (!TrySplit) { - if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, - Ops, FoldSS, FoldSlot, NewVReg)) { - // Folding the load/store can completely change the instruction in - // unpredictable ways, rescan it from the beginning. - - if (FoldSS) { - // We need to give the new vreg the same stack slot as the - // spilled interval. - vrm.assignVirt2StackSlot(NewVReg, FoldSlot); - } - - HasUse = false; - HasDef = false; - CanFold = false; - if (isNotInMIMap(MI)) - break; - goto RestartInstruction; - } - } else { - // We'll try to fold it later if it's profitable. - CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat); - } - } +#ifndef NDEBUG + LIValidator validator; + std::for_each(Entering.begin(), Entering.end(), validator); + std::for_each(Internal.begin(), Internal.end(), validator); + std::for_each(Exiting.begin(), Exiting.end(), validator); + assert(validator.rangesOk() && "moveAllOperandsInto broke liveness."); +#endif + } - mop.setReg(NewVReg); - if (mop.isImplicit()) - rewriteImplicitOps(li, MI, NewVReg, vrm); - - // Reuse NewVReg for other reads. - bool HasEarlyClobber = false; - for (unsigned j = 0, e = Ops.size(); j != e; ++j) { - MachineOperand &mopj = MI->getOperand(Ops[j]); - mopj.setReg(NewVReg); - if (mopj.isImplicit()) - rewriteImplicitOps(li, MI, NewVReg, vrm); - if (mopj.isEarlyClobber()) - HasEarlyClobber = true; - } +private: - if (CreatedNewVReg) { - if (DefIsReMat) { - vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI); - if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) { - // Each valnum may have its own remat id. 
- ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg);
- } else {
- vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]);
- }
- if (!CanDelete || (HasUse && HasDef)) {
- // If this is a two-addr instruction then its use operands are
- // rematerializable but its def is not. It should be assigned a
- // stack slot.
- vrm.assignVirt2StackSlot(NewVReg, Slot);
- }
- } else {
- vrm.assignVirt2StackSlot(NewVReg, Slot);
+#ifndef NDEBUG
+ class LIValidator {
+ private:
+ DenseSet<const LiveInterval*> Checked, Bogus;
+ public:
+ void operator()(const IntRangePair& P) {
+ const LiveInterval* LI = P.first;
+ if (Checked.count(LI))
+ return;
+ Checked.insert(LI);
+ if (LI->empty())
+ return;
+ SlotIndex LastEnd = LI->begin()->start;
+ for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end();
+ LRI != LRE; ++LRI) {
+ const LiveRange& LR = *LRI;
+ if (LastEnd > LR.start || LR.start >= LR.end)
+ Bogus.insert(LI);
+ LastEnd = LR.end;
 }
- } else if (HasUse && HasDef &&
- vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) {
- // If this interval hasn't been assigned a stack slot (because earlier
- // def is a deleted remat def), do it now.
- assert(Slot != VirtRegMap::NO_STACK_SLOT);
- vrm.assignVirt2StackSlot(NewVReg, Slot);
 }

- // Re-matting an instruction with virtual register use. Add the
- // register as an implicit use on the use MI.
- if (DefIsReMat && ImpUse)
- MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
-
- // Create a new register interval for this spill / remat.
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- if (CreatedNewVReg) {
- NewLIs.push_back(&nI);
- MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg));
- if (TrySplit)
- vrm.setIsSplitFromReg(NewVReg, li.reg);
+ bool rangesOk() const {
+ return Bogus.empty();
 }
+ };
+#endif

- if (HasUse) {
- if (CreatedNewVReg) {
- LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
- nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
- } else {
- // Extend the split live interval to this def / use.
- SlotIndex End = index.getDefIndex();
- LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
- nI.getValNumInfo(nI.getNumValNums()-1));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
+ // Collect IntRangePairs for all operands of MI that may need fixing.
+ // Treats MI's index as OldIdx (regardless of what it is in SlotIndexes'
+ // maps).
+ void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal,
+ RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) {
+ hasRegMaskOp = false;
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
+ const MachineOperand& MO = *MOI;
+
+ if (MO.isRegMask()) {
+ hasRegMaskOp = true;
+ continue;
 }
- }
- if (HasDef) {
- // An early clobber starts at the use slot, except for an early clobber
- // tied to a use operand (yes, that is a thing).
- LiveRange LR(HasEarlyClobber && !HasUse ?
- index.getUseIndex() : index.getDefIndex(), - index.getStoreIndex(), - nI.getNextValue(SlotIndex(), 0, VNInfoAllocator)); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - } - DEBUG({ - dbgs() << "\t\t\t\tAdded new interval: "; - nI.print(dbgs(), tri_); - dbgs() << '\n'; - }); - } - return CanFold; -} -bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li, - const VNInfo *VNI, - MachineBasicBlock *MBB, - SlotIndex Idx) const { - return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB)); -} + if (!MO.isReg() || MO.getReg() == 0) + continue; -/// RewriteInfo - Keep track of machine instrs that will be rewritten -/// during spilling. -namespace { - struct RewriteInfo { - SlotIndex Index; - MachineInstr *MI; - RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {} - }; + unsigned Reg = MO.getReg(); - struct RewriteInfoCompare { - bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const { - return LHS.Index < RHS.Index; - } - }; -} + // TODO: Currently we're skipping uses that are reserved or have no + // interval, but we're not updating their kills. This should be + // fixed. + if (!LIS.hasInterval(Reg) || + (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))) + continue; -void LiveIntervals:: -rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, - LiveInterval::Ranges::const_iterator &I, - MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, - unsigned Slot, int LdSlot, - bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, - VirtRegMap &vrm, - const TargetRegisterClass* rc, - SmallVector<int, 4> &ReMatIds, - const MachineLoopInfo *loopInfo, - BitVector &SpillMBBs, - DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes, - BitVector &RestoreMBBs, - DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes, - DenseMap<unsigned,unsigned> &MBBVRegsMap, - std::vector<LiveInterval*> &NewLIs) { - bool AllCanFold = true; - unsigned NewVReg = 0; - SlotIndex start = I->start.getBaseIndex(); - SlotIndex end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); - - // First collect all the def / use in this live range that will be rewritten. - // Make sure they are sorted according to instruction index. - std::vector<RewriteInfo> RewriteMIs; - for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg), - re = mri_->reg_end(); ri != re; ) { - MachineInstr *MI = &*ri; - MachineOperand &O = ri.getOperand(); - ++ri; - if (MI->isDebugValue()) { - // Modify DBG_VALUE now that the value is in a spill slot. - if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) { - uint64_t Offset = MI->getOperand(1).getImm(); - const MDNode *MDPtr = MI->getOperand(2).getMetadata(); - DebugLoc DL = MI->getDebugLoc(); - int FI = isLoadSS ? 
LdSlot : (int)Slot; - if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI, - Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); - ReplaceMachineInstrInMaps(MI, NewDV); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - continue; + LiveInterval* LI = &LIS.getInterval(Reg); + + if (MO.readsReg()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx); + if (LR != 0) + Entering.insert(std::make_pair(LI, LR)); + } + if (MO.isDef()) { + if (MO.isEarlyClobber()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot(true)); + assert(LR != 0 && "No EC range?"); + if (LR->end > OldIdx.getDeadSlot()) + Exiting.insert(std::make_pair(LI, LR)); + else + Internal.insert(std::make_pair(LI, LR)); + } else if (MO.isDead()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot()); + assert(LR != 0 && "No dead-def range?"); + Internal.insert(std::make_pair(LI, LR)); + } else { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getDeadSlot()); + assert(LR && LR->end > OldIdx.getDeadSlot() && + "Non-dead-def should have live range exiting."); + Exiting.insert(std::make_pair(LI, LR)); } } - - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - continue; } - assert(!(O.isImplicit() && O.isUse()) && - "Spilling register that's used as implicit use?"); - SlotIndex index = getInstructionIndex(MI); - if (index < start || index >= end) - continue; - - if (O.isUndef()) - // Must be defined by an implicit def. It should not be spilled. Note, - // this is for correctness reason. e.g. - // 8 %reg1024<def> = IMPLICIT_DEF - // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2 - // The live range [12, 14) are not part of the r1024 live interval since - // it's defined by an implicit def. It will not conflicts with live - // interval of r1025. Now suppose both registers are spilled, you can - // easily see a situation where both registers are reloaded before - // the INSERT_SUBREG and both target registers that would overlap. - continue; - RewriteMIs.push_back(RewriteInfo(index, MI)); } - std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare()); - - unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0; - // Now rewrite the defs and uses. - for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) { - RewriteInfo &rwi = RewriteMIs[i]; - ++i; - SlotIndex index = rwi.Index; - MachineInstr *MI = rwi.MI; - // If MI def and/or use the same register multiple times, then there - // are multiple entries. - while (i != e && RewriteMIs[i].MI == MI) { - assert(RewriteMIs[i].Index == index); - ++i; - } - MachineBasicBlock *MBB = MI->getParent(); - if (ImpUse && MI != ReMatDefMI) { - // Re-matting an instruction with virtual register use. Prevent interval - // from being spilled. - getInterval(ImpUse).markNotSpillable(); - } + // Collect IntRangePairs for all operands of MI that may need fixing. 
+ void collectRangesInBundle(MachineInstr* MI, RangeSet& Entering, + RangeSet& Exiting, SlotIndex MIStartIdx, + SlotIndex MIEndIdx) { + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); + MOI != MOE; ++MOI) { + const MachineOperand& MO = *MOI; + assert(!MO.isRegMask() && "Can't have RegMasks in bundles."); + if (!MO.isReg() || MO.getReg() == 0) + continue; - unsigned MBBId = MBB->getNumber(); - unsigned ThisVReg = 0; - if (TrySplit) { - DenseMap<unsigned,unsigned>::iterator NVI = MBBVRegsMap.find(MBBId); - if (NVI != MBBVRegsMap.end()) { - ThisVReg = NVI->second; - // One common case: - // x = use - // ... - // ... - // def = ... - // = use - // It's better to start a new interval to avoid artificially - // extend the new interval. - if (MI->readsWritesVirtualRegister(li.reg) == - std::make_pair(false,true)) { - MBBVRegsMap.erase(MBB->getNumber()); - ThisVReg = 0; - } - } - } + unsigned Reg = MO.getReg(); + + // TODO: Currently we're skipping uses that are reserved or have no + // interval, but we're not updating their kills. This should be + // fixed. + if (!LIS.hasInterval(Reg) || + (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))) + continue; - bool IsNew = ThisVReg == 0; - if (IsNew) { - // This ends the previous live interval. If all of its def / use - // can be folded, give it a low spill weight. - if (NewVReg && TrySplit && AllCanFold) { - LiveInterval &nI = getOrCreateInterval(NewVReg); - nI.weight /= 10.0F; + LiveInterval* LI = &LIS.getInterval(Reg); + + if (MO.readsReg()) { + LiveRange* LR = LI->getLiveRangeContaining(MIStartIdx); + if (LR != 0) + Entering.insert(std::make_pair(LI, LR)); + } + if (MO.isDef()) { + assert(!MO.isEarlyClobber() && "Early clobbers not allowed in bundles."); + assert(!MO.isDead() && "Dead-defs not allowed in bundles."); + LiveRange* LR = LI->getLiveRangeContaining(MIEndIdx.getDeadSlot()); + assert(LR != 0 && "Internal ranges not allowed in bundles."); + Exiting.insert(std::make_pair(LI, LR)); } - AllCanFold = true; } - NewVReg = ThisVReg; - - bool HasDef = false; - bool HasUse = false; - bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit, - index, end, MI, ReMatOrigDefMI, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg, - ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs); - if (!HasDef && !HasUse) - continue; + } - AllCanFold &= CanFold; + BundleRanges createBundleRanges(RangeSet& Entering, RangeSet& Internal, RangeSet& Exiting) { + BundleRanges BR; - // Update weight of spill interval. - LiveInterval &nI = getOrCreateInterval(NewVReg); - if (!TrySplit) { - // The spill weight is now infinity as it cannot be spilled again. - nI.markNotSpillable(); - continue; + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) { + LiveInterval* LI = EI->first; + LiveRange* LR = EI->second; + BR[LI->reg].Use = LR; } - // Keep track of the last def and first use in each MBB. - if (HasDef) { - if (MI != ReMatOrigDefMI || !CanDelete) { - bool HasKill = false; - if (!HasUse) - HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, index.getDefIndex()); - else { - // If this is a two-address code, then this index starts a new VNInfo. 
- const VNInfo *VNI = li.findDefinedVNInfoForRegInt(index.getDefIndex()); - if (VNI) - HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, index.getDefIndex()); - } - DenseMap<unsigned, std::vector<SRInfo> >::iterator SII = - SpillIdxes.find(MBBId); - if (!HasKill) { - if (SII == SpillIdxes.end()) { - std::vector<SRInfo> S; - S.push_back(SRInfo(index, NewVReg, true)); - SpillIdxes.insert(std::make_pair(MBBId, S)); - } else if (SII->second.back().vreg != NewVReg) { - SII->second.push_back(SRInfo(index, NewVReg, true)); - } else if (index > SII->second.back().index) { - // If there is an earlier def and this is a two-address - // instruction, then it's not possible to fold the store (which - // would also fold the load). - SRInfo &Info = SII->second.back(); - Info.index = index; - Info.canFold = !HasUse; - } - SpillMBBs.set(MBBId); - } else if (SII != SpillIdxes.end() && - SII->second.back().vreg == NewVReg && - index > SII->second.back().index) { - // There is an earlier def that's not killed (must be two-address). - // The spill is no longer needed. - SII->second.pop_back(); - if (SII->second.empty()) { - SpillIdxes.erase(MBBId); - SpillMBBs.reset(MBBId); - } - } + for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); + II != IE; ++II) { + LiveInterval* LI = II->first; + LiveRange* LR = II->second; + if (LR->end.isDead()) { + BR[LI->reg].Dead = LR; + } else { + BR[LI->reg].EC = LR; } } - if (HasUse) { - DenseMap<unsigned, std::vector<SRInfo> >::iterator SII = - SpillIdxes.find(MBBId); - if (SII != SpillIdxes.end() && - SII->second.back().vreg == NewVReg && - index > SII->second.back().index) - // Use(s) following the last def, it's not safe to fold the spill. - SII->second.back().canFold = false; - DenseMap<unsigned, std::vector<SRInfo> >::iterator RII = - RestoreIdxes.find(MBBId); - if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg) - // If we are splitting live intervals, only fold if it's the first - // use and there isn't another use later in the MBB. - RII->second.back().canFold = false; - else if (IsNew) { - // Only need a reload if there isn't an earlier def / use. - if (RII == RestoreIdxes.end()) { - std::vector<SRInfo> Infos; - Infos.push_back(SRInfo(index, NewVReg, true)); - RestoreIdxes.insert(std::make_pair(MBBId, Infos)); - } else { - RII->second.push_back(SRInfo(index, NewVReg, true)); - } - RestoreMBBs.set(MBBId); - } + for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); + EI != EE; ++EI) { + LiveInterval* LI = EI->first; + LiveRange* LR = EI->second; + BR[LI->reg].Def = LR; } - // Update spill weight. - unsigned loopDepth = loopInfo->getLoopDepth(MBB); - nI.weight += getSpillWeight(HasDef, HasUse, loopDepth); + return BR; } - if (NewVReg && TrySplit && AllCanFold) { - // If all of its def / use can be folded, give it a low spill weight. - LiveInterval &nI = getOrCreateInterval(NewVReg); - nI.weight /= 10.0F; + void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) { + MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx); + if (!OldKillMI->killsRegister(reg)) + return; // Bail out if we don't have kill flags on the old register. 
+ MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx); + assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill."); + assert(!NewKillMI->killsRegister(reg) && "New kill instr is already a kill."); + OldKillMI->clearRegisterKills(reg, &TRI); + NewKillMI->addRegisterKilled(reg, &TRI); } -} -bool LiveIntervals::alsoFoldARestore(int Id, SlotIndex index, - unsigned vr, BitVector &RestoreMBBs, - DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) { - if (!RestoreMBBs[Id]) - return false; - std::vector<SRInfo> &Restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = Restores.size(); i != e; ++i) - if (Restores[i].index == index && - Restores[i].vreg == vr && - Restores[i].canFold) - return true; - return false; -} - -void LiveIntervals::eraseRestoreInfo(int Id, SlotIndex index, - unsigned vr, BitVector &RestoreMBBs, - DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) { - if (!RestoreMBBs[Id]) - return; - std::vector<SRInfo> &Restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = Restores.size(); i != e; ++i) - if (Restores[i].index == index && Restores[i].vreg) - Restores[i].index = SlotIndex(); -} + void updateRegMaskSlots(SlotIndex OldIdx) { + SmallVectorImpl<SlotIndex>::iterator RI = + std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(), + OldIdx); + assert(*RI == OldIdx && "No RegMask at OldIdx."); + *RI = NewIdx; + assert(*prior(RI) < *RI && *RI < *next(RI) && + "RegSlots out of order. Did you move one call across another?"); + } -/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being -/// spilled and create empty intervals for their uses. -void -LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm, - const TargetRegisterClass* rc, - std::vector<LiveInterval*> &NewLIs) { - for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg), - re = mri_->reg_end(); ri != re; ) { - MachineOperand &O = ri.getOperand(); - MachineInstr *MI = &*ri; - ++ri; - if (MI->isDebugValue()) { - // Remove debug info for now. - O.setReg(0U); - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - continue; - } - if (O.isDef()) { - assert(MI->isImplicitDef() && - "Register def was not rewritten?"); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - } else { - // This must be an use of an implicit_def so it's not part of the live - // interval. Create a new empty live interval for it. - // FIXME: Can we simply erase some of the instructions? e.g. Stores? - unsigned NewVReg = mri_->createVirtualRegister(rc); - vrm.grow(); - vrm.setIsImplicitlyDefined(NewVReg); - NewLIs.push_back(&getOrCreateInterval(NewVReg)); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == li.reg) { - MO.setReg(NewVReg); - MO.setIsUndef(); - } - } + // Return the last use of reg between NewIdx and OldIdx. + SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) { + SlotIndex LastUse = NewIdx; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI.use_nodbg_begin(Reg), + UE = MRI.use_nodbg_end(); + UI != UE; UI.skipInstruction()) { + const MachineInstr* MI = &*UI; + SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); + if (InstSlot > LastUse && InstSlot < OldIdx) + LastUse = InstSlot; } + return LastUse; } -} - -float -LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { - // Limit the loop depth ridiculousness. 
- if (loopDepth > 200) - loopDepth = 200; - - // The loop depth is used to roughly estimate the number of times the - // instruction is executed. Something like 10^d is simple, but will quickly - // overflow a float. This expression behaves like 10^d for small d, but is - // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of - // headroom before overflow. - // By the way, powf() might be unavailable here. For consistency, - // We may take pow(double,double). - float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth); - - return (isDef + isUse) * lc; -} -static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) { - for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) - NewLIs[i]->weight = - normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize()); -} + void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + bool LiveThrough = LR->end > OldIdx.getRegSlot(); + if (LiveThrough) + return; + SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx); + if (LastUse != NewIdx) + moveKillFlags(LI->reg, NewIdx, LastUse); + LR->end = LastUse.getRegSlot(); + } -std::vector<LiveInterval*> LiveIntervals:: -addIntervalsForSpills(const LiveInterval &li, - const SmallVectorImpl<LiveInterval*> *SpillIs, - const MachineLoopInfo *loopInfo, VirtRegMap &vrm) { - assert(li.isSpillable() && "attempt to spill already spilled interval!"); - - DEBUG({ - dbgs() << "\t\t\t\tadding intervals for spills for interval: "; - li.print(dbgs(), tri_); - dbgs() << '\n'; - }); - - // Each bit specify whether a spill is required in the MBB. - BitVector SpillMBBs(mf_->getNumBlockIDs()); - DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes; - BitVector RestoreMBBs(mf_->getNumBlockIDs()); - DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes; - DenseMap<unsigned,unsigned> MBBVRegsMap; - std::vector<LiveInterval*> NewLIs; - const TargetRegisterClass* rc = mri_->getRegClass(li.reg); - - unsigned NumValNums = li.getNumValNums(); - SmallVector<MachineInstr*, 4> ReMatDefs; - ReMatDefs.resize(NumValNums, NULL); - SmallVector<MachineInstr*, 4> ReMatOrigDefs; - ReMatOrigDefs.resize(NumValNums, NULL); - SmallVector<int, 4> ReMatIds; - ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT); - BitVector ReMatDelete(NumValNums); - unsigned Slot = VirtRegMap::MAX_STACK_SLOT; - - // Spilling a split live interval. It cannot be split any further. Also, - // it's also guaranteed to be a single val# / range interval. - if (vrm.getPreSplitReg(li.reg)) { - vrm.setIsSplitFromReg(li.reg, 0); - // Unset the split kill marker on the last use. - SlotIndex KillIdx = vrm.getKillPoint(li.reg); - if (KillIdx != SlotIndex()) { - MachineInstr *KillMI = getInstructionFromIndex(KillIdx); - assert(KillMI && "Last use disappeared?"); - int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true); - assert(KillOp != -1 && "Last use disappeared?"); - KillMI->getOperand(KillOp).setIsKill(false); - } - vrm.removeKillPoint(li.reg); - bool DefIsReMat = vrm.isReMaterialized(li.reg); - Slot = vrm.getStackSlot(li.reg); - assert(Slot != VirtRegMap::MAX_STACK_SLOT); - MachineInstr *ReMatDefMI = DefIsReMat ? 
- vrm.getReMaterializedMI(li.reg) : NULL; - int LdSlot = 0; - bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - bool isLoad = isLoadSS || - (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad())); - bool IsFirstRange = true; - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - // If this is a split live interval with multiple ranges, it means there - // are two-address instructions that re-defined the value. Only the - // first def can be rematerialized! - if (IsFirstRange) { - // Note ReMatOrigDefMI has already been deleted. - rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - false, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); - } else { - rewriteInstructionsForSpills(li, false, I, NULL, 0, - Slot, 0, false, false, false, - false, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); + void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + // Extend the LiveRange if NewIdx is past the end. + if (NewIdx > LR->end) { + // Move kill flags if OldIdx was not originally the end + // (otherwise LR->end points to an invalid slot). + if (LR->end.getRegSlot() != OldIdx.getRegSlot()) { + assert(LR->end > OldIdx && "LiveRange does not cover original slot"); + moveKillFlags(LI->reg, LR->end, NewIdx); } - IsFirstRange = false; + LR->end = NewIdx.getRegSlot(); } - - handleSpilledImpDefs(li, vrm, rc, NewLIs); - normalizeSpillWeights(NewLIs); - return NewLIs; } - bool TrySplit = !intervalIsInOneMBB(li); - if (TrySplit) - ++numSplits; - bool NeedStackSlot = false; - for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end(); - i != e; ++i) { - const VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (VNI->isUnused()) - continue; // Dead val#. - // Is the def for the val# rematerializable? - MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def); - bool dummy; - if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) { - // Remember how to remat the def of this val#. - ReMatOrigDefs[VN] = ReMatDefMI; - // Original def may be modified so we have to make a copy here. - MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI); - CloneMIs.push_back(Clone); - ReMatDefs[VN] = Clone; - - bool CanDelete = true; - if (VNI->hasPHIKill()) { - // A kill is a phi node, not all of its uses can be rematerialized. - // It must not be deleted. - CanDelete = false; - // Need a stack slot if there is any live range where uses cannot be - // rematerialized. - NeedStackSlot = true; - } - if (CanDelete) - ReMatDelete.set(VN); + void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) { + bool GoingUp = NewIdx < OldIdx; + + if (GoingUp) { + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) + moveEnteringUpFrom(OldIdx, *EI); } else { - // Need a stack slot if there is any live range where uses cannot be - // rematerialized. - NeedStackSlot = true; + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) + moveEnteringDownFrom(OldIdx, *EI); } } - // One stack slot per live interval. 
- if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) { - if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT) - Slot = vrm.assignVirt2StackSlot(li.reg); - - // This case only occurs when the prealloc splitter has already assigned - // a stack slot to this vreg. - else - Slot = vrm.getStackSlot(li.reg); + void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() && + LR->end <= OldIdx.getDeadSlot() && + "Range should be internal to OldIdx."); + LiveRange Tmp(*LR); + Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber()); + Tmp.valno->def = Tmp.start; + Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot(); + LI->removeRange(*LR); + LI->addRange(Tmp); } - // Create new intervals and rewrite defs and uses. - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id]; - MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id]; - bool DefIsReMat = ReMatDefMI != NULL; - bool CanDelete = ReMatDelete[I->valno->id]; - int LdSlot = 0; - bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - bool isLoad = isLoadSS || - (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad()); - rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - CanDelete, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); + void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) { + for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); + II != IE; ++II) + moveInternalFrom(OldIdx, *II); } - // Insert spills / restores if we are splitting. - if (!TrySplit) { - handleSpilledImpDefs(li, vrm, rc, NewLIs); - normalizeSpillWeights(NewLIs); - return NewLIs; + void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) { + LiveRange* LR = P.second; + assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() && + "Range should start in OldIdx."); + assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx."); + SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber()); + LR->start = NewStart; + LR->valno->def = NewStart; } - SmallPtrSet<LiveInterval*, 4> AddedKill; - SmallVector<unsigned, 2> Ops; - if (NeedStackSlot) { - int Id = SpillMBBs.find_first(); - while (Id != -1) { - std::vector<SRInfo> &spills = SpillIdxes[Id]; - for (unsigned i = 0, e = spills.size(); i != e; ++i) { - SlotIndex index = spills[i].index; - unsigned VReg = spills[i].vreg; - LiveInterval &nI = getOrCreateInterval(VReg); - bool isReMat = vrm.isReMaterialized(VReg); - MachineInstr *MI = getInstructionFromIndex(index); - bool CanFold = false; - bool FoundUse = false; - Ops.clear(); - if (spills[i].canFold) { - CanFold = true; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (!MO.isReg() || MO.getReg() != VReg) - continue; - - Ops.push_back(j); - if (MO.isDef()) - continue; - if (isReMat || - (!FoundUse && !alsoFoldARestore(Id, index, VReg, - RestoreMBBs, RestoreIdxes))) { - // MI has two-address uses of the same register. If the use - // isn't the first and only use in the BB, then we can't fold - // it. FIXME: Move this to rewriteInstructionsForSpills. - CanFold = false; - break; - } - FoundUse = true; - } - } - // Fold the store into the def if possible. 
- bool Folded = false; - if (CanFold && !Ops.empty()) { - if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){ - Folded = true; - if (FoundUse) { - // Also folded uses, do not issue a load. - eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes); - nI.removeRange(index.getLoadIndex(), index.getDefIndex()); - } - nI.removeRange(index.getDefIndex(), index.getStoreIndex()); - } - } - - // Otherwise tell the spiller to issue a spill. - if (!Folded) { - LiveRange *LR = &nI.ranges[nI.ranges.size()-1]; - bool isKill = LR->end == index.getStoreIndex(); - if (!MI->registerDefIsDead(nI.reg)) - // No need to spill a dead def. - vrm.addSpillPoint(VReg, isKill, MI); - if (isKill) - AddedKill.insert(&nI); - } - } - Id = SpillMBBs.find_next(Id); - } + void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) { + for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); + EI != EE; ++EI) + moveExitingFrom(OldIdx, *EI); } - int Id = RestoreMBBs.find_first(); - while (Id != -1) { - std::vector<SRInfo> &restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = restores.size(); i != e; ++i) { - SlotIndex index = restores[i].index; - if (index == SlotIndex()) - continue; - unsigned VReg = restores[i].vreg; - LiveInterval &nI = getOrCreateInterval(VReg); - bool isReMat = vrm.isReMaterialized(VReg); - MachineInstr *MI = getInstructionFromIndex(index); - bool CanFold = false; - Ops.clear(); - if (restores[i].canFold) { - CanFold = true; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (!MO.isReg() || MO.getReg() != VReg) - continue; - - if (MO.isDef()) { - // If this restore were to be folded, it would have been folded - // already. - CanFold = false; - break; - } - Ops.push_back(j); - } - } + void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P, + BundleRanges& BR) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + bool LiveThrough = LR->end > OldIdx.getRegSlot(); + if (LiveThrough) { + assert((LR->start < NewIdx || BR[LI->reg].Def == LR) && + "Def in bundle should be def range."); + assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) && + "If bundle has use for this reg it should be LR."); + BR[LI->reg].Use = LR; + return; + } - // Fold the load into the use if possible. - bool Folded = false; - if (CanFold && !Ops.empty()) { - if (!isReMat) - Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg); - else { - MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg); - int LdSlot = 0; - bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - // If the rematerializable def is a load, also try to fold it. - if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad()) - Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, - Ops, isLoadSS, LdSlot, VReg); - if (!Folded) { - unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI); - if (ImpUse) { - // Re-matting an instruction with virtual register use. Add the - // register as an implicit use on the use MI and mark the register - // interval as unspillable. - LiveInterval &ImpLi = getInterval(ImpUse); - ImpLi.markNotSpillable(); - MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true)); - } - } - } - } - // If folding is not possible / failed, then tell the spiller to issue a - // load / rematerialization for us. 
- if (Folded) - nI.removeRange(index.getLoadIndex(), index.getDefIndex()); - else - vrm.addRestorePoint(VReg, MI); + SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx); + moveKillFlags(LI->reg, OldIdx, LastUse); + + if (LR->start < NewIdx) { + // Becoming a new entering range. + assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 && + "Bundle shouldn't be re-defining reg mid-range."); + assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) && + "Bundle shouldn't have different use range for same reg."); + LR->end = LastUse.getRegSlot(); + BR[LI->reg].Use = LR; + } else { + // Becoming a new Dead-def. + assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) && + "Live range starting at unexpected slot."); + assert(BR[LI->reg].Def == LR && "Reg should have def range."); + assert(BR[LI->reg].Dead == 0 && + "Can't have def and dead def of same reg in a bundle."); + LR->end = LastUse.getDeadSlot(); + BR[LI->reg].Dead = BR[LI->reg].Def; + BR[LI->reg].Def = 0; } - Id = RestoreMBBs.find_next(Id); } - // Finalize intervals: add kills, finalize spill weights, and filter out - // dead intervals. - std::vector<LiveInterval*> RetNewLIs; - for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) { - LiveInterval *LI = NewLIs[i]; - if (!LI->empty()) { - if (!AddedKill.count(LI)) { - LiveRange *LR = &LI->ranges[LI->ranges.size()-1]; - SlotIndex LastUseIdx = LR->end.getBaseIndex(); - MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx); - int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false); - assert(UseIdx != -1); - if (!LastUse->isRegTiedToDefOperand(UseIdx)) { - LastUse->getOperand(UseIdx).setIsKill(); - vrm.addKillPoint(LI->reg, LastUseIdx); - } - } - RetNewLIs.push_back(LI); + void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P, + BundleRanges& BR) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + if (NewIdx > LR->end) { + // Range extended to bundle. Add to bundle uses. + // Note: Currently adds kill flags to bundle start. + assert(BR[LI->reg].Use == 0 && + "Bundle already has use range for reg."); + moveKillFlags(LI->reg, LR->end, NewIdx); + LR->end = NewIdx.getRegSlot(); + BR[LI->reg].Use = LR; + } else { + assert(BR[LI->reg].Use != 0 && + "Bundle should already have a use range for reg."); } } - handleSpilledImpDefs(li, vrm, rc, RetNewLIs); - normalizeSpillWeights(RetNewLIs); - return RetNewLIs; -} - -/// hasAllocatableSuperReg - Return true if the specified physical register has -/// any super register that's allocatable. -bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const { - for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) - if (allocatableRegs_[*AS] && hasInterval(*AS)) - return true; - return false; -} + void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering, + BundleRanges& BR) { + bool GoingUp = NewIdx < OldIdx; -/// getRepresentativeReg - Find the largest super register of the specified -/// physical register. -unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const { - // Find the largest super-register that is allocatable. 
- unsigned BestReg = Reg; - for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) { - unsigned SuperReg = *AS; - if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) { - BestReg = SuperReg; - break; + if (GoingUp) { + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) + moveEnteringUpFromInto(OldIdx, *EI, BR); + } else { + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) + moveEnteringDownFromInto(OldIdx, *EI, BR); } } - return BestReg; -} -/// getNumConflictsWithPhysReg - Return the number of uses and defs of the -/// specified interval that conflicts with the specified physical register. -unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li, - unsigned PhysReg) const { - unsigned NumConflicts = 0; - const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg)); - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg), - E = mri_->reg_end(); I != E; ++I) { - MachineOperand &O = I.getOperand(); - MachineInstr *MI = O.getParent(); - if (MI->isDebugValue()) - continue; - SlotIndex Index = getInstructionIndex(MI); - if (pli.liveAt(Index)) - ++NumConflicts; + void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P, + BundleRanges& BR) { + // TODO: Sane rules for moving ranges into bundles. } - return NumConflicts; -} -/// spillPhysRegAroundRegDefsUses - Spill the specified physical register -/// around all defs and uses of the specified interval. Return true if it -/// was able to cut its interval. -bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, - unsigned PhysReg, VirtRegMap &vrm) { - unsigned SpillReg = getRepresentativeReg(PhysReg); - - DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg) - << " represented by " << tri_->getName(SpillReg) << '\n'); - - for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS) - // If there are registers which alias PhysReg, but which are not a - // sub-register of the chosen representative super register. Assert - // since we can't handle it yet. 
- assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) || - tri_->isSuperRegister(*AS, SpillReg)); - - bool Cut = false; - SmallVector<unsigned, 4> PRegs; - if (hasInterval(SpillReg)) - PRegs.push_back(SpillReg); - for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR) - if (hasInterval(*SR)) - PRegs.push_back(*SR); - - DEBUG({ - dbgs() << "Trying to spill:"; - for (unsigned i = 0, e = PRegs.size(); i != e; ++i) - dbgs() << ' ' << tri_->getName(PRegs[i]); - dbgs() << '\n'; - }); - - SmallPtrSet<MachineInstr*, 8> SeenMIs; - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg), - E = mri_->reg_end(); I != E; ++I) { - MachineOperand &O = I.getOperand(); - MachineInstr *MI = O.getParent(); - if (MI->isDebugValue() || SeenMIs.count(MI)) - continue; - SeenMIs.insert(MI); - SlotIndex Index = getInstructionIndex(MI); - bool LiveReg = false; - for (unsigned i = 0, e = PRegs.size(); i != e; ++i) { - unsigned PReg = PRegs[i]; - LiveInterval &pli = getInterval(PReg); - if (!pli.liveAt(Index)) - continue; - LiveReg = true; - SlotIndex StartIdx = Index.getLoadIndex(); - SlotIndex EndIdx = Index.getNextIndex().getBaseIndex(); - if (!pli.isInOneLiveRange(StartIdx, EndIdx)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, tm_); - } - report_fatal_error(Msg.str()); + void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal, + BundleRanges& BR) { + for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); + II != IE; ++II) + moveInternalFromInto(OldIdx, *II, BR); + } + + void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P, + BundleRanges& BR) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + + assert(LR->start.isRegister() && + "Don't know how to merge exiting ECs into bundles yet."); + + if (LR->end > NewIdx.getDeadSlot()) { + // This range is becoming an exiting range on the bundle. + // If there was an old dead-def of this reg, delete it. + if (BR[LI->reg].Dead != 0) { + LI->removeRange(*BR[LI->reg].Dead); + BR[LI->reg].Dead = 0; + } + assert(BR[LI->reg].Def == 0 && + "Can't have two defs for the same variable exiting a bundle."); + LR->start = NewIdx.getRegSlot(); + LR->valno->def = LR->start; + BR[LI->reg].Def = LR; + } else { + // This range is becoming internal to the bundle. + assert(LR->end == NewIdx.getRegSlot() && + "Can't bundle def whose kill is before the bundle"); + if (BR[LI->reg].Dead || BR[LI->reg].Def) { + // Already have a def for this. Just delete range. + LI->removeRange(*LR); + } else { + // Make range dead, record. + LR->end = NewIdx.getDeadSlot(); + BR[LI->reg].Dead = LR; + assert(BR[LI->reg].Use == LR && + "Range becoming dead should currently be use."); } - pli.removeRange(StartIdx, EndIdx); - LiveReg = true; + // In both cases the range is no longer a use on the bundle. 
+ BR[LI->reg].Use = 0; } - if (!LiveReg) - continue; - DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI); - vrm.addEmergencySpill(SpillReg, MI); - Cut = true; } - return Cut; -} -LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, - MachineInstr* startInst) { - LiveInterval& Interval = getOrCreateInterval(reg); - VNInfo* VN = Interval.getNextValue( - SlotIndex(getInstructionIndex(startInst).getDefIndex()), - startInst, getVNInfoAllocator()); - VN->setHasPHIKill(true); - LiveRange LR( - SlotIndex(getInstructionIndex(startInst).getDefIndex()), - getMBBEndIdx(startInst->getParent()), VN); - Interval.addRange(LR); + void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting, + BundleRanges& BR) { + for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); + EI != EE; ++EI) + moveExitingFromInto(OldIdx, *EI, BR); + } - return LR; +}; + +void LiveIntervals::handleMove(MachineInstr* MI) { + SlotIndex OldIndex = indexes_->getInstructionIndex(MI); + indexes_->removeMachineInstrFromMaps(MI); + SlotIndex NewIndex = MI->isInsideBundle() ? + indexes_->getInstructionIndex(MI) : + indexes_->insertMachineInstrInMaps(MI); + assert(getMBBStartIdx(MI->getParent()) <= OldIndex && + OldIndex < getMBBEndIdx(MI->getParent()) && + "Cannot handle moves across basic block boundaries."); + assert(!MI->isBundled() && "Can't handle bundled instructions yet."); + + HMEditor HME(*this, *mri_, *tri_, NewIndex); + HME.moveAllRangesFrom(MI, OldIndex); } +void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart) { + SlotIndex NewIndex = indexes_->getInstructionIndex(BundleStart); + HMEditor HME(*this, *mri_, *tri_, NewIndex); + HME.moveAllRangesInto(MI, BundleStart); +} diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index 110fe1e62024..60a68806c55e 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -21,6 +21,8 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> + using namespace llvm; diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h index 5d64d285f39a..dbf5ac122d5d 100644 --- a/lib/CodeGen/LiveIntervalUnion.h +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -20,8 +20,6 @@ #include "llvm/ADT/IntervalMap.h" #include "llvm/CodeGen/LiveInterval.h" -#include <algorithm> - namespace llvm { class MachineLoopRange; diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index a7d5af5198e5..d8ab7918ae25 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -65,7 +65,7 @@ void LiveRangeCalc::extend(LiveInterval *LI, assert(DomTree && "Missing dominator tree"); MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot()); - assert(Kill && "No MBB at Kill"); + assert(KillMBB && "No MBB at Kill"); // Is there a def in the same MBB we can extend? if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) @@ -237,7 +237,7 @@ void LiveRangeCalc::updateSSA(SlotIndexes *Indexes, assert(Alloc && "Need VNInfo allocator to create PHI-defs"); SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); - VNInfo *VNI = I->LI->getNextValue(Start, 0, *Alloc); + VNInfo *VNI = I->LI->getNextValue(Start, *Alloc); VNI->setIsPHIDef(true); I->Value = VNI; // This block is done, we know the final value. 
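The LiveIntervalAnalysis.cpp changes above drop the old spilling helpers and add LiveIntervals::handleMove() and handleMoveIntoBundle(), which let a pass reorder an already-indexed instruction and have its live ranges repaired in place. A minimal sketch of the intended calling pattern, assuming a caller that already holds a LiveIntervals reference and keeps the instruction inside its parent block; the wrapper name moveAndUpdate and its boilerplate are illustrative and not part of the patch:

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
using namespace llvm;

// Reorder the instruction at I in front of InsertPt within its own basic
// block, then let LiveIntervals refresh the slot index and affected ranges.
// handleMove() asserts if the instruction is bundled or the move crosses a
// basic-block boundary.
static void moveAndUpdate(LiveIntervals &LIS, MachineBasicBlock::iterator I,
                          MachineBasicBlock::iterator InsertPt) {
  MachineInstr *MI = &*I;                 // instruction being moved
  MachineBasicBlock *MBB = MI->getParent();
  MBB->splice(InsertPt, MBB, I);          // reorder first ...
  LIS.handleMove(MI);                     // ... then repair live ranges
}

The reorder has to happen before the call, because handleMove() derives the new slot index from the instruction's current position; handleMoveIntoBundle() follows the same pattern but takes the bundle's start instruction and reuses its index rather than allocating a new one.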
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index b23f85165360..695f53631e1b 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -1,4 +1,4 @@ -//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===// +//===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===// // // The LLVM Compiler Infrastructure // @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" @@ -29,13 +29,14 @@ STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE"); STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE"); STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); -LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, - LiveIntervals &LIS, - VirtRegMap &VRM) { - MachineRegisterInfo &MRI = VRM.getRegInfo(); +void LiveRangeEdit::Delegate::anchor() { } + +LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) { unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); - VRM.grow(); - VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg)); + if (VRM) { + VRM->grow(); + VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); + } LiveInterval &LI = LIS.getOrCreateInterval(VReg); newRegs_.push_back(&LI); return LI; @@ -43,37 +44,32 @@ LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - const TargetInstrInfo &tii, AliasAnalysis *aa) { assert(DefMI && "Missing instruction"); scannedRemattable_ = true; - if (!tii.isTriviallyReMaterializable(DefMI, aa)) + if (!TII.isTriviallyReMaterializable(DefMI, aa)) return false; remattable_.insert(VNI); return true; } -void LiveRangeEdit::scanRemattable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa) { +void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { for (LiveInterval::vni_iterator I = parent_.vni_begin(), E = parent_.vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; - MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def); + MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def); if (!DefMI) continue; - checkRematerializable(VNI, DefMI, tii, aa); + checkRematerializable(VNI, DefMI, aa); } scannedRemattable_ = true; } -bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa) { +bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) { if (!scannedRemattable_) - scanRemattable(lis, tii, aa); + scanRemattable(aa); return !remattable_.empty(); } @@ -81,24 +77,18 @@ bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis, /// OrigIdx are also available with the same value at UseIdx. 
bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx, - LiveIntervals &lis) { - OrigIdx = OrigIdx.getUseIndex(); - UseIdx = UseIdx.getUseIndex(); + SlotIndex UseIdx) { + OrigIdx = OrigIdx.getRegSlot(true); + UseIdx = UseIdx.getRegSlot(true); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = OrigMI->getOperand(i); if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; // Reserved registers are OK. - if (MO.isUndef() || !lis.hasInterval(MO.getReg())) + if (MO.isUndef() || !LIS.hasInterval(MO.getReg())) continue; - // We cannot depend on virtual registers in uselessRegs_. - if (uselessRegs_) - for (unsigned ui = 0, ue = uselessRegs_->size(); ui != ue; ++ui) - if ((*uselessRegs_)[ui]->reg == MO.getReg()) - return false; - LiveInterval &li = lis.getInterval(MO.getReg()); + LiveInterval &li = LIS.getInterval(MO.getReg()); const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); if (!OVNI) continue; @@ -110,8 +100,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, bool LiveRangeEdit::canRematerializeAt(Remat &RM, SlotIndex UseIdx, - bool cheapAsAMove, - LiveIntervals &lis) { + bool cheapAsAMove) { assert(scannedRemattable_ && "Call anyRematerializable first"); // Use scanRemattable info. @@ -121,19 +110,19 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, // No defining instruction provided. SlotIndex DefIdx; if (RM.OrigMI) - DefIdx = lis.getInstructionIndex(RM.OrigMI); + DefIdx = LIS.getInstructionIndex(RM.OrigMI); else { DefIdx = RM.ParentVNI->def; - RM.OrigMI = lis.getInstructionFromIndex(DefIdx); + RM.OrigMI = LIS.getInstructionFromIndex(DefIdx); assert(RM.OrigMI && "No defining instruction for remattable value"); } // If only cheap remats were requested, bail out early. - if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove()) + if (cheapAsAMove && !RM.OrigMI->isAsCheapAsAMove()) return false; // Verify that all used registers are available with the same values. - if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis)) + if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx)) return false; return true; @@ -143,27 +132,22 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, const Remat &RM, - LiveIntervals &lis, - const TargetInstrInfo &tii, const TargetRegisterInfo &tri, bool Late) { assert(RM.OrigMI && "Invalid remat"); - tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); + TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); rematted_.insert(RM.ParentVNI); - return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) - .getDefIndex(); + return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) + .getRegSlot(); } -void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) { +void LiveRangeEdit::eraseVirtReg(unsigned Reg) { if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg)) LIS.removeInterval(Reg); } bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, - SmallVectorImpl<MachineInstr*> &Dead, - MachineRegisterInfo &MRI, - LiveIntervals &LIS, - const TargetInstrInfo &TII) { + SmallVectorImpl<MachineInstr*> &Dead) { MachineInstr *DefMI = 0, *UseMI = 0; // Check that there is a single def and a single use. 
@@ -174,7 +158,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, if (MO.isDef()) { if (DefMI && DefMI != MI) return false; - if (!MI->getDesc().canFoldAsLoad()) + if (!MI->canFoldAsLoad()) return false; DefMI = MI; } else if (!MO.isUndef()) { @@ -209,19 +193,17 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, } void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, - LiveIntervals &LIS, VirtRegMap &VRM, - const TargetInstrInfo &TII) { + ArrayRef<unsigned> RegsBeingSpilled) { SetVector<LiveInterval*, SmallVector<LiveInterval*, 8>, SmallPtrSet<LiveInterval*, 8> > ToShrink; - MachineRegisterInfo &MRI = VRM.getRegInfo(); for (;;) { // Erase all dead defs. while (!Dead.empty()) { MachineInstr *MI = Dead.pop_back_val(); assert(MI->allDefsAreDead() && "Def isn't really dead"); - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); // Never delete inline asm. if (MI->isInlineAsm()) { @@ -265,7 +247,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, LI.removeValNo(VNI); if (LI.empty()) { ToShrink.remove(&LI); - eraseVirtReg(Reg, LIS); + eraseVirtReg(Reg); } } } @@ -284,12 +266,26 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, // Shrink just one live interval. Then delete new dead defs. LiveInterval *LI = ToShrink.back(); ToShrink.pop_back(); - if (foldAsLoad(LI, Dead, MRI, LIS, TII)) + if (foldAsLoad(LI, Dead)) continue; if (delegate_) delegate_->LRE_WillShrinkVirtReg(LI->reg); if (!LIS.shrinkToUses(LI, &Dead)) continue; + + // Don't create new intervals for a register being spilled. + // The new intervals would have to be spilled anyway so its not worth it. + // Also they currently aren't spilled so creating them and not spilling + // them results in incorrect code. + bool BeingSpilled = false; + for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) { + if (LI->reg == RegsBeingSpilled[i]) { + BeingSpilled = true; + break; + } + } + + if (BeingSpilled) continue; // LI may have been separated, create new intervals. LI->RenumberValues(LIS); @@ -298,16 +294,16 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, if (NumComp <= 1) continue; ++NumFracRanges; - bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg; + bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg; DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); SmallVector<LiveInterval*, 8> Dups(1, LI); for (unsigned i = 1; i != NumComp; ++i) { - Dups.push_back(&createFrom(LI->reg, LIS, VRM)); + Dups.push_back(&createFrom(LI->reg)); // If LI is an original interval that hasn't been split yet, make the new // intervals their own originals instead of referring to LI. The original // interval must contain all the split products, and LI doesn't. 
if (IsOriginal) - VRM.setIsSplitFromReg(Dups.back()->reg, 0); + VRM->setIsSplitFromReg(Dups.back()->reg, 0); if (delegate_) delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); } @@ -316,10 +312,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, - LiveIntervals &LIS, const MachineLoopInfo &Loops) { VirtRegAuxInfo VRAI(MF, LIS, Loops); - MachineRegisterInfo &MRI = MF.getRegInfo(); for (iterator I = begin(), E = end(); I != E; ++I) { LiveInterval &LI = **I; if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h deleted file mode 100644 index 9b0a671ea9e5..000000000000 --- a/lib/CodeGen/LiveRangeEdit.h +++ /dev/null @@ -1,206 +0,0 @@ -//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The LiveRangeEdit class represents changes done to a virtual register when it -// is spilled or split. -// -// The parent register is never changed. Instead, a number of new virtual -// registers are created and added to the newRegs vector. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H -#define LLVM_CODEGEN_LIVERANGEEDIT_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/LiveInterval.h" - -namespace llvm { - -class AliasAnalysis; -class LiveIntervals; -class MachineLoopInfo; -class MachineRegisterInfo; -class VirtRegMap; - -class LiveRangeEdit { -public: - /// Callback methods for LiveRangeEdit owners. - struct Delegate { - /// Called immediately before erasing a dead machine instruction. - virtual void LRE_WillEraseInstruction(MachineInstr *MI) {} - - /// Called when a virtual register is no longer used. Return false to defer - /// its deletion from LiveIntervals. - virtual bool LRE_CanEraseVirtReg(unsigned) { return true; } - - /// Called before shrinking the live range of a virtual register. - virtual void LRE_WillShrinkVirtReg(unsigned) {} - - /// Called after cloning a virtual register. - /// This is used for new registers representing connected components of Old. - virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {} - - virtual ~Delegate() {} - }; - -private: - LiveInterval &parent_; - SmallVectorImpl<LiveInterval*> &newRegs_; - Delegate *const delegate_; - const SmallVectorImpl<LiveInterval*> *uselessRegs_; - - /// firstNew_ - Index of the first register added to newRegs_. - const unsigned firstNew_; - - /// scannedRemattable_ - true when remattable values have been identified. - bool scannedRemattable_; - - /// remattable_ - Values defined by remattable instructions as identified by - /// tii.isTriviallyReMaterializable(). - SmallPtrSet<const VNInfo*,4> remattable_; - - /// rematted_ - Values that were actually rematted, and so need to have their - /// live range trimmed or entirely removed. - SmallPtrSet<const VNInfo*,4> rematted_; - - /// scanRemattable - Identify the parent_ values that may rematerialize. 
- void scanRemattable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa); - - /// allUsesAvailableAt - Return true if all registers used by OrigMI at - /// OrigIdx are also available with the same value at UseIdx. - bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx, LiveIntervals &lis); - - /// foldAsLoad - If LI has a single use and a single def that can be folded as - /// a load, eliminate the register by folding the def into the use. - bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead, - MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&); - -public: - /// Create a LiveRangeEdit for breaking down parent into smaller pieces. - /// @param parent The register being spilled or split. - /// @param newRegs List to receive any new registers created. This needn't be - /// empty initially, any existing registers are ignored. - /// @param uselessRegs List of registers that can't be used when - /// rematerializing values because they are about to be removed. - LiveRangeEdit(LiveInterval &parent, - SmallVectorImpl<LiveInterval*> &newRegs, - Delegate *delegate = 0, - const SmallVectorImpl<LiveInterval*> *uselessRegs = 0) - : parent_(parent), newRegs_(newRegs), - delegate_(delegate), - uselessRegs_(uselessRegs), - firstNew_(newRegs.size()), - scannedRemattable_(false) {} - - LiveInterval &getParent() const { return parent_; } - unsigned getReg() const { return parent_.reg; } - - /// Iterator for accessing the new registers added by this edit. - typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator; - iterator begin() const { return newRegs_.begin()+firstNew_; } - iterator end() const { return newRegs_.end(); } - unsigned size() const { return newRegs_.size()-firstNew_; } - bool empty() const { return size() == 0; } - LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; } - - ArrayRef<LiveInterval*> regs() const { - return makeArrayRef(newRegs_).slice(firstNew_); - } - - /// FIXME: Temporary accessors until we can get rid of - /// LiveIntervals::AddIntervalsForSpills - SmallVectorImpl<LiveInterval*> *getNewVRegs() { return &newRegs_; } - const SmallVectorImpl<LiveInterval*> *getUselessVRegs() { - return uselessRegs_; - } - - /// createFrom - Create a new virtual register based on OldReg. - LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&); - - /// create - Create a new register with the same class and original slot as - /// parent. - LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) { - return createFrom(getReg(), LIS, VRM); - } - - /// anyRematerializable - Return true if any parent values may be - /// rematerializable. - /// This function must be called before any rematerialization is attempted. - bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&, - AliasAnalysis*); - - /// checkRematerializable - Manually add VNI to the list of rematerializable - /// values if DefMI may be rematerializable. - bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - const TargetInstrInfo&, AliasAnalysis*); - - /// Remat - Information needed to rematerialize at a specific location. - struct Remat { - VNInfo *ParentVNI; // parent_'s value at the remat location. - MachineInstr *OrigMI; // Instruction defining ParentVNI. - explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {} - }; - - /// canRematerializeAt - Determine if ParentVNI can be rematerialized at - /// UseIdx. It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI. 
- /// When cheapAsAMove is set, only cheap remats are allowed. - bool canRematerializeAt(Remat &RM, - SlotIndex UseIdx, - bool cheapAsAMove, - LiveIntervals &lis); - - /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an - /// instruction into MBB before MI. The new instruction is mapped, but - /// liveness is not updated. - /// Return the SlotIndex of the new instruction. - SlotIndex rematerializeAt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, - const Remat &RM, - LiveIntervals&, - const TargetInstrInfo&, - const TargetRegisterInfo&, - bool Late = false); - - /// markRematerialized - explicitly mark a value as rematerialized after doing - /// it manually. - void markRematerialized(const VNInfo *ParentVNI) { - rematted_.insert(ParentVNI); - } - - /// didRematerialize - Return true if ParentVNI was rematerialized anywhere. - bool didRematerialize(const VNInfo *ParentVNI) const { - return rematted_.count(ParentVNI); - } - - /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try - /// to erase it from LIS. - void eraseVirtReg(unsigned Reg, LiveIntervals &LIS); - - /// eliminateDeadDefs - Try to delete machine instructions that are now dead - /// (allDefsAreDead returns true). This may cause live intervals to be trimmed - /// and further dead efs to be eliminated. - void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, - LiveIntervals&, VirtRegMap&, - const TargetInstrInfo&); - - /// calculateRegClassAndHint - Recompute register class and hint for each new - /// register. - void calculateRegClassAndHint(MachineFunction&, LiveIntervals&, - const MachineLoopInfo&); -}; - -} - -#endif diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 2ca90f9f05c0..5a0d97d132dd 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -14,7 +14,7 @@ // the instruction, but are never used after the instruction (i.e., they are // killed). // -// This class computes live variables using are sparse implementation based on +// This class computes live variables using a sparse implementation based on // the machine code SSA form. This class computes live variable information for // each virtual and _register allocatable_ physical register in a function. It // uses the dominance properties of SSA form to efficiently compute live @@ -33,6 +33,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -41,6 +42,7 @@ using namespace llvm; char LiveVariables::ID = 0; +char &llvm::LiveVariablesID = LiveVariables::ID; INITIALIZE_PASS_BEGIN(LiveVariables, "livevars", "Live Variable Analysis", false, false) INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim) @@ -90,7 +92,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, MachineBasicBlock *MBB, std::vector<MachineBasicBlock*> &WorkList) { unsigned BBNum = MBB->getNumber(); - + // Check to see if this basic block is one of the killing blocks. If so, // remove it. 
for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i) @@ -98,7 +100,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry break; } - + if (MBB == DefBlock) return; // Terminate recursion if (VRInfo.AliveBlocks.test(BBNum)) @@ -107,6 +109,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, // Mark the variable known alive in this bb VRInfo.AliveBlocks.set(BBNum); + assert(MBB != &MF->front() && "Can't find reaching def for virtreg"); WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend()); } @@ -130,7 +133,6 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, unsigned BBNum = MBB->getNumber(); VarInfo& VRInfo = getVarInfo(reg); - VRInfo.NumUses++; // Check to see if this basic block is already a kill block. if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) { @@ -190,7 +192,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, unsigned LastDefReg = 0; unsigned LastDefDist = 0; MachineInstr *LastDef = NULL; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { MachineInstr *Def = PhysRegDef[SubReg]; if (!Def) @@ -214,7 +216,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, unsigned DefReg = MO.getReg(); if (TRI->isSubRegister(Reg, DefReg)) { PartDefRegs.insert(DefReg); - for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(DefReg); unsigned SubReg = *SubRegs; ++SubRegs) PartDefRegs.insert(SubReg); } @@ -245,7 +247,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { true/*IsImp*/)); PhysRegDef[Reg] = LastPartialDef; SmallSet<unsigned, 8> Processed; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { if (Processed.count(SubReg)) continue; @@ -257,20 +259,19 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { false/*IsDef*/, true/*IsImp*/)); PhysRegDef[SubReg] = LastPartialDef; - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) Processed.insert(*SS); } } - } - else if (LastDef && !PhysRegUse[Reg] && - !LastDef->findRegisterDefOperand(Reg)) + } else if (LastDef && !PhysRegUse[Reg] && + !LastDef->findRegisterDefOperand(Reg)) // Last def defines the super register, add an implicit def of reg. - LastDef->addOperand(MachineOperand::CreateReg(Reg, - true/*IsDef*/, true/*IsImp*/)); + LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/, + true/*IsImp*/)); // Remember this use. PhysRegUse[Reg] = MI; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) PhysRegUse[SubReg] = MI; } @@ -286,7 +287,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { MachineInstr *LastRefOrPartRef = LastUse ? 
LastUse : LastDef; unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; unsigned LastPartDefDist = 0; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { MachineInstr *Def = PhysRegDef[SubReg]; if (Def && Def != LastDef) { @@ -331,11 +332,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // Or whole register is defined, but only partly used. // AX<dead> = AL<imp-def> // = AL<kill> - // AX = + // AX = MachineInstr *LastPartDef = 0; unsigned LastPartDefDist = 0; SmallSet<unsigned, 8> PartUses; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { MachineInstr *Def = PhysRegDef[SubReg]; if (Def && Def != LastDef) { @@ -350,7 +351,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { } if (MachineInstr *Use = PhysRegUse[SubReg]) { PartUses.insert(SubReg); - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) PartUses.insert(*SS); unsigned Dist = DistanceMap[Use]; if (Dist > LastRefOrPartRefDist) { @@ -366,7 +367,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // EAX<dead> = op AL<imp-def> // That is, EAX def is dead but AL def extends pass it. PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { if (!PartUses.count(SubReg)) continue; @@ -387,11 +388,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { else { LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); PhysRegUse[SubReg] = LastRefOrPartRef; - for (const unsigned *SSRegs = TRI->getSubRegisters(SubReg); + for (const uint16_t *SSRegs = TRI->getSubRegisters(SubReg); unsigned SSReg = *SSRegs; ++SSRegs) PhysRegUse[SSReg] = LastRefOrPartRef; } - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) PartUses.erase(*SS); } } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { @@ -419,16 +420,37 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { return true; } +void LiveVariables::HandleRegMask(const MachineOperand &MO) { + // Call HandlePhysRegKill() for all live registers clobbered by Mask. + // Clobbered registers are always dead, sp there is no need to use + // HandlePhysRegDef(). + for (unsigned Reg = 1, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) { + // Skip dead regs. + if (!PhysRegDef[Reg] && !PhysRegUse[Reg]) + continue; + // Skip mask-preserved regs. + if (!MO.clobbersPhysReg(Reg)) + continue; + // Kill the largest clobbered super-register. + // This avoids needless implicit operands. + unsigned Super = Reg; + for (const uint16_t *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) + if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR)) + Super = *SR; + HandlePhysRegKill(Super, 0); + } +} + void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, SmallVector<unsigned, 4> &Defs) { // What parts of the register are previously defined? 
SmallSet<unsigned, 32> Live; if (PhysRegDef[Reg] || PhysRegUse[Reg]) { Live.insert(Reg); - for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS) Live.insert(*SS); } else { - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { // If a register isn't itself defined, but all parts that make up of it // are defined, then consider it also defined. @@ -440,7 +462,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, continue; if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) { Live.insert(SubReg); - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) Live.insert(*SS); } } @@ -450,7 +472,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, // is referenced. HandlePhysRegKill(Reg, MI); // Only some of the sub-registers are used. - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { if (!Live.count(SubReg)) // Skip if this sub-register isn't defined. @@ -469,7 +491,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, Defs.pop_back(); PhysRegDef[Reg] = MI; PhysRegUse[Reg] = NULL; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { PhysRegDef[SubReg] = MI; PhysRegUse[SubReg] = NULL; @@ -492,6 +514,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); PHIJoins.clear(); + // FIXME: LiveIntervals will be updated to remove its dependence on + // LiveVariables to improve compilation time and eliminate bizarre pass + // dependencies. Until then, we can't change much in -O0. + if (!MRI->isSSA()) + report_fatal_error("regalloc=... not currently supported with -O0"); + analyzePHINodes(mf); // Calculate live variable information in depth first order on the CFG of the @@ -536,8 +564,13 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Clear kill and dead markers. LV will recompute them. SmallVector<unsigned, 4> UseRegs; SmallVector<unsigned, 4> DefRegs; + SmallVector<unsigned, 1> RegMasks; for (unsigned i = 0; i != NumOperandsToProcess; ++i) { MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) { + RegMasks.push_back(i); + continue; + } if (!MO.isReg() || MO.getReg() == 0) continue; unsigned MOReg = MO.getReg(); @@ -559,6 +592,10 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { HandlePhysRegUse(MOReg, MI); } + // Process all masked registers. (Call clobbers). + for (unsigned i = 0, e = RegMasks.size(); i != e; ++i) + HandleRegMask(MI->getOperand(RegMasks[i])); + // Process all defs. for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { unsigned MOReg = DefRegs[i]; @@ -590,8 +627,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // them. The tail callee need not take the same registers as input // that it produces as output, and there are dependencies for its input // registers elsewhere. 
- if (!MBB->empty() && MBB->back().getDesc().isReturn() - && !MBB->back().getDesc().isCall()) { + if (!MBB->empty() && MBB->back().isReturn() + && !MBB->back().isCall()) { MachineInstr *Ret = &MBB->back(); for (MachineRegisterInfo::liveout_iterator @@ -607,10 +644,27 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { } } + // MachineCSE may CSE instructions which write to non-allocatable physical + // registers across MBBs. Remember if any reserved register is liveout. + SmallSet<unsigned, 4> LiveOuts; + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + if (SuccMBB->isLandingPad()) + continue; + for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(), + LE = SuccMBB->livein_end(); LI != LE; ++LI) { + unsigned LReg = *LI; + if (!TRI->isInAllocatableClass(LReg)) + // Ignore other live-ins, e.g. those that are live into landing pads. + LiveOuts.insert(LReg); + } + } + // Loop over PhysRegDef / PhysRegUse, killing any registers that are // available at the end of the basic block. for (unsigned i = 0; i != NumRegs; ++i) - if (PhysRegDef[i] || PhysRegUse[i]) + if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i)) HandlePhysRegDef(i, 0, Defs); std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); @@ -754,7 +808,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, const unsigned NumNew = BB->getNumber(); // All registers used by PHI nodes in SuccBB must be live through BB. - for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(), + for (MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI) for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) if (BBI->getOperand(i+1).getMBB() == BB) diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 1318d6212497..238bf52dfed7 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -71,19 +71,15 @@ namespace { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const { - return "Local Stack Slot Allocation"; - } private: }; } // end anonymous namespace char LocalStackSlotPass::ID = 0; - -FunctionPass *llvm::createLocalStackSlotAllocationPass() { - return new LocalStackSlotPass(); -} +char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; +INITIALIZE_PASS(LocalStackSlotPass, "localstackalloc", + "Local Stack Slot Allocation", false, false) bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 4c5fe4c480a6..6c8a1072697c 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -73,7 +73,8 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) { // Make sure the instructions have their operands in the reginfo lists. MachineRegisterInfo &RegInfo = MF.getRegInfo(); - for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I) + for (MachineBasicBlock::instr_iterator + I = N->instr_begin(), E = N->instr_end(); I != E; ++I) I->AddRegOperandsToUseLists(RegInfo); LeakDetector::removeGarbageObject(N); @@ -120,8 +121,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { /// lists. 
void ilist_traits<MachineInstr>:: transferNodesFromList(ilist_traits<MachineInstr> &fromList, - MachineBasicBlock::iterator first, - MachineBasicBlock::iterator last) { + ilist_iterator<MachineInstr> first, + ilist_iterator<MachineInstr> last) { assert(Parent->getParent() == fromList.Parent->getParent() && "MachineInstr parent mismatch!"); @@ -140,33 +141,75 @@ void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) { } MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { - iterator I = begin(); - while (I != end() && I->isPHI()) + instr_iterator I = instr_begin(), E = instr_end(); + while (I != E && I->isPHI()) ++I; + assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!"); return I; } MachineBasicBlock::iterator MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { - while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue())) + iterator E = end(); + while (I != E && (I->isPHI() || I->isLabel() || I->isDebugValue())) ++I; + // FIXME: This needs to change if we wish to bundle labels / dbg_values + // inside the bundle. + assert(!I->isInsideBundle() && + "First non-phi / non-label instruction is inside a bundle!"); return I; } MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { - iterator I = end(); - while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue())) + iterator B = begin(), E = end(), I = E; + while (I != B && ((--I)->isTerminator() || I->isDebugValue())) ; /*noop */ - while (I != end() && !I->getDesc().isTerminator()) + while (I != E && !I->isTerminator()) + ++I; + return I; +} + +MachineBasicBlock::const_iterator +MachineBasicBlock::getFirstTerminator() const { + const_iterator B = begin(), E = end(), I = E; + while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + ; /*noop */ + while (I != E && !I->isTerminator()) + ++I; + return I; +} + +MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() { + instr_iterator B = instr_begin(), E = instr_end(), I = E; + while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + ; /*noop */ + while (I != E && !I->isTerminator()) ++I; return I; } MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { - iterator B = begin(), I = end(); + // Skip over end-of-block dbg_value instructions. + instr_iterator B = instr_begin(), I = instr_end(); + while (I != B) { + --I; + // Return instruction that starts a bundle. + if (I->isDebugValue() || I->isInsideBundle()) + continue; + return I; + } + // The block is all debug values. + return end(); +} + +MachineBasicBlock::const_iterator +MachineBasicBlock::getLastNonDebugInstr() const { + // Skip over end-of-block dbg_value instructions. + const_instr_iterator B = instr_begin(), I = instr_end(); while (I != B) { --I; - if (I->isDebugValue()) + // Return instruction that starts a bundle. + if (I->isDebugValue() || I->isInsideBundle()) continue; return I; } @@ -195,6 +238,18 @@ StringRef MachineBasicBlock::getName() const { return "(null)"; } +/// Return a hopefully unique identifier for this block. 
+std::string MachineBasicBlock::getFullName() const { + std::string Name; + if (getParent()) + Name = (getParent()->getFunction()->getName() + ":").str(); + if (getBasicBlock()) + Name += getBasicBlock()->getName(); + else + Name += (Twine("BB") + Twine(getNumber())).str(); + return Name; +} + void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { const MachineFunction *MF = getParent(); if (!MF) { @@ -203,8 +258,6 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { return; } - if (Alignment) { OS << "Alignment " << Alignment << "\n"; } - if (Indexes) OS << Indexes->getMBBStartIdx(this) << '\t'; @@ -218,6 +271,12 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { } if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } + if (Alignment) { + OS << Comma << "Align " << Alignment << " (" << (1u << Alignment) + << " bytes)"; + Comma = ", "; + } + OS << '\n'; const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); @@ -237,13 +296,15 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << '\n'; } - for (const_iterator I = begin(); I != end(); ++I) { + for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) { if (Indexes) { if (Indexes->hasIndex(I)) OS << Indexes->getInstructionIndex(I); OS << '\t'; } OS << '\t'; + if (I->isInsideBundle()) + OS << " * "; I->print(OS, &getParent()->getTarget()); } @@ -260,8 +321,8 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { void MachineBasicBlock::removeLiveIn(unsigned Reg) { std::vector<unsigned>::iterator I = std::find(LiveIns.begin(), LiveIns.end(), Reg); - assert(I != LiveIns.end() && "Not a live in!"); - LiveIns.erase(I); + if (I != LiveIns.end()) + LiveIns.erase(I); } bool MachineBasicBlock::isLiveIn(unsigned Reg) const { @@ -297,8 +358,22 @@ void MachineBasicBlock::updateTerminator() { TII->RemoveBranch(*this); } else { // The block has an unconditional fallthrough. If its successor is not - // its layout successor, insert a branch. - TBB = *succ_begin(); + // its layout successor, insert a branch. First we have to locate the + // only non-landing-pad successor, as that is the fallthrough block. + for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { + if ((*SI)->isLandingPad()) + continue; + assert(!TBB && "Found more than one non-landing-pad successor!"); + TBB = *SI; + } + + // If there is no non-landing-pad successor, the block has no + // fall-through edges to be concerned with. + if (!TBB) + return; + + // Finally update the unconditional successor to be reached via a branch + // if it would not be reached by fallthrough. if (!isLayoutSuccessor(TBB)) TII->InsertBranch(*this, TBB, 0, Cond, dl); } @@ -435,8 +510,8 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { fromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. 
- for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end(); - MI != ME && MI->isPHI(); ++MI) + for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(), + ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI) for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) { MachineOperand &MO = MI->getOperand(i); if (MO.getMBB() == fromMBB) @@ -473,13 +548,10 @@ bool MachineBasicBlock::canFallThrough() { if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) { // If we couldn't analyze the branch, examine the last instruction. // If the block doesn't end in a known control barrier, assume fallthrough - // is possible. The isPredicable check is needed because this code can be + // is possible. The isPredicated check is needed because this code can be // called during IfConversion, where an instruction which is normally a - // Barrier is predicated and thus no longer an actual control barrier. This - // is over-conservative though, because if an instruction isn't actually - // predicated we could still treat it like a barrier. - return empty() || !back().getDesc().isBarrier() || - back().getDesc().isPredicable(); + // Barrier is predicated and thus no longer an actual control barrier. + return empty() || !back().isBarrier() || TII->isPredicated(&back()); } // If there is no branch, control always falls through. @@ -538,14 +610,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Collect a list of virtual registers killed by the terminators. SmallVector<unsigned, 4> KilledRegs; if (LV) - for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) { + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) { MachineInstr *MI = I; for (MachineInstr::mop_iterator OI = MI->operands_begin(), OE = MI->operands_end(); OI != OE; ++OI) { - if (!OI->isReg() || !OI->isUse() || !OI->isKill() || OI->isUndef()) + if (!OI->isReg() || OI->getReg() == 0 || + !OI->isUse() || !OI->isKill() || OI->isUndef()) continue; unsigned Reg = OI->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg) && + if (TargetRegisterInfo::isPhysicalRegister(Reg) || LV->getVarInfo(Reg).removeKill(MI)) { KilledRegs.push_back(Reg); DEBUG(dbgs() << "Removing terminator kill: " << *MI); @@ -565,7 +639,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { } // Fix PHI nodes in Succ so they refer to NMBB instead of this - for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end(); + for (MachineBasicBlock::instr_iterator + i = Succ->instr_begin(),e = Succ->instr_end(); i != e && i->isPHI(); ++i) for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) if (i->getOperand(ni+1).getMBB() == this) @@ -577,14 +652,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { NMBB->addLiveIn(*I); // Update LiveVariables. + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); if (LV) { // Restore kills of virtual registers that were killed by the terminators. 
while (!KilledRegs.empty()) { unsigned Reg = KilledRegs.pop_back_val(); - for (iterator I = end(), E = begin(); I != E;) { - if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false)) + for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) { + if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) continue; - LV->getVarInfo(Reg).Kills.push_back(I); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + LV->getVarInfo(Reg).Kills.push_back(I); DEBUG(dbgs() << "Restored terminator kill: " << *I); break; } @@ -650,6 +727,42 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { return NMBB; } +MachineBasicBlock::iterator +MachineBasicBlock::erase(MachineBasicBlock::iterator I) { + if (I->isBundle()) { + MachineBasicBlock::iterator E = llvm::next(I); + return Insts.erase(I.getInstrIterator(), E.getInstrIterator()); + } + + return Insts.erase(I.getInstrIterator()); +} + +MachineInstr *MachineBasicBlock::remove(MachineInstr *I) { + if (I->isBundle()) { + instr_iterator MII = llvm::next(I); + iterator E = end(); + while (MII != E && MII->isInsideBundle()) { + MachineInstr *MI = &*MII++; + Insts.remove(MI); + } + } + + return Insts.remove(I); +} + +void MachineBasicBlock::splice(MachineBasicBlock::iterator where, + MachineBasicBlock *Other, + MachineBasicBlock::iterator From) { + if (From->isBundle()) { + MachineBasicBlock::iterator To = llvm::next(From); + Insts.splice(where.getInstrIterator(), Other->Insts, + From.getInstrIterator(), To.getInstrIterator()); + return; + } + + Insts.splice(where.getInstrIterator(), Other->Insts, From.getInstrIterator()); +} + /// removeFromParent - This method unlinks 'this' from the containing function, /// and returns it, but does not delete it. MachineBasicBlock *MachineBasicBlock::removeFromParent() { @@ -673,10 +786,10 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New) { assert(Old != New && "Cannot replace self with self!"); - MachineBasicBlock::iterator I = end(); - while (I != begin()) { + MachineBasicBlock::instr_iterator I = instr_end(); + while (I != instr_begin()) { --I; - if (!I->getDesc().isTerminator()) break; + if (!I->isTerminator()) break; // Scan the operands of this machine instruction, replacing any uses of Old // with New. @@ -755,27 +868,27 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, /// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping /// any DBG_VALUE instructions. Return UnknownLoc if there is none. DebugLoc -MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) { +MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { DebugLoc DL; - MachineBasicBlock::iterator E = end(); - if (MBBI != E) { - // Skip debug declarations, we don't want a DebugLoc from them. - MachineBasicBlock::iterator MBBI2 = MBBI; - while (MBBI2 != E && MBBI2->isDebugValue()) - MBBI2++; - if (MBBI2 != E) - DL = MBBI2->getDebugLoc(); - } + instr_iterator E = instr_end(); + if (MBBI == E) + return DL; + + // Skip debug declarations, we don't want a DebugLoc from them. + while (MBBI != E && MBBI->isDebugValue()) + MBBI++; + if (MBBI != E) + DL = MBBI->getDebugLoc(); return DL; } /// getSuccWeight - Return weight of the edge from this block to MBB. 
/// -uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) { +uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const { if (Weights.empty()) return 0; - succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); + const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); return *getWeightIterator(I); } @@ -789,6 +902,16 @@ getWeightIterator(MachineBasicBlock::succ_iterator I) { return Weights.begin() + index; } +/// getWeightIterator - Return wight iterator corresonding to the I successor +/// iterator +MachineBasicBlock::const_weight_iterator MachineBasicBlock:: +getWeightIterator(MachineBasicBlock::const_succ_iterator I) const { + assert(Weights.size() == Successors.size() && "Async weight list!"); + const size_t index = std::distance(Successors.begin(), I); + assert(index < Weights.size() && "Not a current successor!"); + return Weights.begin() + index; +} + void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB, bool t) { OS << "BB#" << MBB->getNumber(); diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index b92cda961474..a079d6e59139 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -56,6 +56,6 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { /// the other block frequencies. We do this to avoid using of floating points. /// BlockFrequency MachineBlockFrequencyInfo:: -getBlockFreq(MachineBasicBlock *MBB) const { +getBlockFreq(const MachineBasicBlock *MBB) const { return MBFI->getBlockFreq(MBB); } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp new file mode 100644 index 000000000000..22d7212007fc --- /dev/null +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -0,0 +1,1001 @@ +//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements basic block placement transformations using the CFG +// structure and branch probability estimates. +// +// The pass strives to preserve the structure of the CFG (that is, retain +// a topological ordering of basic blocks) in the absense of a *strong* signal +// to the contrary from probabilities. However, within the CFG structure, it +// attempts to choose an ordering which favors placing more likely sequences of +// blocks adjacent to each other. +// +// The algorithm works from the inner-most loop within a function outward, and +// at each stage walks through the basic blocks, trying to coalesce them into +// sequential chains where allowed by the CFG (or demanded by heavy +// probabilities). Finally, it walks the blocks in topological order, and the +// first time it reaches a chain of basic blocks, it schedules them in the +// function in-order. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "block-placement2" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumCondBranches, "Number of conditional branches"); +STATISTIC(NumUncondBranches, "Number of uncondittional branches"); +STATISTIC(CondBranchTakenFreq, + "Potential frequency of taking conditional branches"); +STATISTIC(UncondBranchTakenFreq, + "Potential frequency of taking unconditional branches"); + +namespace { +class BlockChain; +/// \brief Type for our function-wide basic block -> block chain mapping. +typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType; +} + +namespace { +/// \brief A chain of blocks which will be laid out contiguously. +/// +/// This is the datastructure representing a chain of consecutive blocks that +/// are profitable to layout together in order to maximize fallthrough +/// probabilities. We also can use a block chain to represent a sequence of +/// basic blocks which have some external (correctness) requirement for +/// sequential layout. +/// +/// Eventually, the block chains will form a directed graph over the function. +/// We provide an SCC-supporting-iterator in order to quicky build and walk the +/// SCCs of block chains within a function. +/// +/// The block chains also have support for calculating and caching probability +/// information related to the chain itself versus other chains. This is used +/// for ranking during the final layout of block chains. +class BlockChain { + /// \brief The sequence of blocks belonging to this chain. + /// + /// This is the sequence of blocks for a particular chain. These will be laid + /// out in-order within the function. + SmallVector<MachineBasicBlock *, 4> Blocks; + + /// \brief A handle to the function-wide basic block to block chain mapping. + /// + /// This is retained in each block chain to simplify the computation of child + /// block chains for SCC-formation and iteration. We store the edges to child + /// basic blocks, and map them back to their associated chains using this + /// structure. + BlockToChainMapType &BlockToChain; + +public: + /// \brief Construct a new BlockChain. + /// + /// This builds a new block chain representing a single basic block in the + /// function. It also registers itself as the chain that block participates + /// in with the BlockToChain mapping. + BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) + : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { + assert(BB && "Cannot create a chain with a null basic block"); + BlockToChain[BB] = this; + } + + /// \brief Iterator over blocks within the chain. + typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator iterator; + + /// \brief Beginning of blocks within the chain. 
+ iterator begin() const { return Blocks.begin(); } + + /// \brief End of blocks within the chain. + iterator end() const { return Blocks.end(); } + + /// \brief Merge a block chain into this one. + /// + /// This routine merges a block chain into this one. It takes care of forming + /// a contiguous sequence of basic blocks, updating the edge list, and + /// updating the block -> chain mapping. It does not free or tear down the + /// old chain, but the old chain's block list is no longer valid. + void merge(MachineBasicBlock *BB, BlockChain *Chain) { + assert(BB); + assert(!Blocks.empty()); + + // Fast path in case we don't have a chain already. + if (!Chain) { + assert(!BlockToChain[BB]); + Blocks.push_back(BB); + BlockToChain[BB] = this; + return; + } + + assert(BB == *Chain->begin()); + assert(Chain->begin() != Chain->end()); + + // Update the incoming blocks to point to this chain, and add them to the + // chain structure. + for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end(); + BI != BE; ++BI) { + Blocks.push_back(*BI); + assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain"); + BlockToChain[*BI] = this; + } + } + +#ifndef NDEBUG + /// \brief Dump the blocks in this chain. + void dump() LLVM_ATTRIBUTE_USED { + for (iterator I = begin(), E = end(); I != E; ++I) + (*I)->dump(); + } +#endif // NDEBUG + + /// \brief Count of predecessors within the loop currently being processed. + /// + /// This count is updated at each loop we process to represent the number of + /// in-loop predecessors of this chain. + unsigned LoopPredecessors; +}; +} + +namespace { +class MachineBlockPlacement : public MachineFunctionPass { + /// \brief A typedef for a block filter set. + typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet; + + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + /// \brief A handle to the function-wide block frequency pass. + const MachineBlockFrequencyInfo *MBFI; + + /// \brief A handle to the loop info. + const MachineLoopInfo *MLI; + + /// \brief A handle to the target's instruction info. + const TargetInstrInfo *TII; + + /// \brief A handle to the target's lowering info. + const TargetLowering *TLI; + + /// \brief Allocator and owner of BlockChain structures. + /// + /// We build BlockChains lazily by merging together high probability BB + /// sequences acording to the "Algo2" in the paper mentioned at the top of + /// the file. To reduce malloc traffic, we allocate them using this slab-like + /// allocator, and destroy them after the pass completes. + SpecificBumpPtrAllocator<BlockChain> ChainAllocator; + + /// \brief Function wide BasicBlock to BlockChain mapping. + /// + /// This mapping allows efficiently moving from any given basic block to the + /// BlockChain it participates in, if any. We use it to, among other things, + /// allow implicitly defining edges between chains as the existing edges + /// between basic blocks. 
+ DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain; + + void markChainSuccessors(BlockChain &Chain, + MachineBasicBlock *LoopHeaderBB, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter = 0); + MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, + BlockChain &Chain, + const BlockFilterSet *BlockFilter); + MachineBasicBlock *selectBestCandidateBlock( + BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, + const BlockFilterSet *BlockFilter); + MachineBasicBlock *getFirstUnplacedBlock( + MachineFunction &F, + const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter); + void buildChain(MachineBasicBlock *BB, BlockChain &Chain, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter = 0); + MachineBasicBlock *findBestLoopTop(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet); + void buildLoopChains(MachineFunction &F, MachineLoop &L); + void buildCFGChains(MachineFunction &F); + void AlignLoops(MachineFunction &F); + +public: + static char ID; // Pass identification, replacement for typeid + MachineBlockPlacement() : MachineFunctionPass(ID) { + initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} + +char MachineBlockPlacement::ID = 0; +char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; +INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2", + "Branch Probability Basic Block Placement", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", + "Branch Probability Basic Block Placement", false, false) + +#ifndef NDEBUG +/// \brief Helper to print the name of a MBB. +/// +/// Only used by debug logging. +static std::string getBlockName(MachineBasicBlock *BB) { + std::string Result; + raw_string_ostream OS(Result); + OS << "BB#" << BB->getNumber() + << " (derived from LLVM BB '" << BB->getName() << "')"; + OS.flush(); + return Result; +} + +/// \brief Helper to print the number of a MBB. +/// +/// Only used by debug logging. +static std::string getBlockNum(MachineBasicBlock *BB) { + std::string Result; + raw_string_ostream OS(Result); + OS << "BB#" << BB->getNumber(); + OS.flush(); + return Result; +} +#endif + +/// \brief Mark a chain's successors as having one fewer preds. +/// +/// When a chain is being merged into the "placed" chain, this routine will +/// quickly walk the successors of each block in the chain and mark them as +/// having one fewer active predecessor. It also adds any successors of this +/// chain which reach the zero-predecessor state to the worklist passed in. +void MachineBlockPlacement::markChainSuccessors( + BlockChain &Chain, + MachineBasicBlock *LoopHeaderBB, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter) { + // Walk all the blocks in this chain, marking their successors as having + // a predecessor placed. 
+ for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end(); + CBI != CBE; ++CBI) { + // Add any successors for which this is the only un-placed in-loop + // predecessor to the worklist as a viable candidate for CFG-neutral + // placement. No subsequent placement of this block will violate the CFG + // shape, so we get to use heuristics to choose a favorable placement. + for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(), + SE = (*CBI)->succ_end(); + SI != SE; ++SI) { + if (BlockFilter && !BlockFilter->count(*SI)) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + // Disregard edges within a fixed chain, or edges to the loop header. + if (&Chain == &SuccChain || *SI == LoopHeaderBB) + continue; + + // This is a cross-chain edge that is within the loop, so decrement the + // loop predecessor count of the destination chain. + if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0) + BlockWorkList.push_back(*SuccChain.begin()); + } + } +} + +/// \brief Select the best successor for a block. +/// +/// This looks across all successors of a particular block and attempts to +/// select the "best" one to be the layout successor. It only considers direct +/// successors which also pass the block filter. It will attempt to avoid +/// breaking CFG structure, but cave and break such structures in the case of +/// very hot successor edges. +/// +/// \returns The best successor block found, or null if none are viable. +MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( + MachineBasicBlock *BB, BlockChain &Chain, + const BlockFilterSet *BlockFilter) { + const BranchProbability HotProb(4, 5); // 80% + + MachineBasicBlock *BestSucc = 0; + // FIXME: Due to the performance of the probability and weight routines in + // the MBPI analysis, we manually compute probabilities using the edge + // weights. This is suboptimal as it means that the somewhat subtle + // definition of edge weight semantics is encoded here as well. We should + // improve the MBPI interface to effeciently support query patterns such as + // this. + uint32_t BestWeight = 0; + uint32_t WeightScale = 0; + uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale); + DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); + SI != SE; ++SI) { + if (BlockFilter && !BlockFilter->count(*SI)) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + if (&SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n"); + continue; + } + if (*SI != *SuccChain.begin()) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n"); + continue; + } + + uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI); + BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + + // Only consider successors which are either "hot", or wouldn't violate + // any CFG constraints. + if (SuccChain.LoopPredecessors != 0) { + if (SuccProb < HotProb) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); + continue; + } + + // Make sure that a hot successor doesn't have a globally more important + // predecessor. 
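// (Illustrative numbers, not taken from the code: with freq(BB) = 100,
//  P(BB->Succ) = 0.9, and HotProb = 4/5, the candidate edge is discounted to
//  100 * 0.9 * (1 - 4/5) = 18; any other eligible predecessor edge into Succ
//  with frequency >= 18 marks this as a bad CFG conflict and the successor is
//  skipped.)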
+ BlockFrequency CandidateEdgeFreq + = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); + bool BadCFGConflict = false; + for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(), + PE = (*SI)->pred_end(); + PI != PE; ++PI) { + if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) || + BlockToChain[*PI] == &Chain) + continue; + BlockFrequency PredEdgeFreq + = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI); + if (PredEdgeFreq >= CandidateEdgeFreq) { + BadCFGConflict = true; + break; + } + } + if (BadCFGConflict) { + DEBUG(dbgs() << " " << getBlockName(*SI) + << " -> non-cold CFG conflict\n"); + continue; + } + } + + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob)" + << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") + << "\n"); + if (BestSucc && BestWeight >= SuccWeight) + continue; + BestSucc = *SI; + BestWeight = SuccWeight; + } + return BestSucc; +} + +namespace { +/// \brief Predicate struct to detect blocks already placed. +class IsBlockPlaced { + const BlockChain &PlacedChain; + const BlockToChainMapType &BlockToChain; + +public: + IsBlockPlaced(const BlockChain &PlacedChain, + const BlockToChainMapType &BlockToChain) + : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {} + + bool operator()(MachineBasicBlock *BB) const { + return BlockToChain.lookup(BB) == &PlacedChain; + } +}; +} + +/// \brief Select the best block from a worklist. +/// +/// This looks through the provided worklist as a list of candidate basic +/// blocks and select the most profitable one to place. The definition of +/// profitable only really makes sense in the context of a loop. This returns +/// the most frequently visited block in the worklist, which in the case of +/// a loop, is the one most desirable to be physically close to the rest of the +/// loop body in order to improve icache behavior. +/// +/// \returns The best block found, or null if none are viable. +MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( + BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, + const BlockFilterSet *BlockFilter) { + // Once we need to walk the worklist looking for a candidate, cleanup the + // worklist of already placed entries. + // FIXME: If this shows up on profiles, it could be folded (at the cost of + // some code complexity) into the loop below. + WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), + IsBlockPlaced(Chain, BlockToChain)), + WorkList.end()); + + MachineBasicBlock *BestBlock = 0; + BlockFrequency BestFreq; + for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(), + WBE = WorkList.end(); + WBI != WBE; ++WBI) { + BlockChain &SuccChain = *BlockToChain[*WBI]; + if (&SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(*WBI) + << " -> Already merged!\n"); + continue; + } + assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); + + BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); + DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq + << " (freq)\n"); + if (BestBlock && BestFreq >= CandidateFreq) + continue; + BestBlock = *WBI; + BestFreq = CandidateFreq; + } + return BestBlock; +} + +/// \brief Retrieve the first unplaced basic block. +/// +/// This routine is called when we are unable to use the CFG to walk through +/// all of the basic blocks and form a chain due to unnatural loops in the CFG. +/// We walk through the function's blocks in order, starting from the +/// LastUnplacedBlockIt. 
We update this iterator on each call to avoid +/// re-scanning the entire sequence on repeated calls to this routine. +MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( + MachineFunction &F, const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter) { + for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E; + ++I) { + if (BlockFilter && !BlockFilter->count(I)) + continue; + if (BlockToChain[I] != &PlacedChain) { + PrevUnplacedBlockIt = I; + // Now select the head of the chain to which the unplaced block belongs + // as the block to place. This will force the entire chain to be placed, + // and satisfies the requirements of merging chains. + return *BlockToChain[I]->begin(); + } + } + return 0; +} + +void MachineBlockPlacement::buildChain( + MachineBasicBlock *BB, + BlockChain &Chain, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter) { + assert(BB); + assert(BlockToChain[BB] == &Chain); + MachineFunction &F = *BB->getParent(); + MachineFunction::iterator PrevUnplacedBlockIt = F.begin(); + + MachineBasicBlock *LoopHeaderBB = BB; + markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); + BB = *llvm::prior(Chain.end()); + for (;;) { + assert(BB); + assert(BlockToChain[BB] == &Chain); + assert(*llvm::prior(Chain.end()) == BB); + MachineBasicBlock *BestSucc = 0; + + // Look for the best viable successor if there is one to place immediately + // after this block. + BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); + + // If an immediate successor isn't available, look for the best viable + // block among those we've identified as not violating the loop's CFG at + // this point. This won't be a fallthrough, but it will increase locality. + if (!BestSucc) + BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); + + if (!BestSucc) { + BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, + BlockFilter); + if (!BestSucc) + break; + + DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the " + "layout successor until the CFG reduces\n"); + } + + // Place this block, updating the datastructures to reflect its placement. + BlockChain &SuccChain = *BlockToChain[BestSucc]; + // Zero out LoopPredecessors for the successor we're about to merge in case + // we selected a successor that didn't fit naturally into the CFG. + SuccChain.LoopPredecessors = 0; + DEBUG(dbgs() << "Merging from " << getBlockNum(BB) + << " to " << getBlockNum(BestSucc) << "\n"); + markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); + Chain.merge(BestSucc, &SuccChain); + BB = *llvm::prior(Chain.end()); + } + + DEBUG(dbgs() << "Finished forming chain for header block " + << getBlockNum(*Chain.begin()) << "\n"); +} + +/// \brief Find the best loop top block for layout. +/// +/// This routine implements the logic to analyze the loop looking for the best +/// block to layout at the top of the loop. Typically this is done to maximize +/// fallthrough opportunities. +MachineBasicBlock * +MachineBlockPlacement::findBestLoopTop(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { + // We don't want to layout the loop linearly in all cases. If the loop header + // is just a normal basic block in the loop, we want to look for what block + // within the loop is the best one to layout at the top. 
However, if the loop + // header has be pre-merged into a chain due to predecessors not having + // analyzable branches, *and* the predecessor it is merged with is *not* part + // of the loop, rotating the header into the middle of the loop will create + // a non-contiguous range of blocks which is Very Bad. So start with the + // header and only rotate if safe. + BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; + if (!LoopBlockSet.count(*HeaderChain.begin())) + return L.getHeader(); + + BlockFrequency BestExitEdgeFreq; + MachineBasicBlock *ExitingBB = 0; + MachineBasicBlock *LoopingBB = 0; + // If there are exits to outer loops, loop rotation can severely limit + // fallthrough opportunites unless it selects such an exit. Keep a set of + // blocks where rotating to exit with that block will reach an outer loop. + SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop; + + DEBUG(dbgs() << "Finding best loop exit for: " + << getBlockName(L.getHeader()) << "\n"); + for (MachineLoop::block_iterator I = L.block_begin(), + E = L.block_end(); + I != E; ++I) { + BlockChain &Chain = *BlockToChain[*I]; + // Ensure that this block is at the end of a chain; otherwise it could be + // mid-way through an inner loop or a successor of an analyzable branch. + if (*I != *llvm::prior(Chain.end())) + continue; + + // Now walk the successors. We need to establish whether this has a viable + // exiting successor and whether it has a viable non-exiting successor. + // We store the old exiting state and restore it if a viable looping + // successor isn't found. + MachineBasicBlock *OldExitingBB = ExitingBB; + BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq; + // We also compute and store the best looping successor for use in layout. + MachineBasicBlock *BestLoopSucc = 0; + // FIXME: Due to the performance of the probability and weight routines in + // the MBPI analysis, we use the internal weights. This is only valid + // because it is purely a ranking function, we don't care about anything + // but the relative values. + uint32_t BestLoopSuccWeight = 0; + // FIXME: We also manually compute the probabilities to avoid quadratic + // behavior. + uint32_t WeightScale = 0; + uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale); + for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(), + SE = (*I)->succ_end(); + SI != SE; ++SI) { + if ((*SI)->isLandingPad()) + continue; + if (*SI == *I) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + // Don't split chains, either this chain or the successor's chain. + if (&Chain == &SuccChain || *SI != *SuccChain.begin()) { + DEBUG(dbgs() << " " << (LoopBlockSet.count(*SI) ? "looping: " + : "exiting: ") + << getBlockName(*I) << " -> " + << getBlockName(*SI) << " (chain conflict)\n"); + continue; + } + + uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI); + if (LoopBlockSet.count(*SI)) { + DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> " + << getBlockName(*SI) << " (" << SuccWeight << ")\n"); + if (BestLoopSucc && BestLoopSuccWeight >= SuccWeight) + continue; + + BestLoopSucc = *SI; + BestLoopSuccWeight = SuccWeight; + continue; + } + + BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; + DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " + << getBlockName(*SI) << " (" << ExitEdgeFreq << ")\n"); + // Note that we slightly bias this toward an existing layout successor to + // retain incoming order in the absence of better information. 
+ // FIXME: Should we bias this more strongly? It's pretty weak. + if (!ExitingBB || ExitEdgeFreq > BestExitEdgeFreq || + ((*I)->isLayoutSuccessor(*SI) && + !(ExitEdgeFreq < BestExitEdgeFreq))) { + BestExitEdgeFreq = ExitEdgeFreq; + ExitingBB = *I; + } + + if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) + if (ExitLoop->contains(&L)) + BlocksExitingToOuterLoop.insert(*I); + } + + // Restore the old exiting state, no viable looping successor was found. + if (!BestLoopSucc) { + ExitingBB = OldExitingBB; + BestExitEdgeFreq = OldBestExitEdgeFreq; + continue; + } + + // If this was best exiting block thus far, also record the looping block. + if (ExitingBB == *I) + LoopingBB = BestLoopSucc; + } + // Without a candidate exitting block or with only a single block in the + // loop, just use the loop header to layout the loop. + if (!ExitingBB || L.getNumBlocks() == 1) + return L.getHeader(); + + // Also, if we have exit blocks which lead to outer loops but didn't select + // one of them as the exiting block we are rotating toward, disable loop + // rotation altogether. + if (!BlocksExitingToOuterLoop.empty() && + !BlocksExitingToOuterLoop.count(ExitingBB)) + return L.getHeader(); + + assert(LoopingBB && "All successors of a loop block are exit blocks!"); + DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); + DEBUG(dbgs() << " Best top block: " << getBlockName(LoopingBB) << "\n"); + return LoopingBB; +} + +/// \brief Forms basic block chains from the natural loop structures. +/// +/// These chains are designed to preserve the existing *structure* of the code +/// as much as possible. We can then stitch the chains together in a way which +/// both preserves the topological structure and minimizes taken conditional +/// branches. +void MachineBlockPlacement::buildLoopChains(MachineFunction &F, + MachineLoop &L) { + // First recurse through any nested loops, building chains for those inner + // loops. + for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) + buildLoopChains(F, **LI); + + SmallVector<MachineBasicBlock *, 16> BlockWorkList; + BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); + + MachineBasicBlock *LayoutTop = findBestLoopTop(F, L, LoopBlockSet); + BlockChain &LoopChain = *BlockToChain[LayoutTop]; + + // FIXME: This is a really lame way of walking the chains in the loop: we + // walk the blocks, and use a set to prevent visiting a particular chain + // twice. + SmallPtrSet<BlockChain *, 4> UpdatedPreds; + assert(LoopChain.LoopPredecessors == 0); + UpdatedPreds.insert(&LoopChain); + for (MachineLoop::block_iterator BI = L.block_begin(), + BE = L.block_end(); + BI != BE; ++BI) { + BlockChain &Chain = *BlockToChain[*BI]; + if (!UpdatedPreds.insert(&Chain)) + continue; + + assert(Chain.LoopPredecessors == 0); + for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); + BCI != BCE; ++BCI) { + assert(BlockToChain[*BCI] == &Chain); + for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), + PE = (*BCI)->pred_end(); + PI != PE; ++PI) { + if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI)) + continue; + ++Chain.LoopPredecessors; + } + } + + if (Chain.LoopPredecessors == 0) + BlockWorkList.push_back(*Chain.begin()); + } + + buildChain(LayoutTop, LoopChain, BlockWorkList, &LoopBlockSet); + + DEBUG({ + // Crash at the end so we get all of the debugging output first. 
+ bool BadLoop = false; + if (LoopChain.LoopPredecessors) { + BadLoop = true; + dbgs() << "Loop chain contains a block without its preds placed!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; + } + for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); + BCI != BCE; ++BCI) + if (!LoopBlockSet.erase(*BCI)) { + // We don't mark the loop as bad here because there are real situations + // where this can occur. For example, with an unanalyzable fallthrough + // from a loop block to a non-loop block or vice versa. + dbgs() << "Loop chain contains a block not contained by the loop!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" + << " Bad block: " << getBlockName(*BCI) << "\n"; + } + + if (!LoopBlockSet.empty()) { + BadLoop = true; + for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(), + LBE = LoopBlockSet.end(); + LBI != LBE; ++LBI) + dbgs() << "Loop contains blocks never placed into a chain!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" + << " Bad block: " << getBlockName(*LBI) << "\n"; + } + assert(!BadLoop && "Detected problems with the placement of this loop."); + }); +} + +void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { + // Ensure that every BB in the function has an associated chain to simplify + // the assumptions of the remaining algorithm. + SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock *BB = FI; + BlockChain *Chain + = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); + // Also, merge any blocks which we cannot reason about and must preserve + // the exact fallthrough behavior for. + for (;;) { + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) + break; + + MachineFunction::iterator NextFI(llvm::next(FI)); + MachineBasicBlock *NextBB = NextFI; + // Ensure that the layout successor is a viable block, as we know that + // fallthrough is a possibility. + assert(NextFI != FE && "Can't fallthrough past the last block."); + DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " + << getBlockName(BB) << " -> " << getBlockName(NextBB) + << "\n"); + Chain->merge(NextBB, 0); + FI = NextFI; + BB = NextBB; + } + } + + // Build any loop-based chains. 
+ for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE; + ++LI) + buildLoopChains(F, **LI); + + SmallVector<MachineBasicBlock *, 16> BlockWorkList; + + SmallPtrSet<BlockChain *, 4> UpdatedPreds; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock *BB = &*FI; + BlockChain &Chain = *BlockToChain[BB]; + if (!UpdatedPreds.insert(&Chain)) + continue; + + assert(Chain.LoopPredecessors == 0); + for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); + BCI != BCE; ++BCI) { + assert(BlockToChain[*BCI] == &Chain); + for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), + PE = (*BCI)->pred_end(); + PI != PE; ++PI) { + if (BlockToChain[*PI] == &Chain) + continue; + ++Chain.LoopPredecessors; + } + } + + if (Chain.LoopPredecessors == 0) + BlockWorkList.push_back(*Chain.begin()); + } + + BlockChain &FunctionChain = *BlockToChain[&F.front()]; + buildChain(&F.front(), FunctionChain, BlockWorkList); + + typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType; + DEBUG({ + // Crash at the end so we get all of the debugging output first. + bool BadFunc = false; + FunctionBlockSetType FunctionBlockSet; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + FunctionBlockSet.insert(FI); + + for (BlockChain::iterator BCI = FunctionChain.begin(), + BCE = FunctionChain.end(); + BCI != BCE; ++BCI) + if (!FunctionBlockSet.erase(*BCI)) { + BadFunc = true; + dbgs() << "Function chain contains a block not in the function!\n" + << " Bad block: " << getBlockName(*BCI) << "\n"; + } + + if (!FunctionBlockSet.empty()) { + BadFunc = true; + for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(), + FBE = FunctionBlockSet.end(); + FBI != FBE; ++FBI) + dbgs() << "Function contains blocks never placed into a chain!\n" + << " Bad block: " << getBlockName(*FBI) << "\n"; + } + assert(!BadFunc && "Detected problems with the block placement."); + }); + + // Splice the blocks into place. + MachineFunction::iterator InsertPos = F.begin(); + for (BlockChain::iterator BI = FunctionChain.begin(), + BE = FunctionChain.end(); + BI != BE; ++BI) { + DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain " + : " ... ") + << getBlockName(*BI) << "\n"); + if (InsertPos != MachineFunction::iterator(*BI)) + F.splice(InsertPos, *BI); + else + ++InsertPos; + + // Update the terminator of the previous block. + if (BI == FunctionChain.begin()) + continue; + MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI)); + + // FIXME: It would be awesome of updateTerminator would just return rather + // than assert when the branch cannot be analyzed in order to remove this + // boiler plate. + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) + PrevBB->updateTerminator(); + } + + // Fixup the last block. + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) + F.back().updateTerminator(); +} + +/// \brief Recursive helper to align a loop and any nested loops. +static void AlignLoop(MachineFunction &F, MachineLoop *L, unsigned Align) { + // Recurse through nested loops. + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) + AlignLoop(F, *I, Align); + + L->getTopBlock()->setAlignment(Align); +} + +/// \brief Align loop headers to target preferred alignments. 
+void MachineBlockPlacement::AlignLoops(MachineFunction &F) { + if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + return; + + unsigned Align = TLI->getPrefLoopAlignment(); + if (!Align) + return; // Don't care about loop alignment. + + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) + AlignLoop(F, *I, Align); +} + +bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { + // Check for single-block functions and skip them. + if (llvm::next(F.begin()) == F.end()) + return false; + + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MLI = &getAnalysis<MachineLoopInfo>(); + TII = F.getTarget().getInstrInfo(); + TLI = F.getTarget().getTargetLowering(); + assert(BlockToChain.empty()); + + buildCFGChains(F); + AlignLoops(F); + + BlockToChain.clear(); + ChainAllocator.DestroyAll(); + + // We always return true as we have no way to track whether the final order + // differs from the original order. + return true; +} + +namespace { +/// \brief A pass to compute block placement statistics. +/// +/// A separate pass to compute interesting statistics for evaluating block +/// placement. This is separate from the actual placement pass so that they can +/// be computed in the absense of any placement transformations or when using +/// alternative placement strategies. +class MachineBlockPlacementStats : public MachineFunctionPass { + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + /// \brief A handle to the function-wide block frequency pass. + const MachineBlockFrequencyInfo *MBFI; + +public: + static char ID; // Pass identification, replacement for typeid + MachineBlockPlacementStats() : MachineFunctionPass(ID) { + initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} + +char MachineBlockPlacementStats::ID = 0; +char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID; +INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats", + "Basic Block Placement Stats", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats", + "Basic Block Placement Stats", false, false) + +bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { + // Check for single-block functions and skip them. + if (llvm::next(F.begin()) == F.end()) + return false; + + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + + for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { + BlockFrequency BlockFreq = MBFI->getBlockFreq(I); + Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches + : NumUncondBranches; + Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq + : UncondBranchTakenFreq; + for (MachineBasicBlock::succ_iterator SI = I->succ_begin(), + SE = I->succ_end(); + SI != SE; ++SI) { + // Skip if this successor is a fallthrough. 
+ if (I->isLayoutSuccessor(*SI)) + continue; + + BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI); + ++NumBranches; + BranchTakenFreq += EdgeFreq.getFrequency(); + } + } + + return false; +} + diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index c13fa6bc5333..0cc1af07952d 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -26,26 +26,43 @@ INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob", char MachineBranchProbabilityInfo::ID = 0; -uint32_t MachineBranchProbabilityInfo:: -getSumForBlock(MachineBasicBlock *MBB) const { - uint32_t Sum = 0; +void MachineBranchProbabilityInfo::anchor() { } +uint32_t MachineBranchProbabilityInfo:: +getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { + // First we compute the sum with 64-bits of precision, ensuring that cannot + // overflow by bounding the number of weights considered. Hopefully no one + // actually needs 2^32 successors. + assert(MBB->succ_size() < UINT32_MAX); + uint64_t Sum = 0; + Scale = 1; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - MachineBasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(MBB, Succ); - uint32_t PrevSum = Sum; - + uint32_t Weight = getEdgeWeight(MBB, *I); Sum += Weight; - assert(Sum > PrevSum); (void) PrevSum; } + // If the computed sum fits in 32-bits, we're done. + if (Sum <= UINT32_MAX) + return Sum; + + // Otherwise, compute the scale necessary to cause the weights to fit, and + // re-sum with that scale applied. + assert((Sum / UINT32_MAX) < UINT32_MAX); + Scale = (Sum / UINT32_MAX) + 1; + Sum = 0; + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + uint32_t Weight = getEdgeWeight(MBB, *I); + Sum += Weight / Scale; + } + assert(Sum <= UINT32_MAX); return Sum; } uint32_t -MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src, - MachineBasicBlock *Dst) const { +MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { uint32_t Weight = Src->getSuccWeight(Dst); if (!Weight) return DEFAULT_WEIGHT; @@ -55,37 +72,24 @@ MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src, bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% - uint32_t Weight = getEdgeWeight(Src, Dst); - uint32_t Sum = getSumForBlock(Src); - - // FIXME: Implement BranchProbability::compare then change this code to - // compare this BranchProbability against a static "hot" BranchProbability. - return (uint64_t)Weight * 5 > (uint64_t)Sum * 4; + // FIXME: Compare against a static "hot" BranchProbability. + return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); } MachineBasicBlock * MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { - uint32_t Sum = 0; uint32_t MaxWeight = 0; MachineBasicBlock *MaxSucc = 0; - for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - MachineBasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(MBB, Succ); - uint32_t PrevSum = Sum; - - Sum += Weight; - assert(Sum > PrevSum); (void) PrevSum; - + uint32_t Weight = getEdgeWeight(MBB, *I); if (Weight > MaxWeight) { MaxWeight = Weight; - MaxSucc = Succ; + MaxSucc = *I; } } - // FIXME: Use BranchProbability::compare. 
- if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4) + if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5)) return MaxSucc; return 0; @@ -94,8 +98,9 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { - uint32_t N = getEdgeWeight(Src, Dst); - uint32_t D = getSumForBlock(Src); + uint32_t Scale = 1; + uint32_t D = getSumForBlock(Src, Scale); + uint32_t N = getEdgeWeight(Src, Dst) / Scale; return BranchProbability(N, D); } diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 7eda8c129dc4..a63688e9ec62 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -26,13 +26,14 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" - using namespace llvm; STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumCSEs, "Number of common subexpression eliminated"); STATISTIC(NumPhysCSEs, "Number of physreg referencing common subexpr eliminated"); +STATISTIC(NumCrossBBCSEs, + "Number of cross-MBB physreg referencing CS eliminated"); STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); namespace { @@ -49,7 +50,7 @@ namespace { } virtual bool runOnMachineFunction(MachineFunction &MF); - + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); @@ -62,6 +63,8 @@ namespace { virtual void releaseMemory() { ScopeMap.clear(); Exps.clear(); + AllocatableRegs.clear(); + ReservedRegs.clear(); } private: @@ -75,6 +78,8 @@ namespace { ScopedHTType VNT; SmallVector<MachineInstr*, 64> Exps; unsigned CurrVN; + BitVector AllocatableRegs; + BitVector ReservedRegs; bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, @@ -82,9 +87,12 @@ namespace { MachineBasicBlock::const_iterator E) const ; bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, - SmallSet<unsigned,8> &PhysRefs) const; + SmallSet<unsigned,8> &PhysRefs, + SmallVector<unsigned,2> &PhysDefs) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet<unsigned,8> &PhysRefs) const; + SmallSet<unsigned,8> &PhysRefs, + SmallVector<unsigned,2> &PhysDefs, + bool &NonLocal) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI); @@ -99,6 +107,7 @@ namespace { } // end anonymous namespace char MachineCSE::ID = 0; +char &llvm::MachineCSEID = MachineCSE::ID; INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) @@ -106,8 +115,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) -FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } - bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB) { bool Changed = false; @@ -163,6 +170,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, bool SeenDef = false; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { const MachineOperand &MO = I->getOperand(i); + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) + SeenDef = true; if (!MO.isReg() || !MO.getReg()) continue; if (!TRI->regsOverlap(MO.getReg(), Reg)) @@ -173,7 +182,7 @@ 
MachineCSE::isPhysDefTriviallyDead(unsigned Reg, SeenDef = true; } if (SeenDef) - // See a def of Reg (or an alias) before encountering any use, it's + // See a def of Reg (or an alias) before encountering any use, it's // trivially dead. return true; @@ -189,7 +198,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, /// instruction does not uses a physical register. bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, - SmallSet<unsigned,8> &PhysRefs) const { + SmallSet<unsigned,8> &PhysRefs, + SmallVector<unsigned,2> &PhysDefs) const{ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); @@ -207,7 +217,9 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) continue; PhysRefs.insert(Reg); - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + if (MO.isDef()) + PhysDefs.push_back(Reg); + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) PhysRefs.insert(*Alias); } @@ -215,25 +227,56 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, } bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet<unsigned,8> &PhysRefs) const { + SmallSet<unsigned,8> &PhysRefs, + SmallVector<unsigned,2> &PhysDefs, + bool &NonLocal) const { // For now conservatively returns false if the common subexpression is - // not in the same basic block as the given instruction. - MachineBasicBlock *MBB = MI->getParent(); - if (CSMI->getParent() != MBB) - return false; + // not in the same basic block as the given instruction. The only exception + // is if the common subexpression is in the sole predecessor block. + const MachineBasicBlock *MBB = MI->getParent(); + const MachineBasicBlock *CSMBB = CSMI->getParent(); + + bool CrossMBB = false; + if (CSMBB != MBB) { + if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB) + return false; + + for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) { + if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i])) + // Avoid extending live range of physical registers if they are + //allocatable or reserved. + return false; + } + CrossMBB = true; + } MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); MachineBasicBlock::const_iterator E = MI; + MachineBasicBlock::const_iterator EE = CSMBB->end(); unsigned LookAheadLeft = LookAheadLimit; while (LookAheadLeft) { // Skip over dbg_value's. - while (I != E && I->isDebugValue()) + while (I != E && I != EE && I->isDebugValue()) ++I; + if (I == EE) { + assert(CrossMBB && "Reaching end-of-MBB without finding MI?"); + (void)CrossMBB; + CrossMBB = false; + NonLocal = true; + I = MBB->begin(); + EE = MBB->end(); + continue; + } + if (I == E) return true; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { const MachineOperand &MO = I->getOperand(i); + // RegMasks go on instructions like calls that clobber lots of physregs. + // Don't attempt to CSE across such an instruction. + if (MO.isRegMask()) + return false; if (!MO.isReg() || !MO.isDef()) continue; unsigned MOReg = MO.getReg(); @@ -260,12 +303,11 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { return false; // Ignore stuff that we obviously can't move. 
- const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() || + if (MI->mayStore() || MI->isCall() || MI->isTerminator() || MI->hasUnmodeledSideEffects()) return false; - if (MCID.mayLoad()) { + if (MI->mayLoad()) { // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. @@ -287,7 +329,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. - if (MI->getDesc().isAsCheapAsAMove()) { + if (MI->isAsCheapAsAMove()) { MachineBasicBlock *CSBB = CSMI->getParent(); MachineBasicBlock *BB = MI->getParent(); if (CSBB != BB && !CSBB->isSuccessor(BB)) @@ -376,7 +418,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Commute commutable instructions. bool Commuted = false; - if (!FoundCSE && MI->getDesc().isCommutable()) { + if (!FoundCSE && MI->isCommutable()) { MachineInstr *NewMI = TII->commuteInstruction(MI); if (NewMI) { Commuted = true; @@ -394,16 +436,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // If the instruction defines physical registers and the values *may* be // used, then it's not safe to replace it with a common subexpression. // It's also not safe if the instruction uses physical registers. + bool CrossMBBPhysDef = false; SmallSet<unsigned,8> PhysRefs; - if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) { + SmallVector<unsigned, 2> PhysDefs; + if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) { FoundCSE = false; - // ... Unless the CS is local and it also defines the physical register - // which is not clobbered in between and the physical register uses - // were not clobbered. + // ... Unless the CS is local or is in the sole predecessor block + // and it also defines the physical register which is not clobbered + // in between and the physical register uses were not clobbered. unsigned CSVN = VNT.lookup(MI); MachineInstr *CSMI = Exps[CSVN]; - if (PhysRegDefsReach(CSMI, MI, PhysRefs)) + if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) FoundCSE = true; } @@ -458,6 +502,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); MRI->clearKillFlags(CSEPairs[i].second); } + + if (CrossMBBPhysDef) { + // Add physical register defs now coming in from a predecessor to MBB + // livein list. 
+ while (!PhysDefs.empty()) { + unsigned LiveIn = PhysDefs.pop_back_val(); + if (!MBB->isLiveIn(LiveIn)) + MBB->addLiveIn(LiveIn); + } + ++NumCrossBBCSEs; + } + MI->eraseFromParent(); ++NumCSEs; if (!PhysRefs.empty()) @@ -542,5 +598,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<MachineDominatorTree>(); + AllocatableRegs = TRI->getAllocatableSet(MF); + ReservedRegs = TRI->getReservedRegs(MF); return PerformCSE(DT->getRootNode()); } diff --git a/lib/CodeGen/MachineCodeEmitter.cpp b/lib/CodeGen/MachineCodeEmitter.cpp new file mode 100644 index 000000000000..81b49784c052 --- /dev/null +++ b/lib/CodeGen/MachineCodeEmitter.cpp @@ -0,0 +1,14 @@ +//===-- llvm/CodeGen/MachineCodeEmitter.cpp - Code emission -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineCodeEmitter.h" + +using namespace llvm; + +void MachineCodeEmitter::anchor() { } diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp new file mode 100644 index 000000000000..9730eaacf6e4 --- /dev/null +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -0,0 +1,340 @@ +//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an extremely simple MachineInstr-level copy propagation pass. 
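// As a concrete illustration (mirroring the nop-copy case handled in
// CopyPropagateBlock below), the pass erases the second copy in sequences
// such as:
//
//   %ECX<def> = COPY %EAX<kill>
//   ...                              <- nothing clobbers EAX or ECX here
//   %EAX<def> = COPY %ECX            <- a nop copy, deleted by this pass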
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "codegen-cp" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumDeletes, "Number of dead copies deleted"); + +namespace { + class MachineCopyPropagation : public MachineFunctionPass { + const TargetRegisterInfo *TRI; + BitVector ReservedRegs; + + public: + static char ID; // Pass identification, replacement for typeid + MachineCopyPropagation() : MachineFunctionPass(ID) { + initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + typedef SmallVector<unsigned, 4> DestList; + typedef DenseMap<unsigned, DestList> SourceMap; + + void SourceNoLongerAvailable(unsigned Reg, + SourceMap &SrcMap, + DenseMap<unsigned, MachineInstr*> &AvailCopyMap); + bool CopyPropagateBlock(MachineBasicBlock &MBB); + }; +} +char MachineCopyPropagation::ID = 0; +char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID; + +INITIALIZE_PASS(MachineCopyPropagation, "machine-cp", + "Machine Copy Propagation Pass", false, false) + +void +MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg, + SourceMap &SrcMap, + DenseMap<unsigned, MachineInstr*> &AvailCopyMap) { + SourceMap::iterator SI = SrcMap.find(Reg); + if (SI != SrcMap.end()) { + const DestList& Defs = SI->second; + for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + unsigned MappedDef = *I; + // Source of copy is no longer available for propagation. + if (AvailCopyMap.erase(MappedDef)) { + for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) + AvailCopyMap.erase(*SR); + } + } + } + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + SI = SrcMap.find(*AS); + if (SI != SrcMap.end()) { + const DestList& Defs = SI->second; + for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + unsigned MappedDef = *I; + if (AvailCopyMap.erase(MappedDef)) { + for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) + AvailCopyMap.erase(*SR); + } + } + } + } +} + +static bool NoInterveningSideEffect(const MachineInstr *CopyMI, + const MachineInstr *MI) { + const MachineBasicBlock *MBB = CopyMI->getParent(); + if (MI->getParent() != MBB) + return false; + MachineBasicBlock::const_iterator I = CopyMI; + MachineBasicBlock::const_iterator E = MBB->end(); + MachineBasicBlock::const_iterator E2 = MI; + + ++I; + while (I != E && I != E2) { + if (I->hasUnmodeledSideEffects() || I->isCall() || + I->isTerminator()) + return false; + ++I; + } + return true; +} + +/// isNopCopy - Return true if the specified copy is really a nop. That is +/// if the source of the copy is the same of the definition of the copy that +/// supplied the source. If the source of the copy is a sub-register than it +/// must check the sub-indices match. e.g. 
+/// ecx = mov eax +/// al = mov cl +/// But not +/// ecx = mov eax +/// al = mov ch +static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src, + const TargetRegisterInfo *TRI) { + unsigned SrcSrc = CopyMI->getOperand(1).getReg(); + if (Def == SrcSrc) + return true; + if (TRI->isSubRegister(SrcSrc, Def)) { + unsigned SrcDef = CopyMI->getOperand(0).getReg(); + unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def); + if (!SubIdx) + return false; + return SubIdx == TRI->getSubRegIndex(SrcDef, Src); + } + + return false; +} + +bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { + SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion + DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map + DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map + SourceMap SrcMap; // Src -> Def map + + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { + MachineInstr *MI = &*I; + ++I; + + if (MI->isCopy()) { + unsigned Def = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Def) || + TargetRegisterInfo::isVirtualRegister(Src)) + report_fatal_error("MachineCopyPropagation should be run after" + " register allocation!"); + + DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src); + if (CI != AvailCopyMap.end()) { + MachineInstr *CopyMI = CI->second; + if (!ReservedRegs.test(Def) && + (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) && + isNopCopy(CopyMI, Def, Src, TRI)) { + // The two copies cancel out and the source of the first copy + // hasn't been overridden, eliminate the second one. e.g. + // %ECX<def> = COPY %EAX<kill> + // ... nothing clobbered EAX. + // %EAX<def> = COPY %ECX + // => + // %ECX<def> = COPY %EAX + // + // Also avoid eliminating a copy from reserved registers unless the + // definition is proven not clobbered. e.g. + // %RSP<def> = COPY %RAX + // CALL + // %RAX<def> = COPY %RSP + + // Clear any kills of Def between CopyMI and MI. This extends the + // live range. + for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I) + I->clearRegisterKills(Def, TRI); + + MI->eraseFromParent(); + Changed = true; + ++NumDeletes; + continue; + } + } + + // If Src is defined by a previous copy, it cannot be eliminated. + CI = CopyMap.find(Src); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + for (const uint16_t *AS = TRI->getAliasSet(Src); *AS; ++AS) { + CI = CopyMap.find(*AS); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + } + + // Copy is now a candidate for deletion. + MaybeDeadCopies.insert(MI); + + // If 'Src' is previously source of another copy, then this earlier copy's + // source is no longer available. e.g. + // %xmm9<def> = copy %xmm2 + // ... + // %xmm2<def> = copy %xmm0 + // ... + // %xmm2<def> = copy %xmm9 + SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap); + + // Remember Def is defined by the copy. + // ... Make sure to clear the def maps of aliases first. + for (const uint16_t *AS = TRI->getAliasSet(Def); *AS; ++AS) { + CopyMap.erase(*AS); + AvailCopyMap.erase(*AS); + } + CopyMap[Def] = MI; + AvailCopyMap[Def] = MI; + for (const uint16_t *SR = TRI->getSubRegisters(Def); *SR; ++SR) { + CopyMap[*SR] = MI; + AvailCopyMap[*SR] = MI; + } + + // Remember source that's copied to Def. Once it's clobbered, then + // it's no longer available for copy propagation. 
+ if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) == + SrcMap[Src].end()) { + SrcMap[Src].push_back(Def); + } + + continue; + } + + // Not a copy. + SmallVector<unsigned, 2> Defs; + int RegMaskOpNum = -1; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + RegMaskOpNum = i; + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (TargetRegisterInfo::isVirtualRegister(Reg)) + report_fatal_error("MachineCopyPropagation should be run after" + " register allocation!"); + + if (MO.isDef()) { + Defs.push_back(Reg); + continue; + } + + // If 'Reg' is defined by a copy, the copy is no longer a candidate + // for elimination. + DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + CI = CopyMap.find(*AS); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + } + } + + // The instruction has a register mask operand which means that it clobbers + // a large set of registers. It is possible to use the register mask to + // prune the available copies, but treat it like a basic block boundary for + // now. + if (RegMaskOpNum >= 0) { + // Erase any MaybeDeadCopies whose destination register is clobbered. + const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum); + for (SmallSetVector<MachineInstr*, 8>::iterator + DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); + DI != DE; ++DI) { + unsigned Reg = (*DI)->getOperand(0).getReg(); + if (ReservedRegs.test(Reg) || !MaskMO.clobbersPhysReg(Reg)) + continue; + (*DI)->eraseFromParent(); + Changed = true; + ++NumDeletes; + } + + // Clear all data structures as if we were beginning a new basic block. + MaybeDeadCopies.clear(); + AvailCopyMap.clear(); + CopyMap.clear(); + SrcMap.clear(); + continue; + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Reg = Defs[i]; + + // No longer defined by a copy. + CopyMap.erase(Reg); + AvailCopyMap.erase(Reg); + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + CopyMap.erase(*AS); + AvailCopyMap.erase(*AS); + } + + // If 'Reg' is previously source of a copy, it is no longer available for + // copy propagation. + SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap); + } + } + + // If MBB doesn't have successors, delete the copies whose defs are not used. + // If MBB does have successors, then conservative assume the defs are live-out + // since we don't want to trust live-in lists. 
+ if (MBB.succ_empty()) { + for (SmallSetVector<MachineInstr*, 8>::iterator + DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); + DI != DE; ++DI) { + if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) { + (*DI)->eraseFromParent(); + Changed = true; + ++NumDeletes; + } + } + } + + return Changed; +} + +bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + + TRI = MF.getTarget().getRegisterInfo(); + ReservedRegs = TRI->getReservedRegs(MF); + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= CopyPropagateBlock(*I); + + return Changed; +} diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 20066a067b8f..d8c2f6a2eaef 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -13,12 +13,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Config/config.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" @@ -28,6 +25,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" @@ -197,9 +195,10 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { MachineMemOperand * MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, uint64_t s, unsigned base_alignment, - const MDNode *TBAAInfo) { + const MDNode *TBAAInfo, + const MDNode *Ranges) { return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, - TBAAInfo); + TBAAInfo, Ranges); } MachineMemOperand * @@ -286,7 +285,13 @@ void MachineFunction::dump() const { } void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { - OS << "# Machine code for function " << Fn->getName() << ":\n"; + OS << "# Machine code for function " << Fn->getName() << ": "; + if (RegInfo) { + OS << (RegInfo->isSSA() ? 
"SSA" : "Post SSA"); + if (!RegInfo->tracksLiveness()) + OS << ", not tracking liveness"; + } + OS << '\n'; // Print Frame Information FrameInfo->print(*this, OS); @@ -335,7 +340,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getFunction()->getNameStr() + "' function"; + return "CFG for '" + F->getFunction()->getName().str() + "' function"; } std::string getNodeLabel(const MachineBasicBlock *Node, @@ -368,7 +373,7 @@ namespace llvm { void MachineFunction::viewCFG() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getNameStr()); + ViewGraph(this, "mf" + getFunction()->getName()); #else errs() << "MachineFunction::viewCFG is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -378,7 +383,7 @@ void MachineFunction::viewCFG() const void MachineFunction::viewCFGOnly() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getNameStr(), true); + ViewGraph(this, "mf" + getFunction()->getName(), true); #else errs() << "MachineFunction::viewCFGOnly is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -464,7 +469,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { if (!isCalleeSavedInfoValid()) return BV; - for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) + for (const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) BV.set(*CSR); // The entry MBB always has all CSRs pristine. @@ -532,6 +537,8 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const { switch (getEntryKind()) { case MachineJumpTableInfo::EK_BlockAddress: return TD.getPointerSize(); + case MachineJumpTableInfo::EK_GPRel64BlockAddress: + return 8; case MachineJumpTableInfo::EK_GPRel32BlockAddress: case MachineJumpTableInfo::EK_LabelDifference32: case MachineJumpTableInfo::EK_Custom32: @@ -539,8 +546,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const { case MachineJumpTableInfo::EK_Inline: return 0; } - assert(0 && "Unknown jump table encoding!"); - return ~0; + llvm_unreachable("Unknown jump table encoding!"); } /// getEntryAlignment - Return the alignment of each entry in the jump table. @@ -551,6 +557,8 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const { switch (getEntryKind()) { case MachineJumpTableInfo::EK_BlockAddress: return TD.getPointerABIAlignment(); + case MachineJumpTableInfo::EK_GPRel64BlockAddress: + return TD.getABIIntegerTypeAlignment(64); case MachineJumpTableInfo::EK_GPRel32BlockAddress: case MachineJumpTableInfo::EK_LabelDifference32: case MachineJumpTableInfo::EK_Custom32: @@ -558,8 +566,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const { case MachineJumpTableInfo::EK_Inline: return 1; } - assert(0 && "Unknown jump table encoding!"); - return ~0; + llvm_unreachable("Unknown jump table encoding!"); } /// createJumpTableIndex - Create a new jump table entry in the jump table info. 
@@ -619,6 +626,8 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } // MachineConstantPool implementation //===----------------------------------------------------------------------===// +void MachineConstantPoolValue::anchor() { } + Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); @@ -653,35 +662,37 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, // reject them. if (A->getType() == B->getType()) return false; + // We can't handle structs or arrays. + if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) || + isa<StructType>(B->getType()) || isa<ArrayType>(B->getType())) + return false; + // For now, only support constants with the same size. - if (TD->getTypeStoreSize(A->getType()) != TD->getTypeStoreSize(B->getType())) + uint64_t StoreSize = TD->getTypeStoreSize(A->getType()); + if (StoreSize != TD->getTypeStoreSize(B->getType()) || + StoreSize > 128) return false; - // If a floating-point value and an integer value have the same encoding, - // they can share a constant-pool entry. - if (const ConstantFP *AFP = dyn_cast<ConstantFP>(A)) - if (const ConstantInt *BI = dyn_cast<ConstantInt>(B)) - return AFP->getValueAPF().bitcastToAPInt() == BI->getValue(); - if (const ConstantFP *BFP = dyn_cast<ConstantFP>(B)) - if (const ConstantInt *AI = dyn_cast<ConstantInt>(A)) - return BFP->getValueAPF().bitcastToAPInt() == AI->getValue(); - - // Two vectors can share an entry if each pair of corresponding - // elements could. - if (const ConstantVector *AV = dyn_cast<ConstantVector>(A)) - if (const ConstantVector *BV = dyn_cast<ConstantVector>(B)) { - if (AV->getType()->getNumElements() != BV->getType()->getNumElements()) - return false; - for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i) - if (!CanShareConstantPoolEntry(AV->getOperand(i), - BV->getOperand(i), TD)) - return false; - return true; - } - - // TODO: Handle other cases. - - return false; + Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8); + + // Try constant folding a bitcast of both instructions to an integer. If we + // get two identical ConstantInt's, then we are good to share them. We use + // the constant folding APIs to do this so that we get the benefit of + // TargetData. 
+ if (isa<PointerType>(A->getType())) + A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, + const_cast<Constant*>(A), TD); + else if (A->getType() != IntTy) + A = ConstantFoldInstOperands(Instruction::BitCast, IntTy, + const_cast<Constant*>(A), TD); + if (isa<PointerType>(B->getType())) + B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, + const_cast<Constant*>(B), TD); + else if (B->getType() != IntTy) + B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, + const_cast<Constant*>(B), TD); + + return A == B; } /// getConstantPoolIndex - Create a new entry in the constant pool or return diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index 054c750c9f2b..35591e1649d3 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -19,9 +19,8 @@ using namespace llvm; char MachineFunctionAnalysis::ID = 0; -MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm, - CodeGenOpt::Level OL) : - FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) { +MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm) : + FunctionPass(ID), TM(tm), MF(0) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index a240667f7d6a..e553a0463a2a 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -178,6 +179,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsKill = isKill; IsDead = isDead; IsUndef = isUndef; + IsInternalRead = false; IsEarlyClobber = false; IsDebug = isDebug; SubReg = 0; @@ -191,7 +193,6 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return false; switch (getType()) { - default: llvm_unreachable("Unrecognized operand type"); case MachineOperand::MO_Register: return getReg() == Other.getReg() && isDef() == Other.isDef() && getSubReg() == Other.getSubReg(); @@ -216,11 +217,14 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { getOffset() == Other.getOffset(); case MachineOperand::MO_BlockAddress: return getBlockAddress() == Other.getBlockAddress(); + case MO_RegisterMask: + return getRegMask() == Other.getRegMask(); case MachineOperand::MO_MCSymbol: return getMCSymbol() == Other.getMCSymbol(); case MachineOperand::MO_Metadata: return getMetadata() == Other.getMetadata(); } + llvm_unreachable("Invalid machine operand type"); } /// print - Print the specified machine operand. 
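
Editorial note: the rewritten CanShareConstantPoolEntry above reduces the question "can these two differently typed constants share one pool slot?" to "do they fold to the same N-bit integer?", after rejecting aggregates and anything larger than 128 bytes. A self-contained sketch of the underlying bit-pattern idea, deliberately written in plain C++ with memcmp instead of the ConstantFolding API; encodingsMatch and its parameters are illustrative, not part of the patch:

#include <cstdint>
#include <cstring>

// Two same-sized scalar constants may share a pool slot when their in-memory
// encodings are byte-for-byte identical, e.g. 1.0f and the integer 0x3f800000.
template <typename A, typename B>
bool encodingsMatch(const A &Lhs, const B &Rhs) {
  if (sizeof(A) != sizeof(B))
    return false;                      // only same-size constants qualify
  return std::memcmp(&Lhs, &Rhs, sizeof(A)) == 0;
}

// encodingsMatch(1.0f, uint32_t(0x3f800000u)) is true on IEEE-754 targets.

The real routine goes through ConstantFoldInstOperands so that pointer constants and TargetData-dependent layouts are handled correctly; the sketch only shows the intuition behind comparing the folded integer encodings.
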
@@ -240,7 +244,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << PrintReg(getReg(), TRI, getSubReg()); if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || - isEarlyClobber()) { + isInternalRead() || isEarlyClobber()) { OS << '<'; bool NeedComma = false; if (isDef()) { @@ -256,14 +260,26 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { NeedComma = true; } - if (isKill() || isDead() || isUndef()) { + if (isKill() || isDead() || isUndef() || isInternalRead()) { if (NeedComma) OS << ','; - if (isKill()) OS << "kill"; - if (isDead()) OS << "dead"; + NeedComma = false; + if (isKill()) { + OS << "kill"; + NeedComma = true; + } + if (isDead()) { + OS << "dead"; + NeedComma = true; + } if (isUndef()) { - if (isKill() || isDead()) - OS << ','; + if (NeedComma) OS << ','; OS << "undef"; + NeedComma = true; + } + if (isInternalRead()) { + if (NeedComma) OS << ','; + OS << "internal"; + NeedComma = true; } } OS << '>'; @@ -311,6 +327,9 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false); OS << '>'; break; + case MachineOperand::MO_RegisterMask: + OS << "<regmask>"; + break; case MachineOperand::MO_Metadata: OS << '<'; WriteAsOperand(OS, getMetadata(), /*PrintType=*/false); @@ -319,8 +338,6 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { case MachineOperand::MO_MCSymbol: OS << "<MCSym=" << *getMCSymbol() << '>'; break; - default: - llvm_unreachable("Unrecognized operand type"); } if (unsigned TF = getTargetFlags()) @@ -364,10 +381,11 @@ MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) { MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, uint64_t s, unsigned int a, - const MDNode *TBAAInfo) + const MDNode *TBAAInfo, + const MDNode *Ranges) : PtrInfo(ptrinfo), Size(s), Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)), - TBAAInfo(TBAAInfo) { + TBAAInfo(TBAAInfo), Ranges(Ranges) { assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) && "invalid pointer value"); assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); @@ -465,7 +483,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { /// MCID NULL and no operands. MachineInstr::MachineInstr() : MCID(0), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), + NumMemRefs(0), MemRefs(0), Parent(0) { // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); @@ -473,10 +491,10 @@ MachineInstr::MachineInstr() void MachineInstr::addImplicitDefUseOperands() { if (MCID->ImplicitDefs) - for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs) + for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs) addOperand(MachineOperand::CreateReg(*ImpDefs, true, true)); if (MCID->ImplicitUses) - for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses) + for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses) addOperand(MachineOperand::CreateReg(*ImpUses, false, true)); } @@ -485,7 +503,7 @@ void MachineInstr::addImplicitDefUseOperands() { /// the MCInstrDesc. 
MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0) { + NumMemRefs(0), MemRefs(0), Parent(0) { unsigned NumImplicitOps = 0; if (!NoImp) NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -500,7 +518,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, bool NoImp) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { + NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) { unsigned NumImplicitOps = 0; if (!NoImp) NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -516,7 +534,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, /// basic block. MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0) { + NumMemRefs(0), MemRefs(0), Parent(0) { assert(MBB && "Cannot use inserting ctor with null basic block!"); unsigned NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -532,7 +550,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const MCInstrDesc &tid) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { + NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); unsigned NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -547,7 +565,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) : MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0), - MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), + NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); @@ -722,17 +740,33 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { void MachineInstr::addMemOperand(MachineFunction &MF, MachineMemOperand *MO) { mmo_iterator OldMemRefs = MemRefs; - mmo_iterator OldMemRefsEnd = MemRefsEnd; + uint16_t OldNumMemRefs = NumMemRefs; - size_t NewNum = (MemRefsEnd - MemRefs) + 1; + uint16_t NewNum = NumMemRefs + 1; mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum); - mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum; - std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs); + std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs); NewMemRefs[NewNum - 1] = MO; MemRefs = NewMemRefs; - MemRefsEnd = NewMemRefsEnd; + NumMemRefs = NewNum; +} + +bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { + const MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::const_instr_iterator MII = *this; ++MII; + while (MII != MBB->end() && MII->isInsideBundle()) { + if (MII->getDesc().getFlags() & Mask) { + if (Type == AnyInBundle) + return true; + } else { + if (Type == AllInBundle) + return false; + } + ++MII; + } + + return Type == AllInBundle; } bool MachineInstr::isIdenticalTo(const MachineInstr *Other, @@ -743,6 +777,19 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, Other->getNumOperands() != getNumOperands()) return false; + if (isBundle()) { + // Both instructions are bundles, compare MIs inside the bundle. 
+ MachineBasicBlock::const_instr_iterator I1 = *this; + MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end(); + MachineBasicBlock::const_instr_iterator I2 = *Other; + MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end(); + while (++I1 != E1 && I1->isInsideBundle()) { + ++I2; + if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check)) + return false; + } + } + // Check operands to make sure they match. for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); @@ -789,6 +836,18 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, /// block, and returns it, but does not delete it. MachineInstr *MachineInstr::removeFromParent() { assert(getParent() && "Not embedded in a basic block!"); + + // If it's a bundle then remove the MIs inside the bundle as well. + if (isBundle()) { + MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::instr_iterator MII = *this; ++MII; + MachineBasicBlock::instr_iterator E = MBB->instr_end(); + while (MII != E && MII->isInsideBundle()) { + MachineInstr *MI = &*MII; + ++MII; + MBB->remove(MI); + } + } getParent()->remove(this); return this; } @@ -798,6 +857,17 @@ MachineInstr *MachineInstr::removeFromParent() { /// block, and deletes it. void MachineInstr::eraseFromParent() { assert(getParent() && "Not embedded in a basic block!"); + // If it's a bundle then remove the MIs inside the bundle as well. + if (isBundle()) { + MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::instr_iterator MII = *this; ++MII; + MachineBasicBlock::instr_iterator E = MBB->instr_end(); + while (MII != E && MII->isInsideBundle()) { + MachineInstr *MI = &*MII; + ++MII; + MBB->erase(MI); + } + } getParent()->erase(this); } @@ -817,6 +887,16 @@ unsigned MachineInstr::getNumExplicitOperands() const { return NumOperands; } +/// isBundled - Return true if this instruction part of a bundle. This is true +/// if either itself or its following instruction is marked "InsideBundle". +bool MachineInstr::isBundled() const { + if (isInsideBundle()) + return true; + MachineBasicBlock::const_instr_iterator nextMI = this; + ++nextMI; + return nextMI != Parent->instr_end() && nextMI->isInsideBundle(); +} + bool MachineInstr::isStackAligningInlineAsm() const { if (isInlineAsm()) { unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); @@ -887,6 +967,20 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, return NULL; } +/// getBundleSize - Return the number of instructions inside the MI bundle. +unsigned MachineInstr::getBundleSize() const { + assert(isBundle() && "Expecting a bundle"); + + MachineBasicBlock::const_instr_iterator I = *this; + unsigned Size = 0; + while ((++I)->isInsideBundle()) { + ++Size; + } + assert(Size > 1 && "Malformed bundle"); + + return Size; +} + /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of /// the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. @@ -948,6 +1042,10 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap, bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); + // Accept regmask operands when Overlap is set. + // Ignore them when looking for a specific def operand (Overlap == false). 
+ if (isPhys && Overlap && MO.isRegMask() && MO.clobbersPhysReg(Reg)) + return i; if (!MO.isReg() || !MO.isDef()) continue; unsigned MOReg = MO.getReg(); @@ -1118,6 +1216,8 @@ void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) { /// copyPredicates - Copies predicate operand(s) from MI. void MachineInstr::copyPredicates(const MachineInstr *MI) { + assert(!isBundle() && "MachineInstr::copyPredicates() can't handle bundles"); + const MCInstrDesc &MCID = MI->getDesc(); if (!MCID.isPredicable()) return; @@ -1159,13 +1259,13 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, AliasAnalysis *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. - if (MCID->mayStore() || MCID->isCall()) { + if (mayStore() || isCall()) { SawStore = true; return false; } if (isLabel() || isDebugValue() || - MCID->isTerminator() || hasUnmodeledSideEffects()) + isTerminator() || hasUnmodeledSideEffects()) return false; // See if this instruction does a load. If so, we have to guarantee that the @@ -1173,7 +1273,7 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, // destination. The check for isInvariantLoad gives the targe the chance to // classify the load as always returning a constant, e.g. a constant pool // load. - if (MCID->mayLoad() && !isInvariantLoad(AA)) + if (mayLoad() && !isInvariantLoad(AA)) // Otherwise, this is a real load. If there is a store between the load and // end of block, or if the load is volatile, we can't move it. return !SawStore && !hasVolatileMemoryRef(); @@ -1213,9 +1313,9 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, /// have no volatile memory references. bool MachineInstr::hasVolatileMemoryRef() const { // An instruction known never to access memory won't have a volatile access. - if (!MCID->mayStore() && - !MCID->mayLoad() && - !MCID->isCall() && + if (!mayStore() && + !mayLoad() && + !isCall() && !hasUnmodeledSideEffects()) return false; @@ -1239,7 +1339,7 @@ bool MachineInstr::hasVolatileMemoryRef() const { /// *all* loads the instruction does are invariant (if it does multiple loads). bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { // If the instruction doesn't load at all, it isn't an invariant load. - if (!MCID->mayLoad()) + if (!mayLoad()) return false; // If the instruction has lost its memoperands, conservatively assume that @@ -1253,6 +1353,7 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { E = memoperands_end(); I != E; ++I) { if ((*I)->isVolatile()) return false; if ((*I)->isStore()) return false; + if ((*I)->isInvariant()) return true; if (const Value *V = (*I)->getValue()) { // A load from a constant PseudoSourceValue is invariant. @@ -1291,7 +1392,7 @@ unsigned MachineInstr::isConstantValuePHI() const { } bool MachineInstr::hasUnmodeledSideEffects() const { - if (getDesc().hasUnmodeledSideEffects()) + if (hasProperty(MCID::UnmodeledSideEffects)) return true; if (isInlineAsm()) { unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); @@ -1384,7 +1485,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << " = "; // Print the opcode name. - OS << getDesc().getName(); + if (TM && TM->getInstrInfo()) + OS << TM->getInstrInfo()->getName(getOpcode()); + else + OS << "UNKNOWN"; // Print the rest of the operands. bool OmittedAnyCallClobbers = false; @@ -1419,14 +1523,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { // call instructions much less noisy on targets where calls clobber lots // of registers. 
Don't rely on MO.isDead() because we may be called before // LiveVariables is run, or we may be looking at a non-allocatable reg. - if (MF && getDesc().isCall() && + if (MF && isCall() && MO.isReg() && MO.isImplicit() && MO.isDef()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { const MachineRegisterInfo &MRI = MF->getRegInfo(); if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) { bool HasAliasLive = false; - for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg); + for (const uint16_t *Alias = TM->getRegisterInfo()->getAliasSet(Reg); unsigned AliasReg = *Alias; ++Alias) if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) { HasAliasLive = true; @@ -1617,6 +1721,20 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, return Found; } +void MachineInstr::clearRegisterKills(unsigned Reg, + const TargetRegisterInfo *RegInfo) { + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + RegInfo = 0; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isUse() || !MO.isKill()) + continue; + unsigned OpReg = MO.getReg(); + if (OpReg == Reg || (RegInfo && RegInfo->isSuperRegister(Reg, OpReg))) + MO.setIsKill(false); + } +} + bool MachineInstr::addRegisterDead(unsigned IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { @@ -1689,16 +1807,21 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg, true /*IsImp*/)); } -void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs, +void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, const TargetRegisterInfo &TRI) { + bool HasRegMask = false; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { MachineOperand &MO = getOperand(i); + if (MO.isRegMask()) { + HasRegMask = true; + continue; + } if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); - if (Reg == 0) continue; + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; bool Dead = true; - for (SmallVectorImpl<unsigned>::const_iterator I = UsedRegs.begin(), - E = UsedRegs.end(); I != E; ++I) + for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end(); + I != E; ++I) if (TRI.regsOverlap(*I, Reg)) { Dead = false; break; @@ -1706,53 +1829,66 @@ void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRe // If there are no uses, including partial uses, the def is dead. if (Dead) MO.setIsDead(); } + + // This is a call with a register mask operand. + // Mask clobbers are always dead, so add defs for the non-dead defines. + if (HasRegMask) + for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end(); + I != E; ++I) + addRegisterDefined(*I, &TRI); } unsigned MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { - unsigned Hash = MI->getOpcode() * 37; + // Build up a buffer of hash code components. + // + // FIXME: This is a total hack. We should have a hash_value overload for + // MachineOperand, but currently that doesn't work because there are many + // different ideas of "equality" and thus different sets of information that + // contribute to the hash code. This one happens to want to take a specific + // subset. And it's still not clear that this routine uses the *correct* + // subset of information when computing the hash code. The goal is to use the + // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to + // test for equality when passed the 'IgnoreVRegDefs' filter flag. 
It would + // be very useful to factor the selection of relevant inputs out of the two + // functions and into a common routine, but it's not clear how that can be + // done. + SmallVector<size_t, 8> HashComponents; + HashComponents.reserve(MI->getNumOperands() + 1); + HashComponents.push_back(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - uint64_t Key = (uint64_t)MO.getType() << 32; switch (MO.getType()) { default: break; case MachineOperand::MO_Register: if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; // Skip virtual register defs. - Key |= MO.getReg(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getReg())); break; case MachineOperand::MO_Immediate: - Key |= MO.getImm(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getImm())); break; case MachineOperand::MO_FrameIndex: case MachineOperand::MO_ConstantPoolIndex: case MachineOperand::MO_JumpTableIndex: - Key |= MO.getIndex(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex())); break; case MachineOperand::MO_MachineBasicBlock: - Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB())); break; case MachineOperand::MO_GlobalAddress: - Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal())); break; case MachineOperand::MO_BlockAddress: - Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress()); + HashComponents.push_back(hash_combine(MO.getType(), + MO.getBlockAddress())); break; case MachineOperand::MO_MCSymbol: - Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol())); break; } - Key += ~(Key << 32); - Key ^= (Key >> 22); - Key += ~(Key << 13); - Key ^= (Key >> 8); - Key += (Key << 3); - Key ^= (Key >> 15); - Key += ~(Key << 27); - Key ^= (Key >> 31); - Hash = (unsigned)Key + Hash * 37; - } - return Hash; + } + return hash_combine_range(HashComponents.begin(), HashComponents.end()); } void MachineInstr::emitError(StringRef Msg) const { diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp new file mode 100644 index 000000000000..73489a7160bf --- /dev/null +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -0,0 +1,278 @@ +//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +namespace { + class UnpackMachineBundles : public MachineFunctionPass { + public: + static char ID; // Pass identification + UnpackMachineBundles() : MachineFunctionPass(ID) { + initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + }; +} // end anonymous namespace + +char UnpackMachineBundles::ID = 0; +char &llvm::UnpackMachineBundlesID = UnpackMachineBundles::ID; +INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles", + "Unpack machine instruction bundles", false, false) + +bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = &*I; + + for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(), + MIE = MBB->instr_end(); MII != MIE; ) { + MachineInstr *MI = &*MII; + + // Remove BUNDLE instruction and the InsideBundle flags from bundled + // instructions. + if (MI->isBundle()) { + while (++MII != MIE && MII->isInsideBundle()) { + MII->setIsInsideBundle(false); + for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MII->getOperand(i); + if (MO.isReg() && MO.isInternalRead()) + MO.setIsInternalRead(false); + } + } + MI->eraseFromParent(); + + Changed = true; + continue; + } + + ++MII; + } + } + + return Changed; +} + + +namespace { + class FinalizeMachineBundles : public MachineFunctionPass { + public: + static char ID; // Pass identification + FinalizeMachineBundles() : MachineFunctionPass(ID) { + initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + }; +} // end anonymous namespace + +char FinalizeMachineBundles::ID = 0; +char &llvm::FinalizeMachineBundlesID = FinalizeMachineBundles::ID; +INITIALIZE_PASS(FinalizeMachineBundles, "finalize-mi-bundles", + "Finalize machine instruction bundles", false, false) + +bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) { + return llvm::finalizeBundles(MF); +} + + +/// finalizeBundle - Finalize a machine instruction bundle which includes +/// a sequence of instructions starting from FirstMI to LastMI (exclusive). +/// This routine adds a BUNDLE instruction to represent the bundle, it adds +/// IsInternalRead markers to MachineOperands which are defined inside the +/// bundle, and it copies externally visible defs and uses to the BUNDLE +/// instruction. 
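
Editorial note, ahead of the definition that follows: a minimal sketch (a hypothetical target-pass helper, not part of this patch) of how the three-argument finalizeBundle is meant to be driven. The caller picks a half-open range of adjacent instructions and the routine inserts the BUNDLE header, flags the bundled instructions, and copies the externally visible defs and uses onto the header:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBundle.h"

// Bundle N adjacent instructions starting at First. [First, Last) is
// half-open, matching the "LastMI (exclusive)" contract documented above.
static void bundleRange(llvm::MachineBasicBlock &MBB,
                        llvm::MachineBasicBlock::instr_iterator First,
                        unsigned N) {
  llvm::MachineBasicBlock::instr_iterator Last = First;
  for (unsigned i = 0; i != N; ++i)
    ++Last;
  llvm::finalizeBundle(MBB, First, Last);
}
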
+void llvm::finalizeBundle(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator FirstMI, + MachineBasicBlock::instr_iterator LastMI) { + assert(FirstMI != LastMI && "Empty bundle?"); + + const TargetMachine &TM = MBB.getParent()->getTarget(); + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(), + TII->get(TargetOpcode::BUNDLE)); + + SmallVector<unsigned, 8> LocalDefs; + SmallSet<unsigned, 8> LocalDefSet; + SmallSet<unsigned, 8> DeadDefSet; + SmallSet<unsigned, 8> KilledDefSet; + SmallVector<unsigned, 8> ExternUses; + SmallSet<unsigned, 8> ExternUseSet; + SmallSet<unsigned, 8> KilledUseSet; + SmallSet<unsigned, 8> UndefUseSet; + SmallVector<MachineOperand*, 4> Defs; + for (; FirstMI != LastMI; ++FirstMI) { + for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = FirstMI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isDef()) { + Defs.push_back(&MO); + continue; + } + + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (LocalDefSet.count(Reg)) { + MO.setIsInternalRead(); + if (MO.isKill()) + // Internal def is now killed. + KilledDefSet.insert(Reg); + } else { + if (ExternUseSet.insert(Reg)) { + ExternUses.push_back(Reg); + if (MO.isUndef()) + UndefUseSet.insert(Reg); + } + if (MO.isKill()) + // External def is now killed. + KilledUseSet.insert(Reg); + } + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + MachineOperand &MO = *Defs[i]; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (LocalDefSet.insert(Reg)) { + LocalDefs.push_back(Reg); + if (MO.isDead()) { + DeadDefSet.insert(Reg); + } + } else { + // Re-defined inside the bundle, it's no longer killed. + KilledDefSet.erase(Reg); + if (!MO.isDead()) + // Previously defined but dead. + DeadDefSet.erase(Reg); + } + + if (!MO.isDead()) { + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) { + if (LocalDefSet.insert(SubReg)) + LocalDefs.push_back(SubReg); + } + } + } + + FirstMI->setIsInsideBundle(); + Defs.clear(); + } + + SmallSet<unsigned, 8> Added; + for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { + unsigned Reg = LocalDefs[i]; + if (Added.insert(Reg)) { + // If it's not live beyond end of the bundle, mark it dead. + bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg); + MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) | + getImplRegState(true)); + } + } + + for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) { + unsigned Reg = ExternUses[i]; + bool isKill = KilledUseSet.count(Reg); + bool isUndef = UndefUseSet.count(Reg); + MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) | + getImplRegState(true)); + } +} + +/// finalizeBundle - Same functionality as the previous finalizeBundle except +/// the last instruction in the bundle is not provided as an input. This is +/// used in cases where bundles are pre-determined by marking instructions +/// with 'InsideBundle' marker. It returns the MBB instruction iterator that +/// points to the end of the bundle. 
+MachineBasicBlock::instr_iterator +llvm::finalizeBundle(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator FirstMI) { + MachineBasicBlock::instr_iterator E = MBB.instr_end(); + MachineBasicBlock::instr_iterator LastMI = llvm::next(FirstMI); + while (LastMI != E && LastMI->isInsideBundle()) + ++LastMI; + finalizeBundle(MBB, FirstMI, LastMI); + return LastMI; +} + +/// finalizeBundles - Finalize instruction bundles in the specified +/// MachineFunction. Return true if any bundles are finalized. +bool llvm::finalizeBundles(MachineFunction &MF) { + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock &MBB = *I; + + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(); + assert(!MII->isInsideBundle() && + "First instr cannot be inside bundle before finalization!"); + + MachineBasicBlock::instr_iterator MIE = MBB.instr_end(); + if (MII == MIE) + continue; + for (++MII; MII != MIE; ) { + if (!MII->isInsideBundle()) + ++MII; + else { + MII = finalizeBundle(MBB, llvm::prior(MII)); + Changed = true; + } + } + } + + return Changed; +} + +//===----------------------------------------------------------------------===// +// MachineOperand iterator +//===----------------------------------------------------------------------===// + +MachineOperandIteratorBase::RegInfo +MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, + SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) { + RegInfo RI = { false, false, false }; + for(; isValid(); ++*this) { + MachineOperand &MO = deref(); + if (!MO.isReg() || MO.getReg() != Reg) + continue; + + // Remember each (MI, OpNo) that refers to Reg. + if (Ops) + Ops->push_back(std::make_pair(MO.getParent(), getOperandNo())); + + // Both defs and uses can read virtual registers. + if (MO.readsReg()) { + RI.Reads = true; + if (MO.isDef()) + RI.Tied = true; + } + + // Only defs can write. + if (MO.isDef()) + RI.Writes = true; + else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo())) + RI.Tied = true; + } + return RI; +} diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index a1f80d5282e0..8c562cc4454a 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -45,7 +45,7 @@ using namespace llvm; static cl::opt<bool> AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); @@ -60,8 +60,6 @@ STATISTIC(NumPostRAHoisted, namespace { class MachineLICM : public MachineFunctionPass { - bool PreRegAlloc; - const TargetMachine *TM; const TargetInstrInfo *TII; const TargetLowering *TLI; @@ -69,6 +67,7 @@ namespace { const MachineFrameInfo *MFI; MachineRegisterInfo *MRI; const InstrItineraryData *InstrItins; + bool PreRegAlloc; // Various analyses that we use... AliasAnalysis *AA; // Alias analysis info. @@ -81,7 +80,13 @@ namespace { MachineLoop *CurLoop; // The current loop we are working on. MachineBasicBlock *CurPreheader; // The preheader for CurLoop. - BitVector AllocatableSet; + // Exit blocks for CurLoop. + SmallVector<MachineBasicBlock*, 8> ExitBlocks; + + bool isExitBlock(const MachineBasicBlock *MBB) const { + return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != + ExitBlocks.end(); + } // Track 'estimated' register pressure. 
SmallSet<unsigned, 32> RegSeen; @@ -122,8 +127,6 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); - const char *getPassName() const { return "Machine Instruction LICM"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineDominatorTree>(); @@ -165,7 +168,9 @@ namespace { /// ProcessMI - Examine the instruction for potentai LICM candidate. Also /// gather register def and frame object update information. - void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs, + void ProcessMI(MachineInstr *MI, + BitVector &PhysRegDefs, + BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, SmallVector<CandidateInfo, 32> &Candidates); @@ -182,12 +187,12 @@ namespace { /// invariant. I.e., all virtual register operands are defined outside of /// the loop, physical registers aren't accessed (explicitly or implicitly), /// and the instruction is hoistable. - /// + /// bool IsLoopInvariantInst(MachineInstr &I); - /// HasAnyPHIUse - Return true if the specified register is used by any - /// phi node. - bool HasAnyPHIUse(unsigned Reg) const; + /// HasLoopPHIUse - Return true if the specified instruction is used by any + /// phi node in the current loop. + bool HasLoopPHIUse(const MachineInstr *MI) const; /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and an use in the current loop, return true if the target considered @@ -200,7 +205,7 @@ namespace { /// CanCauseHighRegPressure - Visit BBs from header to current BB, /// check if hoisting an instruction of the given cost matrix can cause high /// register pressure. - bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost); + bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap); /// UpdateBackTraceRegPressure - Traverse the back trace from header to /// the current block and update their register pressures to reflect the @@ -215,13 +220,25 @@ namespace { /// If not then a load from this mbb may not be safe to hoist. bool IsGuaranteedToExecute(MachineBasicBlock *BB); - /// HoistRegion - Walk the specified region of the CFG (defined by all - /// blocks dominated by the specified block, and that are in the current - /// loop) in depth first order w.r.t the DominatorTree. This allows us to - /// visit definitions before uses, allowing us to hoist a loop body in one - /// pass without iteration. + void EnterScope(MachineBasicBlock *MBB); + + void ExitScope(MachineBasicBlock *MBB); + + /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given + /// dominator tree node if its a leaf or all of its children are done. Walk + /// up the dominator tree to destroy ancestors which are now done. + void ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); + + /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all + /// blocks dominated by the specified header block, and that are in the + /// current loop) in depth first order w.r.t the DominatorTree. This allows + /// us to visit definitions before uses, allowing us to hoist a loop body in + /// one pass without iteration. 
/// - void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false); + void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); + void HoistRegion(MachineDomTreeNode *N, bool IsHeader); /// getRegisterClassIDAndCost - For a given MI, register, and the operand /// index, return the ID and cost of its representative register class by @@ -278,6 +295,7 @@ namespace { } // end anonymous namespace char MachineLICM::ID = 0; +char &llvm::MachineLICMID = MachineLICM::ID; INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", "Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) @@ -286,10 +304,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MachineLICM, "machinelicm", "Machine Loop Invariant Code Motion", false, false) -FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { - return new MachineLICM(PreRegAlloc); -} - /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most /// loop that has a unique predecessor. static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { @@ -305,12 +319,6 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { } bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { - if (PreRegAlloc) - DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); - else - DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); - DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); - Changed = FirstInLoop = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); @@ -319,7 +327,14 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { MFI = MF.getFrameInfo(); MRI = &MF.getRegInfo(); InstrItins = TM->getInstrItineraryData(); - AllocatableSet = TRI->getAllocatableSet(MF); + + PreRegAlloc = MRI->isSSA(); + + if (PreRegAlloc) + DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); + else + DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); + DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); if (PreRegAlloc) { // Estimate register pressure during pre-regalloc pass. @@ -341,6 +356,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { while (!Worklist.empty()) { CurLoop = Worklist.pop_back_val(); CurPreheader = 0; + ExitBlocks.clear(); // If this is done before regalloc, only visit outer-most preheader-sporting // loops. @@ -349,6 +365,8 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { continue; } + CurLoop->getExitBlocks(ExitBlocks); + if (!PreRegAlloc) HoistRegionPostRA(); else { @@ -356,7 +374,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { // being hoisted. MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); FirstInLoop = true; - HoistRegion(N, true); + HoistOutOfLoop(N); CSEMap.clear(); } } @@ -383,7 +401,8 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { /// ProcessMI - Examine the instruction for potentai LICM candidate. Also /// gather register def and frame object update information. void MachineLICM::ProcessMI(MachineInstr *MI, - unsigned *PhysRegDefs, + BitVector &PhysRegDefs, + BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, SmallVector<CandidateInfo, 32> &Candidates) { bool RuledOut = false; @@ -402,6 +421,13 @@ void MachineLICM::ProcessMI(MachineInstr *MI, continue; } + // We can't hoist an instruction defining a physreg that is clobbered in + // the loop. 
+ if (MO.isRegMask()) { + PhysRegClobbers.setBitsNotInMask(MO.getRegMask()); + continue; + } + if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -411,7 +437,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, "Not expecting virtual register!"); if (!MO.isDef()) { - if (Reg && PhysRegDefs[Reg]) + if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg))) // If it's using a non-loop-invariant register, then it's obviously not // safe to hoist. HasNonInvariantUse = true; @@ -419,9 +445,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI, } if (MO.isImplicit()) { - ++PhysRegDefs[Reg]; - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - ++PhysRegDefs[*AS]; + for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) + PhysRegClobbers.set(*AS); if (!MO.isDead()) // Non-dead implicit def? This cannot be hoisted. RuledOut = true; @@ -438,14 +463,17 @@ void MachineLICM::ProcessMI(MachineInstr *MI, Def = Reg; // If we have already seen another instruction that defines the same - // register, then this is not safe. - if (++PhysRegDefs[Reg] > 1) - // MI defined register is seen defined by another instruction in - // the loop, it cannot be a LICM candidate. - RuledOut = true; - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - if (++PhysRegDefs[*AS] > 1) + // register, then this is not safe. Two defs is indicated by setting a + // PhysRegClobbers bit. + for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) { + if (PhysRegDefs.test(*AS)) + PhysRegClobbers.set(*AS); + if (PhysRegClobbers.test(*AS)) + // MI defined register is seen defined by another instruction in + // the loop, it cannot be a LICM candidate. RuledOut = true; + PhysRegDefs.set(*AS); + } } // Only consider reloads for now and remats which do not have register @@ -461,9 +489,13 @@ void MachineLICM::ProcessMI(MachineInstr *MI, /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop /// invariants out to the preheader. void MachineLICM::HoistRegionPostRA() { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + unsigned NumRegs = TRI->getNumRegs(); - unsigned *PhysRegDefs = new unsigned[NumRegs]; - std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0); + BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. + BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. SmallVector<CandidateInfo, 32> Candidates; SmallSet<int, 32> StoredFIs; @@ -485,16 +517,31 @@ void MachineLICM::HoistRegionPostRA() { for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), E = BB->livein_end(); I != E; ++I) { unsigned Reg = *I; - ++PhysRegDefs[Reg]; - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - ++PhysRegDefs[*AS]; + for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) + PhysRegDefs.set(*AS); } SpeculationState = SpeculateUnknown; for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); MII != E; ++MII) { MachineInstr *MI = &*MII; - ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates); + ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); + } + } + + // Gather the registers read / clobbered by the terminator. 
+ BitVector TermRegs(NumRegs); + MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); + if (TI != Preheader->end()) { + for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = TI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) + TermRegs.set(*AS); } } @@ -503,19 +550,25 @@ void MachineLICM::HoistRegionPostRA() { // instruction in the loop. // 2. If the candidate is a load from stack slot (always true for now), // check if the slot is stored anywhere in the loop. + // 3. Make sure candidate def should not clobber + // registers read by the terminator. Similarly its def should not be + // clobbered by the terminator. for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { if (Candidates[i].FI != INT_MIN && StoredFIs.count(Candidates[i].FI)) continue; - if (PhysRegDefs[Candidates[i].Def] == 1) { + unsigned Def = Candidates[i].Def; + if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { bool Safe = true; MachineInstr *MI = Candidates[i].MI; for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { const MachineOperand &MO = MI->getOperand(j); if (!MO.isReg() || MO.isDef() || !MO.getReg()) continue; - if (PhysRegDefs[MO.getReg()]) { + unsigned Reg = MO.getReg(); + if (PhysRegDefs.test(Reg) || + PhysRegClobbers.test(Reg)) { // If it's using a non-loop-invariant register, then it's obviously // not safe to hoist. Safe = false; @@ -526,8 +579,6 @@ void MachineLICM::HoistRegionPostRA() { HoistPostRA(MI, Candidates[i].Def); } } - - delete[] PhysRegDefs; } /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current @@ -556,26 +607,17 @@ void MachineLICM::AddToLiveIns(unsigned Reg) { /// dirty work. void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) return; // Now move the instructions to the predecessor, inserting it before any // terminator instructions. - DEBUG({ - dbgs() << "Hoisting " << *MI; - if (Preheader->getBasicBlock()) - dbgs() << " to MachineBasicBlock " - << Preheader->getName(); - if (MI->getParent()->getBasicBlock()) - dbgs() << " from MachineBasicBlock " - << MI->getParent()->getName(); - dbgs() << "\n"; - }); + DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" + << MI->getParent()->getNumber() << ": " << *MI); // Splice the instruction to the preheader. MachineBasicBlock *MBB = MI->getParent(); Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); - // Add register to livein list to all the BBs in the current loop since a + // Add register to livein list to all the BBs in the current loop since a // loop invariant must be kept live throughout the whole loop. This is // important to ensure later passes do not scavenge the def register. AddToLiveIns(Def); @@ -589,7 +631,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { if (SpeculationState != SpeculateUnknown) return SpeculationState == SpeculateFalse; - + if (BB != CurLoop->getHeader()) { // Check loop exiting blocks. 
SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; @@ -605,57 +647,126 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { return true; } -/// HoistRegion - Walk the specified region of the CFG (defined by all blocks -/// dominated by the specified block, and that are in the current loop) in depth -/// first order w.r.t the DominatorTree. This allows us to visit definitions -/// before uses, allowing us to hoist a loop body in one pass without iteration. -/// -void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { - assert(N != 0 && "Null dominator tree node?"); - MachineBasicBlock *BB = N->getBlock(); +void MachineLICM::EnterScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); - // If the header of the loop containing this basic block is a landing pad, - // then don't try to hoist instructions out of this loop. - const MachineLoop *ML = MLI->getLoopFor(BB); - if (ML && ML->getHeader()->isLandingPad()) return; + // Remember livein register pressure. + BackTrace.push_back(RegPressure); +} - // If this subregion is not in the top level loop at all, exit. - if (!CurLoop->contains(BB)) return; +void MachineLICM::ExitScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); + BackTrace.pop_back(); +} - MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) +/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given +/// dominator tree node if its a leaf or all of its children are done. Walk +/// up the dominator tree to destroy ancestors which are now done. +void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { + if (OpenChildren[Node]) return; - if (IsHeader) { + // Pop scope. + ExitScope(Node->getBlock()); + + // Now traverse upwards to pop ancestors whose offsprings are all done. + while (MachineDomTreeNode *Parent = ParentMap[Node]) { + unsigned Left = --OpenChildren[Parent]; + if (Left != 0) + break; + ExitScope(Parent->getBlock()); + Node = Parent; + } +} + +/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all +/// blocks dominated by the specified header block, and that are in the +/// current loop) in depth first order w.r.t the DominatorTree. This allows +/// us to visit definitions before uses, allowing us to hoist a loop body in +/// one pass without iteration. +/// +void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { + SmallVector<MachineDomTreeNode*, 32> Scopes; + SmallVector<MachineDomTreeNode*, 8> WorkList; + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; + DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; + + // Perform a DFS walk to determine the order of visit. + WorkList.push_back(HeaderN); + do { + MachineDomTreeNode *Node = WorkList.pop_back_val(); + assert(Node != 0 && "Null dominator tree node?"); + MachineBasicBlock *BB = Node->getBlock(); + + // If the header of the loop containing this basic block is a landing pad, + // then don't try to hoist instructions out of this loop. + const MachineLoop *ML = MLI->getLoopFor(BB); + if (ML && ML->getHeader()->isLandingPad()) + continue; + + // If this subregion is not in the top level loop at all, exit. 
+ if (!CurLoop->contains(BB)) + continue; + + Scopes.push_back(Node); + const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); + unsigned NumChildren = Children.size(); + + // Don't hoist things out of a large switch statement. This often causes + // code to be hoisted that wasn't going to be executed, and increases + // register pressure in a situation where it's likely to matter. + if (BB->succ_size() >= 25) + NumChildren = 0; + + OpenChildren[Node] = NumChildren; + // Add children in reverse order as then the next popped worklist node is + // the first child of this node. This means we ultimately traverse the + // DOM tree in exactly the same order as if we'd recursed. + for (int i = (int)NumChildren-1; i >= 0; --i) { + MachineDomTreeNode *Child = Children[i]; + ParentMap[Child] = Node; + WorkList.push_back(Child); + } + } while (!WorkList.empty()); + + if (Scopes.size() != 0) { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + // Compute registers which are livein into the loop headers. RegSeen.clear(); BackTrace.clear(); InitRegPressure(Preheader); } - // Remember livein register pressure. - BackTrace.push_back(RegPressure); + // Now perform LICM. + for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { + MachineDomTreeNode *Node = Scopes[i]; + MachineBasicBlock *MBB = Node->getBlock(); - SpeculationState = SpeculateUnknown; - for (MachineBasicBlock::iterator - MII = BB->begin(), E = BB->end(); MII != E; ) { - MachineBasicBlock::iterator NextMII = MII; ++NextMII; - MachineInstr *MI = &*MII; - if (!Hoist(MI, Preheader)) - UpdateRegPressure(MI); - MII = NextMII; - } + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + continue; - // Don't hoist things out of a large switch statement. This often causes - // code to be hoisted that wasn't going to be executed, and increases - // register pressure in a situation where it's likely to matter. - if (BB->succ_size() < 25) { - const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); - for (unsigned I = 0, E = Children.size(); I != E; ++I) - HoistRegion(Children[I]); - } + EnterScope(MBB); - BackTrace.pop_back(); + // Process the block + SpeculationState = SpeculateUnknown; + for (MachineBasicBlock::iterator + MII = MBB->begin(), E = MBB->end(); MII != E; ) { + MachineBasicBlock::iterator NextMII = MII; ++NextMII; + MachineInstr *MI = &*MII; + if (!Hoist(MI, Preheader)) + UpdateRegPressure(MI); + MII = NextMII; + } + + // If it's a leaf node, it's done. Traverse upwards to pop ancestors. + ExitScopeIfDone(Node, OpenChildren, ParentMap); + } } static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { @@ -670,7 +781,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, unsigned &RCId, unsigned &RCCost) const { const TargetRegisterClass *RC = MRI->getRegClass(Reg); EVT VT = *RC->vt_begin(); - if (VT == MVT::untyped) { + if (VT == MVT::Untyped) { RCId = RC->getID(); RCCost = 1; } else { @@ -678,7 +789,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, RCCost = TLI->getRepRegClassCostFor(VT); } } - + /// InitRegPressure - Find all virtual register references that are liveout of /// the preheader to initialize the starting "register pressure". Note this /// does not count live through (livein but not used) registers. 
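
Editorial note: HoistOutOfLoop above replaces the old recursive HoistRegion walk with an explicit work list. Children are pushed in reverse so the visit order matches the recursion, OpenChildren counts the unfinished children of each dominator-tree node, and ExitScopeIfDone pops the per-block register-pressure scope (and any newly finished ancestors) as soon as a subtree completes. A self-contained sketch of that bookkeeping on stand-in types; Node, walkScopes, Enter and Exit are illustrative, not the MachineDomTreeNode API:

#include <map>
#include <vector>

struct Node { std::vector<Node*> Children; }; // stand-in for MachineDomTreeNode

// Visit a tree in the same order as a recursive pre-order walk, calling
// Enter(N) when N's scope opens and Exit(N) once its whole subtree is done.
void walkScopes(Node *Root, void (*Enter)(Node*), void (*Exit)(Node*)) {
  std::map<Node*, Node*> Parent;
  std::map<Node*, unsigned> Open;
  std::vector<Node*> Order;
  std::vector<Node*> Work(1, Root);

  // Pass 1: DFS to fix the visit order; push children in reverse so the next
  // node popped is always the first child, mirroring the recursive walk.
  while (!Work.empty()) {
    Node *N = Work.back(); Work.pop_back();
    Order.push_back(N);
    Open[N] = N->Children.size();
    for (unsigned i = N->Children.size(); i != 0; --i) {
      Parent[N->Children[i-1]] = N;
      Work.push_back(N->Children[i-1]);
    }
  }

  // Pass 2: process nodes in order; when a leaf finishes, walk up and close
  // every ancestor whose children are all done (the ExitScopeIfDone step).
  for (unsigned i = 0, e = Order.size(); i != e; ++i) {
    Node *N = Order[i];
    Enter(N);
    while (N && Open[N] == 0) {
      Exit(N);
      Node *P = 0;
      if (Parent.count(N))
        P = Parent[N];
      if (P)
        --Open[P];
      N = P;
    }
  }
}
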
@@ -762,6 +873,21 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { } } +/// isLoadFromGOTOrConstantPool - Return true if this machine instruction +/// loads from global offset table or constant pool. +static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { + assert (MI.mayLoad() && "Expected MI that loads!"); + for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), + E = MI.memoperands_end(); I != E; ++I) { + if (const Value *V = (*I)->getValue()) { + if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) + if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) + return true; + } + } + return false; +} + /// IsLICMCandidate - Returns true if the instruction may be a suitable /// candidate for LICM. e.g. If the instruction is a call, then it's obviously /// not safe to hoist it. @@ -773,9 +899,12 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // If it is load then check if it is guaranteed to execute by making sure that // it dominates all exiting blocks. If it doesn't, then there is a path out of - // the loop which does not execute this load, so we can't hoist it. + // the loop which does not execute this load, so we can't hoist it. Loads + // from constant memory are not safe to speculate all the time, for example + // indexed load from a jump table. // Stores and side effects are already checked by isSafeToMove. - if (I.getDesc().mayLoad() && !IsGuaranteedToExecute(I.getParent())) + if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) && + !IsGuaranteedToExecute(I.getParent())) return false; return true; @@ -785,7 +914,7 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { /// invariant. I.e., all virtual register operands are defined outside of the /// loop, physical registers aren't accessed explicitly, and there are no side /// effects that aren't captured by the operands or other flags. -/// +/// bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { if (!IsLICMCandidate(I)) return false; @@ -806,18 +935,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI->def_empty(Reg)) - return false; - if (AllocatableSet.test(Reg)) + if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent())) return false; - // Check for a def among the register's aliases too. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - if (!MRI->def_empty(AliasReg)) - return false; - if (AllocatableSet.test(AliasReg)) - return false; - } // Otherwise it's safe to move. continue; } else if (!MO.isDead()) { @@ -847,22 +966,40 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } -/// HasAnyPHIUse - Return true if the specified register is used by any -/// phi node. -bool MachineLICM::HasAnyPHIUse(unsigned Reg) const { - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI->isPHI()) - return true; - // Look pass copies as well. - if (UseMI->isCopy()) { - unsigned Def = UseMI->getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Def) && - HasAnyPHIUse(Def)) - return true; +/// HasLoopPHIUse - Return true if the specified instruction is used by a +/// phi node and hoisting it could cause a copy to be inserted. 
+bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { + SmallVector<const MachineInstr*, 8> Work(1, MI); + do { + MI = Work.pop_back_val(); + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + // A PHI may cause a copy to be inserted. + if (UseMI->isPHI()) { + // A PHI inside the loop causes a copy because the live range of Reg is + // extended across the PHI. + if (CurLoop->contains(UseMI)) + return true; + // A PHI in an exit block can cause a copy to be inserted if the PHI + // has multiple predecessors in the loop with different values. + // For now, approximate by rejecting all exit blocks. + if (isExitBlock(UseMI->getParent())) + return true; + continue; + } + // Look past copies as well. + if (UseMI->isCopy() && CurLoop->contains(UseMI)) + Work.push_back(UseMI); + } } - } + } while (!Work.empty()); return false; } @@ -903,7 +1040,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, /// IsCheapInstruction - Return true if the instruction is marked "cheap" or /// the operand latency between its def and a use is one or less. bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { - if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike()) + if (MI.isAsCheapAsAMove() || MI.isCopyLike()) return true; if (!InstrItins || InstrItins->isEmpty()) return false; @@ -930,16 +1067,25 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { /// CanCauseHighRegPressure - Visit BBs from header to current BB, check /// if hoisting an instruction of the given cost matrix can cause high /// register pressure. -bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) { +bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, + bool CheapInstr) { for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); CI != CE; ++CI) { - if (CI->second <= 0) + if (CI->second <= 0) continue; unsigned RCId = CI->first; + unsigned Limit = RegLimit[RCId]; + int Cost = CI->second; + + // Don't hoist cheap instructions if they would increase register pressure, + // even if we're under the limit. + if (CheapInstr) + return true; + for (unsigned i = BackTrace.size(); i != 0; --i) { SmallVector<unsigned, 8> &RP = BackTrace[i-1]; - if (RP[RCId] + CI->second >= RegLimit[RCId]) + if (RP[RCId] + Cost >= Limit) return true; } } @@ -999,87 +1145,95 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.isImplicitDef()) return true; - // If the instruction is cheap, only hoist if it is re-materilizable. LICM - // will increase register pressure. It's probably not worth it if the - // instruction is cheap. - // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting - // these tend to help performance in low register pressure situation. The - // trade off is it may cause spill in high pressure situation. It will end up - // adding a store in the loop preheader. But the reload is no more expensive. - // The side benefit is these loads are frequently CSE'ed. - if (IsCheapInstruction(MI)) { - if (!TII->isTriviallyReMaterializable(&MI, AA)) - return false; - } else { - // Estimate register pressure to determine whether to LICM the instruction. - // In low register pressure situation, we can be more aggressive about - // hoisting. 
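// Standalone illustrative sketch, not part of the patch: the transitive
// "would hoisting this feed a PHI?" walk from HasLoopPHIUse above, over a toy
// use graph. User, Value and the InLoop/InExitBlock flags are hypothetical
// stand-ins for MachineInstr, virtual registers and the CurLoop queries.
#include <vector>

struct User {
  enum Kind { PHI, Copy, Other } K;
  bool InLoop;        // corresponds to CurLoop->contains(UseMI)
  bool InExitBlock;   // corresponds to isExitBlock(UseMI->getParent())
  unsigned CopyDef;   // for Copy users: the value the copy defines
};

struct Value {
  std::vector<User> Users;
};

bool hasLoopPHIUse(unsigned Def, const std::vector<Value> &Values) {
  std::vector<unsigned> Work(1, Def);
  do {
    unsigned V = Work.back();
    Work.pop_back();
    for (const User &U : Values[V].Users) {
      if (U.K == User::PHI) {
        // A PHI inside the loop extends the live range across the PHI, and a
        // PHI in an exit block may need a copy too; both are rejected.
        if (U.InLoop || U.InExitBlock)
          return true;
        continue;
      }
      // Look through copies that stay inside the loop.
      if (U.K == User::Copy && U.InLoop)
        Work.push_back(U.CopyDef);
    }
  } while (!Work.empty());
  return false;
}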
Also, favors hoisting long latency instructions even in - // moderately high pressure situation. - // FIXME: If there are long latency loop-invariant instructions inside the - // loop at this point, why didn't the optimizer's LICM hoist them? - DenseMap<unsigned, int> Cost; - for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; + // Besides removing computation from the loop, hoisting an instruction has + // these effects: + // + // - The value defined by the instruction becomes live across the entire + // loop. This increases register pressure in the loop. + // + // - If the value is used by a PHI in the loop, a copy will be required for + // lowering the PHI after extending the live range. + // + // - When hoisting the last use of a value in the loop, that value no longer + // needs to be live in the loop. This lowers register pressure in the loop. + + bool CheapInstr = IsCheapInstruction(MI); + bool CreatesCopy = HasLoopPHIUse(&MI); + + // Don't hoist a cheap instruction if it would create a copy in the loop. + if (CheapInstr && CreatesCopy) { + DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); + return false; + } - unsigned RCId, RCCost; - getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); - if (MO.isDef()) { - if (HasHighOperandLatency(MI, i, Reg)) { - ++NumHighLatency; - return true; - } + // Rematerializable instructions should always be hoisted since the register + // allocator can just pull them down again when needed. + if (TII->isTriviallyReMaterializable(&MI, AA)) + return true; + + // Estimate register pressure to determine whether to LICM the instruction. + // In low register pressure situation, we can be more aggressive about + // hoisting. Also, favors hoisting long latency instructions even in + // moderately high pressure situation. + // Cheap instructions will only be hoisted if they don't increase register + // pressure at all. + // FIXME: If there are long latency loop-invariant instructions inside the + // loop at this point, why didn't the optimizer's LICM hoist them? + DenseMap<unsigned, int> Cost; + for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; - DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second += RCCost; - else - Cost.insert(std::make_pair(RCId, RCCost)); - } else if (isOperandKill(MO, MRI)) { - // Is a virtual register use is a kill, hoisting it out of the loop - // may actually reduce register pressure or be register pressure - // neutral. - DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second -= RCCost; - else - Cost.insert(std::make_pair(RCId, -RCCost)); + unsigned RCId, RCCost; + getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); + if (MO.isDef()) { + if (HasHighOperandLatency(MI, i, Reg)) { + DEBUG(dbgs() << "Hoist High Latency: " << MI); + ++NumHighLatency; + return true; } + Cost[RCId] += RCCost; + } else if (isOperandKill(MO, MRI)) { + // Is a virtual register use is a kill, hoisting it out of the loop + // may actually reduce register pressure or be register pressure + // neutral. 
+ Cost[RCId] -= RCCost; } + } - // Visit BBs from header to current BB, if hoisting this doesn't cause - // high register pressure, then it's safe to proceed. - if (!CanCauseHighRegPressure(Cost)) { - ++NumLowRP; - return true; - } + // Visit BBs from header to current BB, if hoisting this doesn't cause + // high register pressure, then it's safe to proceed. + if (!CanCauseHighRegPressure(Cost, CheapInstr)) { + DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); + ++NumLowRP; + return true; + } - // Do not "speculate" in high register pressure situation. If an - // instruction is not guaranteed to be executed in the loop, it's best to be - // conservative. - if (AvoidSpeculation && - (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) - return false; + // Don't risk increasing register pressure if it would create copies. + if (CreatesCopy) { + DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); + return false; + } - // High register pressure situation, only hoist if the instruction is going to - // be remat'ed. - if (!TII->isTriviallyReMaterializable(&MI, AA) && - !MI.isInvariantLoad(AA)) - return false; + // Do not "speculate" in high register pressure situation. If an + // instruction is not guaranteed to be executed in the loop, it's best to be + // conservative. + if (AvoidSpeculation && + (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { + DEBUG(dbgs() << "Won't speculate: " << MI); + return false; } - // If result(s) of this instruction is used by PHIs outside of the loop, then - // don't hoist it if the instruction because it will introduce an extra copy. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - if (HasAnyPHIUse(MO.getReg())) - return false; + // High register pressure situation, only hoist if the instruction is going + // to be remat'ed. + if (!TII->isTriviallyReMaterializable(&MI, AA) && + !MI.isInvariantLoad(AA)) { + DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); + return false; } return true; @@ -1087,7 +1241,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { // Don't unfold simple loads. - if (MI->getDesc().canFoldAsLoad()) + if (MI->canFoldAsLoad()) return 0; // If not, we may be able to unfold a load and hoist that. @@ -1123,8 +1277,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { assert(NewMIs.size() == 2 && "Unfolded a load into multiple instructions!"); MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MI, NewMIs[0]); - MBB->insert(MI, NewMIs[1]); + MachineBasicBlock::iterator Pos = MI; + MBB->insert(Pos, NewMIs[0]); + MBB->insert(Pos, NewMIs[1]); // If unfolding produced a load that wasn't loop-invariant or profitable to // hoist, discard the new instructions and bail. if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { @@ -1180,6 +1335,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, // Replace virtual registers defined by MI by their counterparts defined // by Dup. 
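// Standalone illustrative sketch, not part of the patch: the register-pressure
// cost accounting that IsProfitableToHoist and CanCauseHighRegPressure use
// above. Operand, RegLimit and the pressure snapshots are simplified
// stand-ins; the point is that defs add their class cost, killed uses subtract
// it, and the net cost is then tested against every pressure snapshot from the
// loop header down to the current block.
#include <cstddef>
#include <map>
#include <vector>

struct Operand {
  bool IsDef;     // defines a value that becomes live across the loop
  bool IsKill;    // last use of a value, so hoisting frees it in the loop
  unsigned RCId;  // register class of the operand
  int RCCost;     // cost contribution of that class
};

bool wouldRaisePressure(const std::vector<Operand> &Ops, bool CheapInstr,
                        const std::vector<std::vector<int>> &BackTrace,
                        const std::vector<int> &RegLimit) {
  std::map<unsigned, int> Cost;
  for (const Operand &Op : Ops) {
    if (Op.IsDef)
      Cost[Op.RCId] += Op.RCCost;
    else if (Op.IsKill)
      Cost[Op.RCId] -= Op.RCCost;
  }

  for (const auto &CI : Cost) {
    if (CI.second <= 0)
      continue;                 // This class only gets cheaper or stays flat.
    if (CheapInstr)
      return true;              // Cheap instructions get no pressure budget.
    for (std::size_t i = BackTrace.size(); i != 0; --i)
      if (BackTrace[i - 1][CI.first] + CI.second >= RegLimit[CI.first])
        return true;            // Hits the class limit somewhere on the path.
  }
  return false;
}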
+ SmallVector<unsigned, 2> Defs; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); @@ -1190,11 +1346,33 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, "Instructions with different phys regs are not identical!"); if (MO.isReg() && MO.isDef() && - !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { - MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); - MRI->clearKillFlags(Dup->getOperand(i).getReg()); + !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + Defs.push_back(i); + } + + SmallVector<const TargetRegisterClass*, 2> OrigRCs; + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Idx = Defs[i]; + unsigned Reg = MI->getOperand(Idx).getReg(); + unsigned DupReg = Dup->getOperand(Idx).getReg(); + OrigRCs.push_back(MRI->getRegClass(DupReg)); + + if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { + // Restore old RCs if more than one defs. + for (unsigned j = 0; j != i; ++j) + MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]); + return false; } } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Idx = Defs[i]; + unsigned Reg = MI->getOperand(Idx).getReg(); + unsigned DupReg = Dup->getOperand(Idx).getReg(); + MRI->replaceRegWith(Reg, DupReg); + MRI->clearKillFlags(DupReg); + } + MI->eraseFromParent(); ++NumCSEed; return true; diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 80c4854238af..ea98b23c6d57 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -257,7 +257,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, : ImmutablePass(ID), Context(MAI, MRI, MOFI), ObjFileMMI(0), CompactUnwindEncoding(0), CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false), - CallsExternalVAFunctionWithFloatingPointArguments(false) { + UsesVAFloatArgument(false) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); // Always emit some info, by default "no personality" info. Personalities.push_back(NULL); @@ -268,9 +268,9 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, MachineModuleInfo::MachineModuleInfo() : ImmutablePass(ID), Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) { - assert(0 && "This MachineModuleInfo constructor should never be called, MMI " - "should always be explicitly constructed by LLVMTargetMachine"); - abort(); + llvm_unreachable("This MachineModuleInfo constructor should never be called, " + "MMI should always be explicitly constructed by " + "LLVMTargetMachine"); } MachineModuleInfo::~MachineModuleInfo() { @@ -503,8 +503,7 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { /// indexes. void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym, ArrayRef<unsigned> Sites) { - for (unsigned I = 0, E = Sites.size(); I != E; ++I) - LPadToCallSiteMap[Sym].push_back(Sites[I]); + LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end()); } /// getTypeIDFor - Return the type id for the specified typeinfo. This is @@ -541,8 +540,7 @@ try_next:; // Add the new filter. 
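// Standalone illustrative sketch, not part of the patch: the constrain-first,
// rewrite-last shape of the EliminateCSE change above. Register classes are
// modelled as plain ints and "constrain" as a toy compatibility rule; the
// point is that no register is replaced until every duplicate def could be
// constrained, and classes already changed are restored if a later constraint
// fails.
#include <cstddef>
#include <utility>
#include <vector>

// Toy rule: a register may only be constrained to a class id that is at least
// as restrictive (here: numerically >=) as its current class.
static bool constrain(std::vector<int> &Class, unsigned Reg, int NewClass) {
  if (NewClass < Class[Reg])
    return false;
  Class[Reg] = NewClass;
  return true;
}

bool eliminateDupDefs(std::vector<int> &Class,
                      const std::vector<std::pair<unsigned, unsigned>> &Defs,
                      std::vector<std::pair<unsigned, unsigned>> &Replacements) {
  std::vector<int> OrigClass;
  for (std::size_t i = 0; i != Defs.size(); ++i) {
    unsigned Reg = Defs[i].first;      // def of the instruction being erased
    unsigned DupReg = Defs[i].second;  // matching def of the earlier duplicate
    OrigClass.push_back(Class[DupReg]);
    if (!constrain(Class, DupReg, Class[Reg])) {
      // Roll back the classes changed so far and keep the instruction.
      for (std::size_t j = 0; j != i; ++j)
        Class[Defs[j].second] = OrigClass[j];
      return false;
    }
  }
  // Every constraint held; now it is safe to rewrite the registers.
  for (std::size_t i = 0; i != Defs.size(); ++i)
    Replacements.push_back(std::make_pair(Defs[i].first, Defs[i].second));
  return true;
}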
int FilterID = -(1 + FilterIds.size()); FilterIds.reserve(FilterIds.size() + TyIds.size() + 1); - for (unsigned I = 0, N = TyIds.size(); I != N; ++I) - FilterIds.push_back(TyIds[I]); + FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end()); FilterEnds.push_back(FilterIds.size()); FilterIds.push_back(0); // terminator return FilterID; @@ -561,13 +559,13 @@ unsigned MachineModuleInfo::getPersonalityIndex() const { const Function* Personality = NULL; // Scan landing pads. If there is at least one non-NULL personality - use it. - for (unsigned i = 0; i != LandingPads.size(); ++i) + for (unsigned i = 0, e = LandingPads.size(); i != e; ++i) if (LandingPads[i].Personality) { Personality = LandingPads[i].Personality; break; } - for (unsigned i = 0; i < Personalities.size(); ++i) { + for (unsigned i = 0, e = Personalities.size(); i < e; ++i) { if (Personalities[i] == Personality) return i; } diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp index 9f4ef1287803..58e067bcb9b2 100644 --- a/lib/CodeGen/MachinePassRegistry.cpp +++ b/lib/CodeGen/MachinePassRegistry.cpp @@ -16,6 +16,7 @@ using namespace llvm; +void MachinePassRegistryListener::anchor() { } /// Add - Adds a function pass to the registration list. /// diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 266ebf64a3fc..7ea151713a6d 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -18,11 +18,12 @@ using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) - : TRI(&TRI), IsSSA(true) { + : TRI(&TRI), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); UsedPhysRegs.resize(TRI.getNumRegs()); - + UsedPhysRegMask.resize(TRI.getNumRegs()); + // Create the physreg use/def lists. PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()]; memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs()); @@ -30,9 +31,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) MachineRegisterInfo::~MachineRegisterInfo() { #ifndef NDEBUG - for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) - assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 && - "Vreg use list non-empty still?"); + clearVirtRegs(); for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i) assert(!PhysRegUseDefLists[i] && "PhysRegUseDefLists has entries after all instructions are deleted"); @@ -76,12 +75,14 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { // Accumulate constraints from all uses. for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E; ++I) { - // TRI doesn't have accurate enough information to model this yet. - if (I.getOperand().getSubReg()) - return false; const TargetRegisterClass *OpRC = I->getRegClassConstraint(I.getOperandNo(), TII, TRI); - if (OpRC) + if (unsigned SubIdx = I.getOperand().getSubReg()) { + if (OpRC) + NewRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, SubIdx); + else + NewRC = TRI->getSubClassWithSubReg(NewRC, SubIdx); + } else if (OpRC) NewRC = TRI->getCommonSubClass(NewRC, OpRC); if (!NewRC || NewRC == OldRC) return false; @@ -115,6 +116,16 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ return Reg; } +/// clearVirtRegs - Remove all virtual registers (after physreg assignment). 
+void MachineRegisterInfo::clearVirtRegs() { +#ifndef NDEBUG + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) + assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 && + "Vreg use list non-empty still?"); +#endif + VRegInfo.clear(); +} + /// HandleVRegListReallocation - We just added a virtual register to the /// VRegInfo info list and it reallocated. Update the use/def lists info /// pointers. @@ -150,9 +161,8 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { /// form, so there should only be one definition. MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { // Since we are in SSA form, we can use the first definition. - if (!def_empty(Reg)) - return &*def_begin(Reg); - return 0; + def_iterator I = def_begin(Reg); + return !I.atEnd() ? &*I : 0; } bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { @@ -242,18 +252,31 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, } } -void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) { - for (int i = UsedPhysRegs.find_first(); i >= 0; - i = UsedPhysRegs.find_next(i)) - for (const unsigned *SS = TRI.getSubRegisters(i); - unsigned SubReg = *SS; ++SS) - if (SubReg > unsigned(i)) - UsedPhysRegs.set(SubReg); -} - #ifndef NDEBUG void MachineRegisterInfo::dumpUses(unsigned Reg) const { for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I) I.getOperand().getParent()->dump(); } #endif + +void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { + ReservedRegs = TRI->getReservedRegs(MF); +} + +bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, + const MachineFunction &MF) const { + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + + // Check if any overlapping register is modified. + for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R) + if (!def_empty(*R)) + return false; + + // Check if any overlapping register is allocatable so it may be used later. + if (AllocatableRegs.empty()) + AllocatableRegs = TRI->getAllocatableSet(MF); + for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R) + if (AllocatableRegs.test(*R)) + return false; + return true; +} diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 84d6df25397c..070a55704dc5 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -81,7 +81,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB, if (BB->empty()) return 0; - MachineBasicBlock::iterator I = BB->front(); + MachineBasicBlock::iterator I = BB->begin(); if (!I->isPHI()) return 0; @@ -182,7 +182,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { return DupPHI; // Otherwise, we do need a PHI: insert one now. - MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, VRC, MRI, TII); @@ -214,7 +214,6 @@ MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI, } llvm_unreachable("MachineOperand::getParent() failure?"); - return 0; } /// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, @@ -311,7 +310,7 @@ public: /// Add it into the specified block and return the register. static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds, MachineSSAUpdater *Updater) { - MachineBasicBlock::iterator Loc = BB->empty() ? 
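// Standalone illustrative sketch, not part of the patch: the check behind the
// new isConstantPhysReg() queries above. Overlaps, HasDef and Allocatable are
// toy stand-ins for getOverlaps(), def_empty() and getAllocatableSet(); a
// physical register is treated as constant only if nothing overlapping it is
// ever written and nothing overlapping it could be handed out by the register
// allocator later.
#include <vector>

bool isConstantPhysReg(unsigned PhysReg,
                       const std::vector<std::vector<unsigned>> &Overlaps,
                       const std::vector<bool> &HasDef,
                       const std::vector<bool> &Allocatable) {
  for (unsigned R : Overlaps[PhysReg])
    if (HasDef[R])
      return false;            // Some alias or sub/super-register is written.
  for (unsigned R : Overlaps[PhysReg])
    if (Allocatable[R])
      return false;            // Could be allocated, and then written, later.
  return true;
}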
BB->end() : BB->front(); + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, Updater->VRC, Updater->MRI, Updater->TII); diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp new file mode 100644 index 000000000000..1d3241b8cc6b --- /dev/null +++ b/lib/CodeGen/MachineScheduler.cpp @@ -0,0 +1,614 @@ +//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// MachineScheduler schedules machine instructions after phi elimination. It +// preserves LiveIntervals so it can be invoked before register allocation. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "misched" + +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/PriorityQueue.h" + +#include <queue> + +using namespace llvm; + +static cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, + cl::desc("Force top-down list scheduling")); +static cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, + cl::desc("Force bottom-up list scheduling")); + +#ifndef NDEBUG +static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, + cl::desc("Pop up a window to show MISched dags after they are processed")); + +static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, + cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); +#else +static bool ViewMISchedDAGs = false; +#endif // NDEBUG + +//===----------------------------------------------------------------------===// +// Machine Instruction Scheduling Pass and Registry +//===----------------------------------------------------------------------===// + +namespace { +/// MachineScheduler runs after coalescing and before register allocation. 
+class MachineScheduler : public MachineSchedContext, + public MachineFunctionPass { +public: + MachineScheduler(); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual void releaseMemory() {} + + virtual bool runOnMachineFunction(MachineFunction&); + + virtual void print(raw_ostream &O, const Module* = 0) const; + + static char ID; // Class identification, replacement for typeinfo +}; +} // namespace + +char MachineScheduler::ID = 0; + +char &llvm::MachineSchedulerID = MachineScheduler::ID; + +INITIALIZE_PASS_BEGIN(MachineScheduler, "misched", + "Machine Instruction Scheduler", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(MachineScheduler, "misched", + "Machine Instruction Scheduler", false, false) + +MachineScheduler::MachineScheduler() +: MachineFunctionPass(ID) { + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); +} + +void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(MachineDominatorsID); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetPassConfig>(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachinePassRegistry MachineSchedRegistry::Registry; + +/// A dummy default scheduler factory indicates whether the scheduler +/// is overridden on the command line. +static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { + return 0; +} + +/// MachineSchedOpt allows command line selection of the scheduler. +static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, + RegisterPassParser<MachineSchedRegistry> > +MachineSchedOpt("misched", + cl::init(&useDefaultMachineSched), cl::Hidden, + cl::desc("Machine instruction scheduler to use")); + +static MachineSchedRegistry +DefaultSchedRegistry("default", "Use the target's default scheduler choice.", + useDefaultMachineSched); + +/// Forward declare the standard machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); + +/// Top-level MachineScheduler pass driver. +/// +/// Visit blocks in function order. Divide each block into scheduling regions +/// and visit them bottom-up. Visiting regions bottom-up is not required, but is +/// consistent with the DAG builder, which traverses the interior of the +/// scheduling regions bottom-up. +/// +/// This design avoids exposing scheduling boundaries to the DAG builder, +/// simplifying the DAG builder's support for "special" target instructions. +/// At the same time the design allows target schedulers to operate across +/// scheduling boundaries, for example to bundle the boudary instructions +/// without reordering them. This creates complexity, because the target +/// scheduler must update the RegionBegin and RegionEnd positions cached by +/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler +/// design would be to split blocks at scheduling boundaries, but LLVM has a +/// general bias against block splitting purely for implementation simplicity. +bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { + // Initialize the context of the pass. 
+ MF = &mf; + MLI = &getAnalysis<MachineLoopInfo>(); + MDT = &getAnalysis<MachineDominatorTree>(); + PassConfig = &getAnalysis<TargetPassConfig>(); + AA = &getAnalysis<AliasAnalysis>(); + + LIS = &getAnalysis<LiveIntervals>(); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + + // Select the scheduler, or set the default. + MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; + if (Ctor == useDefaultMachineSched) { + // Get the default scheduler set by the target. + Ctor = MachineSchedRegistry::getDefault(); + if (!Ctor) { + Ctor = createConvergingSched; + MachineSchedRegistry::setDefault(Ctor); + } + } + // Instantiate the selected scheduler. + OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this)); + + // Visit all machine basic blocks. + for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); + MBB != MBBEnd; ++MBB) { + + Scheduler->startBlock(MBB); + + // Break the block into scheduling regions [I, RegionEnd), and schedule each + // region as soon as it is discovered. RegionEnd points the the scheduling + // boundary at the bottom of the region. The DAG does not include RegionEnd, + // but the region does (i.e. the next RegionEnd is above the previous + // RegionBegin). If the current block has no terminator then RegionEnd == + // MBB->end() for the bottom region. + // + // The Scheduler may insert instructions during either schedule() or + // exitRegion(), even for empty regions. So the local iterators 'I' and + // 'RegionEnd' are invalid across these calls. + unsigned RemainingCount = MBB->size(); + for(MachineBasicBlock::iterator RegionEnd = MBB->end(); + RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) { + // Avoid decrementing RegionEnd for blocks with no terminator. + if (RegionEnd != MBB->end() + || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) { + --RegionEnd; + // Count the boundary instruction. + --RemainingCount; + } + + // The next region starts above the previous region. Look backward in the + // instruction stream until we find the nearest boundary. + MachineBasicBlock::iterator I = RegionEnd; + for(;I != MBB->begin(); --I, --RemainingCount) { + if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) + break; + } + // Notify the scheduler of the region, even if we may skip scheduling + // it. Perhaps it still needs to be bundled. + Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount); + + // Skip empty scheduling regions (0 or 1 schedulable instructions). + if (I == RegionEnd || I == llvm::prior(RegionEnd)) { + // Close the current region. Bundle the terminator if needed. + // This invalidates 'RegionEnd' and 'I'. + Scheduler->exitRegion(); + continue; + } + DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName() + << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; + if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; + else dbgs() << "End"; + dbgs() << " Remaining: " << RemainingCount << "\n"); + + // Schedule a region: possibly reorder instructions. + // This invalidates 'RegionEnd' and 'I'. + Scheduler->schedule(); + + // Close the current region. + Scheduler->exitRegion(); + + // Scheduling has invalidated the current iterator 'I'. Ask the + // scheduler for the top of it's scheduled region. 
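// Standalone illustrative sketch, not part of the patch: the bottom-up region
// splitting described in the comment above, on a plain vector of
// "instructions" with a boundary predicate. scheduleRegion is a hypothetical
// callback; the point is how RegionEnd walks upward from the block end, how
// the boundary instruction is counted but not scheduled, and how regions with
// zero or one schedulable instruction are skipped.
#include <cassert>
#include <cstddef>
#include <functional>
#include <vector>

void splitIntoRegions(const std::vector<int> &Block,
                      const std::function<bool(int)> &IsBoundary,
                      const std::function<void(std::size_t, std::size_t)>
                          &scheduleRegion) {
  std::size_t Remaining = Block.size();
  std::size_t RegionEnd = Block.size();
  while (RegionEnd != 0) {
    // Step over (and count) the boundary instruction at the bottom, unless we
    // are at the very end of a block that has no terminator.
    if (RegionEnd != Block.size() || IsBoundary(Block[RegionEnd - 1])) {
      --RegionEnd;
      --Remaining;
    }
    // Walk upward to the nearest boundary above; [I, RegionEnd) is a region.
    std::size_t I = RegionEnd;
    while (I != 0 && !IsBoundary(Block[I - 1])) {
      --I;
      --Remaining;
    }
    if (RegionEnd - I > 1)
      scheduleRegion(I, RegionEnd);   // Skip empty/one-instruction regions.
    // The next region ends just above where this one began.
    RegionEnd = I;
  }
  assert(Remaining == 0 && "Instruction count mismatch!");
  (void)Remaining;
}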
+ RegionEnd = Scheduler->begin(); + } + assert(RemainingCount == 0 && "Instruction count mismatch!"); + Scheduler->finishBlock(); + } + Scheduler->finalizeSchedule(); + DEBUG(LIS->print(dbgs())); + return true; +} + +void MachineScheduler::print(raw_ostream &O, const Module* m) const { + // unimplemented +} + +//===----------------------------------------------------------------------===// +// MachineSchedStrategy - Interface to a machine scheduling algorithm. +//===----------------------------------------------------------------------===// + +namespace { +class ScheduleDAGMI; + +/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected +/// scheduling algorithm. +/// +/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it +/// in ScheduleDAGInstrs.h +class MachineSchedStrategy { +public: + virtual ~MachineSchedStrategy() {} + + /// Initialize the strategy after building the DAG for a new region. + virtual void initialize(ScheduleDAGMI *DAG) = 0; + + /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to + /// schedule the node at the top of the unscheduled region. Otherwise it will + /// be scheduled at the bottom. + virtual SUnit *pickNode(bool &IsTopNode) = 0; + + /// When all predecessor dependencies have been resolved, free this node for + /// top-down scheduling. + virtual void releaseTopNode(SUnit *SU) = 0; + /// When all successor dependencies have been resolved, free this node for + /// bottom-up scheduling. + virtual void releaseBottomNode(SUnit *SU) = 0; +}; +} // namespace + +//===----------------------------------------------------------------------===// +// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals +// preservation. +//===----------------------------------------------------------------------===// + +namespace { +/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules +/// machine instructions while updating LiveIntervals. +class ScheduleDAGMI : public ScheduleDAGInstrs { + AliasAnalysis *AA; + MachineSchedStrategy *SchedImpl; + + /// The top of the unscheduled zone. + MachineBasicBlock::iterator CurrentTop; + + /// The bottom of the unscheduled zone. + MachineBasicBlock::iterator CurrentBottom; + + /// The number of instructions scheduled so far. Used to cut off the + /// scheduler at the point determined by misched-cutoff. + unsigned NumInstrsScheduled; +public: + ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S): + ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS), + AA(C->AA), SchedImpl(S), CurrentTop(), CurrentBottom(), + NumInstrsScheduled(0) {} + + ~ScheduleDAGMI() { + delete SchedImpl; + } + + MachineBasicBlock::iterator top() const { return CurrentTop; } + MachineBasicBlock::iterator bottom() const { return CurrentBottom; } + + /// Implement ScheduleDAGInstrs interface. + void schedule(); + +protected: + void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); + bool checkSchedLimit(); + + void releaseSucc(SUnit *SU, SDep *SuccEdge); + void releaseSuccessors(SUnit *SU); + void releasePred(SUnit *SU, SDep *PredEdge); + void releasePredecessors(SUnit *SU); +}; +} // namespace + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When +/// NumPredsLeft reaches zero, release the successor node. 
+void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { + SUnit *SuccSU = SuccEdge->getSUnit(); + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --SuccSU->NumPredsLeft; + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + SchedImpl->releaseTopNode(SuccSU); +} + +/// releaseSuccessors - Call releaseSucc on each of SU's successors. +void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + releaseSucc(SU, &*I); + } +} + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When +/// NumSuccsLeft reaches zero, release the predecessor node. +void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --PredSU->NumSuccsLeft; + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) + SchedImpl->releaseBottomNode(PredSU); +} + +/// releasePredecessors - Call releasePred on each of SU's predecessors. +void ScheduleDAGMI::releasePredecessors(SUnit *SU) { + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + releasePred(SU, &*I); + } +} + +void ScheduleDAGMI::moveInstruction(MachineInstr *MI, + MachineBasicBlock::iterator InsertPos) { + // Fix RegionBegin if the first instruction moves down. + if (&*RegionBegin == MI) + RegionBegin = llvm::next(RegionBegin); + BB->splice(InsertPos, BB, MI); + LIS->handleMove(MI); + // Fix RegionBegin if another instruction moves above the first instruction. + if (RegionBegin == InsertPos) + RegionBegin = MI; +} + +bool ScheduleDAGMI::checkSchedLimit() { +#ifndef NDEBUG + if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { + CurrentTop = CurrentBottom; + return false; + } + ++NumInstrsScheduled; +#endif + return true; +} + +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. +void ScheduleDAGMI::schedule() { + buildSchedGraph(AA); + + DEBUG(dbgs() << "********** MI Scheduling **********\n"); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + if (ViewMISchedDAGs) viewGraph(); + + SchedImpl->initialize(this); + + // Release edges from the special Entry node or to the special Exit node. + releaseSuccessors(&EntrySU); + releasePredecessors(&ExitSU); + + // Release all DAG roots for scheduling. + for (std::vector<SUnit>::iterator I = SUnits.begin(), E = SUnits.end(); + I != E; ++I) { + // A SUnit is ready to top schedule if it has no predecessors. + if (I->Preds.empty()) + SchedImpl->releaseTopNode(&(*I)); + // A SUnit is ready to bottom schedule if it has no successors. + if (I->Succs.empty()) + SchedImpl->releaseBottomNode(&(*I)); + } + + CurrentTop = RegionBegin; + CurrentBottom = RegionEnd; + bool IsTopNode = false; + while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") + << " Scheduling Instruction:\n"; SU->dump(this)); + if (!checkSchedLimit()) + break; + + // Move the instruction to its new location in the instruction stream. 
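// Standalone illustrative sketch, not part of the patch: the counter-based
// release scheme used by releaseSucc/releasePred above, shown for the
// top-down direction only. Succs and NumPredsLeft are toy stand-ins for the
// SUnit edge lists; a node becomes ready exactly when its last unscheduled
// predecessor has been scheduled.
#include <cassert>
#include <vector>

struct ToyDAG {
  std::vector<std::vector<unsigned>> Succs; // successor indices per node
  std::vector<unsigned> NumPredsLeft;       // unscheduled predecessor counts
  std::vector<unsigned> Ready;              // nodes free for scheduling
};

void releaseSuccessors(ToyDAG &DAG, unsigned SU) {
  for (unsigned Succ : DAG.Succs[SU]) {
    assert(DAG.NumPredsLeft[Succ] > 0 && "released too many times");
    if (--DAG.NumPredsLeft[Succ] == 0)
      DAG.Ready.push_back(Succ);            // releaseTopNode(Succ)
  }
}

// Driver: seed the ready set with the roots, then keep scheduling.
std::vector<unsigned> scheduleTopDown(ToyDAG &DAG) {
  std::vector<unsigned> Order;
  for (unsigned i = 0, e = (unsigned)DAG.NumPredsLeft.size(); i != e; ++i)
    if (DAG.NumPredsLeft[i] == 0)
      DAG.Ready.push_back(i);
  while (!DAG.Ready.empty()) {
    unsigned SU = DAG.Ready.back();
    DAG.Ready.pop_back();
    Order.push_back(SU);
    releaseSuccessors(DAG, SU);
  }
  return Order;
}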
+ MachineInstr *MI = SU->getInstr(); + + if (IsTopNode) { + assert(SU->isTopReady() && "node still has unscheduled dependencies"); + if (&*CurrentTop == MI) + ++CurrentTop; + else + moveInstruction(MI, CurrentTop); + // Release dependent instructions for scheduling. + releaseSuccessors(SU); + } + else { + assert(SU->isBottomReady() && "node still has unscheduled dependencies"); + if (&*llvm::prior(CurrentBottom) == MI) + --CurrentBottom; + else { + if (&*CurrentTop == MI) + CurrentTop = llvm::next(CurrentTop); + moveInstruction(MI, CurrentBottom); + CurrentBottom = MI; + } + // Release dependent instructions for scheduling. + releasePredecessors(SU); + } + SU->isScheduled = true; + } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); +} + +//===----------------------------------------------------------------------===// +// ConvergingScheduler - Implementation of the standard MachineSchedStrategy. +//===----------------------------------------------------------------------===// + +namespace { +/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance +/// the schedule. +class ConvergingScheduler : public MachineSchedStrategy { + ScheduleDAGMI *DAG; + + unsigned NumTopReady; + unsigned NumBottomReady; + +public: + virtual void initialize(ScheduleDAGMI *dag) { + DAG = dag; + + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); + } + + virtual SUnit *pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) + return NULL; + + // As an initial placeholder heuristic, schedule in the direction that has + // the fewest choices. + SUnit *SU; + if (ForceTopDown || (!ForceBottomUp && NumTopReady <= NumBottomReady)) { + SU = DAG->getSUnit(DAG->top()); + IsTopNode = true; + } + else { + SU = DAG->getSUnit(llvm::prior(DAG->bottom())); + IsTopNode = false; + } + if (SU->isTopReady()) { + assert(NumTopReady > 0 && "bad ready count"); + --NumTopReady; + } + if (SU->isBottomReady()) { + assert(NumBottomReady > 0 && "bad ready count"); + --NumBottomReady; + } + return SU; + } + + virtual void releaseTopNode(SUnit *SU) { + ++NumTopReady; + } + virtual void releaseBottomNode(SUnit *SU) { + ++NumBottomReady; + } +}; +} // namespace + +/// Create the standard converging machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); + return new ScheduleDAGMI(C, new ConvergingScheduler()); +} +static MachineSchedRegistry +ConvergingSchedRegistry("converge", "Standard converging scheduler.", + createConvergingSched); + +//===----------------------------------------------------------------------===// +// Machine Instruction Shuffler for Correctness Testing +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +namespace { +/// Apply a less-than relation on the node order, which corresponds to the +/// instruction order prior to scheduling. IsReverse implements greater-than. +template<bool IsReverse> +struct SUnitOrder { + bool operator()(SUnit *A, SUnit *B) const { + if (IsReverse) + return A->NodeNum > B->NodeNum; + else + return A->NodeNum < B->NodeNum; + } +}; + +/// Reorder instructions as much as possible. 
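// Standalone illustrative sketch, not part of the patch: how the unscheduled
// zone shrinks from both ends in ScheduleDAGMI::schedule() above, using
// std::list as a stand-in for the instruction list. The unscheduled zone is
// [Top, Bottom); placeTop/placeBottom mirror what happens when the strategy
// picks a node for the top or the bottom.
#include <iterator>
#include <list>

void placeTop(std::list<int> &Block, std::list<int>::iterator &Top,
              std::list<int>::iterator It) {
  if (It == Top)
    ++Top;                        // Already in place; just shrink the zone.
  else
    Block.splice(Top, Block, It); // Move it up to the current top.
}

void placeBottom(std::list<int> &Block, std::list<int>::iterator &Top,
                 std::list<int>::iterator &Bottom,
                 std::list<int>::iterator It) {
  if (It == std::prev(Bottom)) {
    --Bottom;                     // Already in place; shrink from below.
    return;
  }
  if (It == Top)
    ++Top;                        // Don't leave Top pointing at a moved node.
  Block.splice(Bottom, Block, It);
  Bottom = It;                    // The moved node is the new bottom boundary.
}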
+class InstructionShuffler : public MachineSchedStrategy { + bool IsAlternating; + bool IsTopDown; + + // Using a less-than relation (SUnitOrder<false>) for the TopQ priority + // gives nodes with a higher number higher priority causing the latest + // instructions to be scheduled first. + PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> > + TopQ; + // When scheduling bottom-up, use greater-than as the queue priority. + PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> > + BottomQ; +public: + InstructionShuffler(bool alternate, bool topdown) + : IsAlternating(alternate), IsTopDown(topdown) {} + + virtual void initialize(ScheduleDAGMI *) { + TopQ.clear(); + BottomQ.clear(); + } + + /// Implement MachineSchedStrategy interface. + /// ----------------------------------------- + + virtual SUnit *pickNode(bool &IsTopNode) { + SUnit *SU; + if (IsTopDown) { + do { + if (TopQ.empty()) return NULL; + SU = TopQ.top(); + TopQ.pop(); + } while (SU->isScheduled); + IsTopNode = true; + } + else { + do { + if (BottomQ.empty()) return NULL; + SU = BottomQ.top(); + BottomQ.pop(); + } while (SU->isScheduled); + IsTopNode = false; + } + if (IsAlternating) + IsTopDown = !IsTopDown; + return SU; + } + + virtual void releaseTopNode(SUnit *SU) { + TopQ.push(SU); + } + virtual void releaseBottomNode(SUnit *SU) { + BottomQ.push(SU); + } +}; +} // namespace + +static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { + bool Alternate = !ForceTopDown && !ForceBottomUp; + bool TopDown = !ForceBottomUp; + assert((TopDown || !ForceTopDown) && + "-misched-topdown incompatible with -misched-bottomup"); + return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown)); +} +static MachineSchedRegistry ShufflerRegistry( + "shuffle", "Shuffle machine instructions alternating directions", + createInstructionShuffler); +#endif // !NDEBUG diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 29cfb49953b9..1ce546b578ad 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -32,7 +32,7 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -static cl::opt<bool> +static cl::opt<bool> SplitEdges("machine-sink-split", cl::desc("Split critical edges during machine sinking"), cl::init(true), cl::Hidden); @@ -90,12 +90,19 @@ namespace { bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const; + MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB, + bool &BreakPHIEdge); + bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, + MachineBasicBlock *MBB, + MachineBasicBlock *SuccToSinkTo); + bool PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); }; } // end anonymous namespace char MachineSinking::ID = 0; +char &llvm::MachineSinkingID = MachineSinking::ID; INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) @@ -104,8 +111,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MachineSinking, "machine-sink", "Machine code sinking", false, false) -FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); } - bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB) { if (!MI->isCopy()) @@ -147,14 +152,10 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); + // Ignore 
debug uses because debug info doesn't affect the code. if (MRI->use_nodbg_empty(Reg)) return true; - // Ignoring debug uses is necessary so debug info doesn't affect the code. - // This may leave a referencing dbg_value in the original block, before - // the definition of the vreg. Dwarf generator handles this although the - // user might not get the right info at runtime. - // BreakPHIEdge is true if all the uses are in the successor MBB being sunken // into and they are all PHI nodes. In this case, machine-sink must break // the critical edge first. e.g. @@ -291,7 +292,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, if (!CEBCandidates.insert(std::make_pair(From, To))) return true; - if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove()) + if (!MI->isCopy() && !MI->isAsCheapAsAMove()) return true; // MI is cheap, we probably don't want to break the critical edge for it. @@ -382,9 +383,9 @@ static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) { return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence(); } -/// collectDebgValues - Scan instructions following MI and collect any +/// collectDebgValues - Scan instructions following MI and collect any /// matching DBG_VALUEs. -static void collectDebugValues(MachineInstr *MI, +static void collectDebugValues(MachineInstr *MI, SmallVector<MachineInstr *, 2> & DbgValues) { DbgValues.clear(); if (!MI->getOperand(0).isReg()) @@ -401,35 +402,76 @@ static void collectDebugValues(MachineInstr *MI, } } -/// SinkInstruction - Determine whether it is safe to sink the specified machine -/// instruction out of its current block into a successor. -bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { - // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to - // be close to the source to make it easier to coalesce. - if (AvoidsSinking(MI, MRI)) +/// isPostDominatedBy - Return true if A is post dominated by B. +static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) { + + // FIXME - Use real post dominator. + if (A->succ_size() != 2) + return false; + MachineBasicBlock::succ_iterator I = A->succ_begin(); + if (B == *I) + ++I; + MachineBasicBlock *OtherSuccBlock = *I; + if (OtherSuccBlock->succ_size() != 1 || + *(OtherSuccBlock->succ_begin()) != B) return false; - // Check if it's safe to move the instruction. - if (!MI->isSafeToMove(TII, AA, SawStore)) + return true; +} + +/// isProfitableToSinkTo - Return true if it is profitable to sink MI. +bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, + MachineBasicBlock *MBB, + MachineBasicBlock *SuccToSinkTo) { + assert (MI && "Invalid MachineInstr!"); + assert (SuccToSinkTo && "Invalid SinkTo Candidate BB"); + + if (MBB == SuccToSinkTo) return false; - // FIXME: This should include support for sinking instructions within the - // block they are currently in to shorten the live ranges. We often get - // instructions sunk into the top of a large block, but it would be better to - // also sink them down before their first use in the block. This xform has to - // be careful not to *increase* register pressure though, e.g. sinking - // "x = y + z" down if it kills y and z would increase the live ranges of y - // and z and only shrink the live range of x. + // It is profitable if SuccToSinkTo does not post dominate current block. + if (!isPostDominatedBy(MBB, SuccToSinkTo)) + return true; + + // Check if only use in post dominated block is PHI instruction. 
+ bool NonPHIUse = false; + for (MachineRegisterInfo::use_nodbg_iterator + I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); + I != E; ++I) { + MachineInstr *UseInst = &*I; + MachineBasicBlock *UseBlock = UseInst->getParent(); + if (UseBlock == SuccToSinkTo && !UseInst->isPHI()) + NonPHIUse = true; + } + if (!NonPHIUse) + return true; + + // If SuccToSinkTo post dominates then also it may be profitable if MI + // can further profitably sinked into another block in next round. + bool BreakPHIEdge = false; + // FIXME - If finding successor is compile time expensive then catch results. + if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge)) + return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2); + + // If SuccToSinkTo is final destination and it is a post dominator of current + // block then it is not profitable to sink MI into SuccToSinkTo block. + return false; +} + +/// FindSuccToSinkTo - Find a successor to sink this instruction to. +MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, + MachineBasicBlock *MBB, + bool &BreakPHIEdge) { + + assert (MI && "Invalid MachineInstr!"); + assert (MBB && "Invalid MachineBasicBlock!"); // Loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. - MachineBasicBlock *ParentBlock = MI->getParent(); // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. MachineBasicBlock *SuccToSinkTo = 0; - - bool BreakPHIEdge = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; // Ignore non-register operands. @@ -442,24 +484,11 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI->def_empty(Reg)) - return false; - - if (AllocatableSet.test(Reg)) - return false; - - // Check for a def among the register's aliases too. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - if (!MRI->def_empty(AliasReg)) - return false; - - if (AllocatableSet.test(AliasReg)) - return false; - } + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + return NULL; } else if (!MO.isDead()) { // A def that isn't dead. We can't move it. - return false; + return NULL; } } else { // Virtual register uses are always safe to sink. @@ -467,7 +496,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If it's not safe to move defs of the register class, then abort. if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) - return false; + return NULL; // FIXME: This picks a successor to sink into based on having one // successor that dominates all the uses. However, there are cases where @@ -488,48 +517,79 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If a previous operand picked a block to sink to, then this operand // must be sinkable to the same block. bool LocalUse = false; - if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge, LocalUse)) - return false; + return NULL; continue; } // Otherwise, we should look at all the successors and decide which one // we should sink to. 
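// Standalone illustrative sketch, not part of the patch: the approximate
// post-dominance test that isProfitableToSinkTo leans on above, over a toy
// CFG given as successor lists. It only recognizes the shape the FIXME
// mentions: A has exactly two successors, and the one that is not B falls
// through to B alone.
#include <vector>

typedef std::vector<std::vector<unsigned>> CFG; // Succs[block] = successor ids

bool isPostDominatedBy(const CFG &Succs, unsigned A, unsigned B) {
  if (Succs[A].size() != 2)
    return false;
  unsigned Other = (Succs[A][0] == B) ? Succs[A][1] : Succs[A][0];
  return Succs[Other].size() == 1 && Succs[Other][0] == B;
}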
- for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(), - E = ParentBlock->succ_end(); SI != E; ++SI) { + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccBlock = *SI; bool LocalUse = false; - if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, + if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, BreakPHIEdge, LocalUse)) { - SuccToSinkTo = *SI; + SuccToSinkTo = SuccBlock; break; } if (LocalUse) // Def is used locally, it's never safe to move this def. - return false; + return NULL; } // If we couldn't find a block to sink to, ignore this instruction. if (SuccToSinkTo == 0) - return false; + return NULL; + else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo)) + return NULL; } } - // If there are no outputs, it must have side-effects. - if (SuccToSinkTo == 0) - return false; + // It is not possible to sink an instruction into its own block. This can + // happen with loops. + if (MBB == SuccToSinkTo) + return NULL; // It's not safe to sink instructions to EH landing pad. Control flow into // landing pad is implicitly defined. - if (SuccToSinkTo->isLandingPad()) + if (SuccToSinkTo && SuccToSinkTo->isLandingPad()) + return NULL; + + return SuccToSinkTo; +} + +/// SinkInstruction - Determine whether it is safe to sink the specified machine +/// instruction out of its current block into a successor. +bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { + // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to + // be close to the source to make it easier to coalesce. + if (AvoidsSinking(MI, MRI)) return false; - // It is not possible to sink an instruction into its own block. This can - // happen with loops. - if (MI->getParent() == SuccToSinkTo) + // Check if it's safe to move the instruction. + if (!MI->isSafeToMove(TII, AA, SawStore)) return false; + // FIXME: This should include support for sinking instructions within the + // block they are currently in to shorten the live ranges. We often get + // instructions sunk into the top of a large block, but it would be better to + // also sink them down before their first use in the block. This xform has to + // be careful not to *increase* register pressure though, e.g. sinking + // "x = y + z" down if it kills y and z would increase the live ranges of y + // and z and only shrink the live range of x. + + bool BreakPHIEdge = false; + MachineBasicBlock *ParentBlock = MI->getParent(); + MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge); + + // If there are no outputs, it must have side-effects. + if (SuccToSinkTo == 0) + return false; + + // If the instruction to move defines a dead physical register which is live // when leaving the basic block, don't move it because it could turn into a // "zombie" define of that preg. E.g., EFLAGS. 
(<rdar://problem/8030636>) diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 26847d39e7ad..74ba94d1fcc0 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -69,14 +70,17 @@ namespace { unsigned foundErrors; typedef SmallVector<unsigned, 16> RegVector; + typedef SmallVector<const uint32_t*, 4> RegMaskVector; typedef DenseSet<unsigned> RegSet; typedef DenseMap<unsigned, const MachineInstr*> RegMap; const MachineInstr *FirstTerminator; BitVector regsReserved; + BitVector regsAllocatable; RegSet regsLive; RegVector regsDefined, regsDead, regsKilled; + RegMaskVector regMasks; RegSet regsLiveInButUnused; SlotIndex lastIndex; @@ -85,7 +89,7 @@ namespace { void addRegWithSubRegs(RegVector &RV, unsigned Reg) { RV.push_back(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) - for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++) + for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++) RV.push_back(*R); } @@ -175,6 +179,10 @@ namespace { return Reg < regsReserved.size() && regsReserved.test(Reg); } + bool isAllocatable(unsigned Reg) { + return Reg < regsAllocatable.size() && regsAllocatable.test(Reg); + } + // Analysis information if available LiveVariables *LiveVars; LiveIntervals *LiveInts; @@ -194,6 +202,7 @@ namespace { void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); + void checkLiveness(const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); void calcRegsPassed(); void checkPHIOps(const MachineBasicBlock *MBB); @@ -279,13 +288,17 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); MFI!=MFE; ++MFI) { visitMachineBasicBlockBefore(MFI); - for (MachineBasicBlock::const_iterator MBBI = MFI->begin(), - MBBE = MFI->end(); MBBI != MBBE; ++MBBI) { + for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), + MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { if (MBBI->getParent() != MFI) { report("Bad instruction parent pointer", MFI); *OS << "Instruction: " << *MBBI; continue; } + // Skip BUNDLE instruction for now. FIXME: We should add code to verify + // the BUNDLE's specifically. 
+ if (MBBI->isBundle()) + continue; visitMachineInstrBefore(MBBI); for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) visitMachineOperand(&MBBI->getOperand(I), I); @@ -305,6 +318,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { regsDefined.clear(); regsDead.clear(); regsKilled.clear(); + regMasks.clear(); regsLiveInButUnused.clear(); MBBInfoMap.clear(); @@ -320,7 +334,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { MF->print(*OS, Indexes); } *OS << "*** Bad machine code: " << msg << " ***\n" - << "- function: " << MF->getFunction()->getNameStr() << "\n"; + << "- function: " << MF->getFunction()->getName() << "\n"; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { @@ -370,12 +384,15 @@ void MachineVerifier::visitMachineFunctionBefore() { // A sub-register of a reserved register is also reserved for (int Reg = regsReserved.find_first(); Reg>=0; Reg = regsReserved.find_next(Reg)) { - for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) { + for (const uint16_t *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) { // FIXME: This should probably be: // assert(regsReserved.test(*Sub) && "Non-reserved sub-register"); regsReserved.set(*Sub); } } + + regsAllocatable = TRI->getAllocatableSet(*MF); + markReachable(&MF->front()); } @@ -393,6 +410,20 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { FirstTerminator = 0; + if (MRI->isSSA()) { + // If this block has allocatable physical registers live-in, check that + // it is an entry block or landing pad. + for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), + LE = MBB->livein_end(); + LI != LE; ++LI) { + unsigned reg = *LI; + if (isAllocatable(reg) && !MBB->isLandingPad() && + MBB != MBB->getParent()->begin()) { + report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB); + } + } + } + // Count the number of landing pad successors. 
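// A standalone sketch of the new SSA live-in rule the verifier enforces above:
// while the function is still in SSA form, an allocatable physical register may
// only be live into the entry block or a landing pad.  BitVector-style state is
// modelled with std::vector<bool>; the names are illustrative, not LLVM's.
#include <cstdio>
#include <vector>

struct BlockInfo {
  std::vector<unsigned> LiveIns;   // physical registers live into the block
  bool IsEntry = false;
  bool IsLandingPad = false;
};

void checkAllocatableLiveIns(const BlockInfo &MBB,
                             const std::vector<bool> &Allocatable) {
  for (unsigned Reg : MBB.LiveIns) {
    bool IsAllocatable = Reg < Allocatable.size() && Allocatable[Reg];
    if (IsAllocatable && !MBB.IsEntry && !MBB.IsLandingPad)
      std::printf("MBB has allocatable live-in %u but isn't entry or "
                  "landing-pad\n", Reg);
  }
}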
SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), @@ -435,7 +466,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } - if (!MBB->empty() && MBB->back().getDesc().isBarrier() && + if (!MBB->empty() && MBB->back().isBarrier() && !TII->isPredicated(&MBB->back())) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); @@ -456,10 +487,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via unconditional branch but doesn't contain " "any instructions!", MBB); - } else if (!MBB->back().getDesc().isBarrier()) { + } else if (!MBB->back().isBarrier()) { report("MBB exits via unconditional branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!MBB->back().getDesc().isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via unconditional branch but the branch isn't a " "terminator instruction!", MBB); } @@ -479,10 +510,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/fall-through but doesn't " "contain any instructions!", MBB); - } else if (MBB->back().getDesc().isBarrier()) { + } else if (MBB->back().isBarrier()) { report("MBB exits via conditional branch/fall-through but ends with a " "barrier instruction!", MBB); - } else if (!MBB->back().getDesc().isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/fall-through but the branch " "isn't a terminator instruction!", MBB); } @@ -499,10 +530,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/branch but doesn't " "contain any instructions!", MBB); - } else if (!MBB->back().getDesc().isBarrier()) { + } else if (!MBB->back().isBarrier()) { report("MBB exits via conditional branch/branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!MBB->back().getDesc().isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/branch but the branch " "isn't a terminator instruction!", MBB); } @@ -523,7 +554,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { continue; } regsLive.insert(*I); - for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++) + for (const uint16_t *R = TRI->getSubRegisters(*I); *R; R++) regsLive.insert(*R); } regsLiveInButUnused = regsLive; @@ -533,7 +564,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { BitVector PR = MFI->getPristineRegs(MBB); for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { regsLive.insert(I); - for (const unsigned *R = TRI->getSubRegisters(I); *R; R++) + for (const uint16_t *R = TRI->getSubRegisters(I); *R; R++) regsLive.insert(*R); } @@ -555,19 +586,22 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // Check the MachineMemOperands for basic consistency. 
for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I) { - if ((*I)->isLoad() && !MCID.mayLoad()) + if ((*I)->isLoad() && !MI->mayLoad()) report("Missing mayLoad flag", MI); - if ((*I)->isStore() && !MCID.mayStore()) + if ((*I)->isStore() && !MI->mayStore()) report("Missing mayStore flag", MI); } // Debug values must not have a slot index. - // Other instructions must have one. + // Other instructions must have one, unless they are inside a bundle. if (LiveInts) { bool mapped = !LiveInts->isNotInMIMap(MI); if (MI->isDebugValue()) { if (mapped) report("Debug instruction has a slot index", MI); + } else if (MI->isInsideBundle()) { + if (mapped) + report("Instruction inside bundle has a slot index", MI); } else { if (!mapped) report("Missing slot index", MI); @@ -575,7 +609,9 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } // Ensure non-terminators don't follow terminators. - if (MCID.isTerminator()) { + // Ignore predicated terminators formed by if conversion. + // FIXME: If conversion shouldn't need to violate this rule. + if (MI->isTerminator() && !TII->isPredicated(MI)) { if (!FirstTerminator) FirstTerminator = MI; } else if (FirstTerminator) { @@ -606,7 +642,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Don't check if it's the last operand in a variadic instruction. See, // e.g., LDM_RET in the arm back end. if (MO->isReg() && - !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) { + !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) { if (MO->isDef() && !MCOI.isOptionalDef()) report("Explicit operand marked as def", MO, MONum); if (MO->isImplicit()) @@ -614,7 +650,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } } else { // ARM adds %reg0 operands to indicate predicates. We'll allow that. - if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg()) + if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg()) report("Extra explicit operand on non-variadic instruction", MO, MONum); } @@ -623,112 +659,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const unsigned Reg = MO->getReg(); if (!Reg) return; + if (MRI->tracksLiveness() && !MI->isDebugValue()) + checkLiveness(MO, MONum); - // Check Live Variables. - if (MI->isDebugValue()) { - // Liveness checks are not valid for debug values. - } else if (MO->isUse() && !MO->isUndef()) { - regsLiveInButUnused.erase(Reg); - - bool isKill = false; - unsigned defIdx; - if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { - // A two-addr use counts as a kill if use and def are the same. - unsigned DefReg = MI->getOperand(defIdx).getReg(); - if (Reg == DefReg) - isKill = true; - else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - report("Two-address instruction operands must be identical", - MO, MONum); - } - } else - isKill = MO->isKill(); - - if (isKill) - addRegWithSubRegs(regsKilled, Reg); - - // Check that LiveVars knows this kill. - if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && - MO->isKill()) { - LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); - if (std::find(VI.Kills.begin(), - VI.Kills.end(), MI) == VI.Kills.end()) - report("Kill missing from LiveVariables", MO, MONum); - } - - // Check LiveInts liveness and kill. 
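// A self-contained sketch of the terminator-ordering rule as relaxed above:
// once the first unpredicated terminator is seen, every later instruction in
// the block must also be a terminator, and predicated terminators produced by
// if conversion are ignored when picking that "first terminator".  The Inst
// struct is a stand-in for MachineInstr, not the real class.
#include <cstdio>
#include <vector>

struct Inst {
  bool IsTerminator = false;
  bool IsPredicated = false;
};

void checkTerminatorOrder(const std::vector<Inst> &Block) {
  const Inst *FirstTerminator = nullptr;
  for (const Inst &MI : Block) {
    if (MI.IsTerminator && !MI.IsPredicated) {
      if (!FirstTerminator)
        FirstTerminator = &MI;
    } else if (FirstTerminator) {
      std::printf("non-terminator follows terminator\n");
    }
  }
}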
- if (TargetRegisterInfo::isVirtualRegister(Reg) && - LiveInts && !LiveInts->isNotInMIMap(MI)) { - SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex(); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (!LI.liveAt(UseIdx)) { - report("No live range at use", MO, MONum); - *OS << UseIdx << " is not live in " << LI << '\n'; - } - // Check for extra kill flags. - // Note that we allow missing kill flags for now. - if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) { - report("Live range continues after kill flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; - } - } else { - report("Virtual register has no Live interval", MO, MONum); - } - } - - // Use of a dead register. - if (!regsLive.count(Reg)) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - // Reserved registers may be used even when 'dead'. - if (!isReserved(Reg)) - report("Using an undefined physical register", MO, MONum); - } else { - BBInfo &MInfo = MBBInfoMap[MI->getParent()]; - // We don't know which virtual registers are live in, so only complain - // if vreg was killed in this MBB. Otherwise keep track of vregs that - // must be live in. PHI instructions are handled separately. - if (MInfo.regsKilled.count(Reg)) - report("Using a killed virtual register", MO, MONum); - else if (!MI->isPHI()) - MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); - } - } - } else if (MO->isDef()) { - // Register defined. - // TODO: verify that earlyclobber ops are not used. - if (MO->isDead()) - addRegWithSubRegs(regsDead, Reg); - else - addRegWithSubRegs(regsDefined, Reg); - - // Verify SSA form. - if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && - llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) - report("Multiple virtual register defs in SSA form", MO, MONum); - - // Check LiveInts for a live range, but only for virtual registers. - if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && - !LiveInts->isNotInMIMap(MI)) { - SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex(); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { - assert(VNI && "NULL valno is not allowed"); - if (VNI->def != DefIdx && !MO->isEarlyClobber()) { - report("Inconsistent valno->def", MO, MONum); - *OS << "Valno " << VNI->id << " is not defined at " - << DefIdx << " in " << LI << '\n'; - } - } else { - report("No live range at def", MO, MONum); - *OS << DefIdx << " is not live in " << LI << '\n'; - } - } else { - report("Virtual register has no Live interval", MO, MONum); - } - } - } // Check register classes. 
if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { @@ -790,6 +723,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { break; } + case MachineOperand::MO_RegisterMask: + regMasks.push_back(MO->getRegMask()); + break; + case MachineOperand::MO_MachineBasicBlock: if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent())) report("PHI operand is not in the CFG", MO, MONum); @@ -800,11 +737,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { LiveInts && !LiveInts->isNotInMIMap(MI)) { LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); SlotIndex Idx = LiveInts->getInstructionIndex(MI); - if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) { + if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) { report("Instruction loads from dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } - if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) { + if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) { report("Instruction stores to dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } @@ -816,10 +753,127 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } } +void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { + const MachineInstr *MI = MO->getParent(); + const unsigned Reg = MO->getReg(); + + // Both use and def operands can read a register. + if (MO->readsReg()) { + regsLiveInButUnused.erase(Reg); + + bool isKill = false; + unsigned defIdx; + if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { + // A two-addr use counts as a kill if use and def are the same. + unsigned DefReg = MI->getOperand(defIdx).getReg(); + if (Reg == DefReg) + isKill = true; + else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + report("Two-address instruction operands must be identical", MO, MONum); + } + } else + isKill = MO->isKill(); + + if (isKill) + addRegWithSubRegs(regsKilled, Reg); + + // Check that LiveVars knows this kill. + if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && + MO->isKill()) { + LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); + if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end()) + report("Kill missing from LiveVariables", MO, MONum); + } + + // Check LiveInts liveness and kill. + if (TargetRegisterInfo::isVirtualRegister(Reg) && + LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (!LI.liveAt(UseIdx)) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << LI << '\n'; + } + // Check for extra kill flags. + // Note that we allow missing kill flags for now. + if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { + report("Live range continues after kill flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } else { + report("Virtual register has no Live interval", MO, MONum); + } + } + + // Use of a dead register. + if (!regsLive.count(Reg)) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Reserved registers may be used even when 'dead'. + if (!isReserved(Reg)) + report("Using an undefined physical register", MO, MONum); + } else { + BBInfo &MInfo = MBBInfoMap[MI->getParent()]; + // We don't know which virtual registers are live in, so only complain + // if vreg was killed in this MBB. Otherwise keep track of vregs that + // must be live in. 
PHI instructions are handled separately. + if (MInfo.regsKilled.count(Reg)) + report("Using a killed virtual register", MO, MONum); + else if (!MI->isPHI()) + MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); + } + } + } + + if (MO->isDef()) { + // Register defined. + // TODO: verify that earlyclobber ops are not used. + if (MO->isDead()) + addRegWithSubRegs(regsDead, Reg); + else + addRegWithSubRegs(regsDefined, Reg); + + // Verify SSA form. + if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && + llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) + report("Multiple virtual register defs in SSA form", MO, MONum); + + // Check LiveInts for a live range, but only for virtual registers. + if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && + !LiveInts->isNotInMIMap(MI)) { + SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot(); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { + assert(VNI && "NULL valno is not allowed"); + if (VNI->def != DefIdx && !MO->isEarlyClobber()) { + report("Inconsistent valno->def", MO, MONum); + *OS << "Valno " << VNI->id << " is not defined at " + << DefIdx << " in " << LI << '\n'; + } + } else { + report("No live range at def", MO, MONum); + *OS << DefIdx << " is not live in " << LI << '\n'; + } + } else { + report("Virtual register has no Live interval", MO, MONum); + } + } + } +} + void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; set_union(MInfo.regsKilled, regsKilled); set_subtract(regsLive, regsKilled); regsKilled.clear(); + // Kill any masked registers. + while (!regMasks.empty()) { + const uint32_t *Mask = regMasks.pop_back_val(); + for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I) + if (TargetRegisterInfo::isPhysicalRegister(*I) && + MachineOperand::clobbersPhysReg(Mask, *I)) + regsDead.push_back(*I); + } set_subtract(regsLive, regsDead); regsDead.clear(); set_union(regsLive, regsDefined); regsDefined.clear(); @@ -855,7 +909,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { void MachineVerifier::calcRegsPassed() { // First push live-out regs to successors' vregsPassed. Remember the MBBs that // have any vregsPassed. - DenseSet<const MachineBasicBlock*> todo; + SmallPtrSet<const MachineBasicBlock*, 8> todo; for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { const MachineBasicBlock &MBB(*MFI); @@ -892,7 +946,7 @@ void MachineVerifier::calcRegsPassed() { // similar to calcRegsPassed, only backwards. void MachineVerifier::calcRegsRequired() { // First push live-in regs to predecessors' vregsRequired. - DenseSet<const MachineBasicBlock*> todo; + SmallPtrSet<const MachineBasicBlock*, 8> todo; for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { const MachineBasicBlock &MBB(*MFI); @@ -925,9 +979,10 @@ void MachineVerifier::calcRegsRequired() { // Check PHI instructions at the beginning of MBB. It is assumed that // calcRegsPassed has been run so BBInfo::isLiveOut is valid. 
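// A standalone sketch of how the collected register-mask operands are applied
// in visitMachineInstrAfter above.  A regmask is a packed bit vector with one
// bit per physical register; in LLVM a *set* bit means the register is
// preserved across the instruction (typically a call), so a clear bit marks a
// clobber, mirroring MachineOperand::clobbersPhysReg.  The live-set handling
// below is simplified and is not the verifier's real code.
#include <cstdint>
#include <set>
#include <vector>

inline bool clobbersPhysReg(const uint32_t *Mask, unsigned PhysReg) {
  return !(Mask[PhysReg / 32] & (1u << (PhysReg % 32)));
}

// Remove every live physical register that the mask does not preserve.
void applyRegMask(std::set<unsigned> &LiveRegs, const uint32_t *Mask) {
  std::vector<unsigned> Dead;
  for (unsigned Reg : LiveRegs)
    if (clobbersPhysReg(Mask, Reg))
      Dead.push_back(Reg);
  for (unsigned Reg : Dead)
    LiveRegs.erase(Reg);
}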
void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { + SmallPtrSet<const MachineBasicBlock*, 8> seen; for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); BBI != BBE && BBI->isPHI(); ++BBI) { - DenseSet<const MachineBasicBlock*> seen; + seen.clear(); for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); @@ -968,8 +1023,17 @@ void MachineVerifier::visitMachineFunctionAfter() { } // Now check liveness info if available - if (LiveVars || LiveInts) - calcRegsRequired(); + calcRegsRequired(); + + if (MRI->isSSA() && !MF->empty()) { + BBInfo &MInfo = MBBInfoMap[&MF->front()]; + for (RegSet::iterator + I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; + ++I) + report("Virtual register def doesn't dominate all uses.", + MRI->getVRegDef(*I)); + } + if (LiveVars) verifyLiveVariables(); if (LiveInts) @@ -1065,33 +1129,43 @@ void MachineVerifier::verifyLiveIntervals() { report("No instruction at def index", MF); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LI << '\n'; - } else if (!MI->modifiesRegister(LI.reg, TRI)) { - report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + continue; } + bool hasDef = false; bool isEarlyClobber = false; - if (MI) { - for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && - MOI->isEarlyClobber()) { - isEarlyClobber = true; - break; - } + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + if (MOI->getReg() != LI.reg) + continue; + } else { + if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + !TRI->regsOverlap(LI.reg, MOI->getReg())) + continue; } + hasDef = true; + if (MOI->isEarlyClobber()) + isEarlyClobber = true; + } + + if (!hasDef) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; } // Early clobber defs begin at USE slots, but other defs must begin at // DEF slots. if (isEarlyClobber) { - if (!VNI->def.isUse()) { - report("Early clobber def must be at a USE slot", MF); + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MF); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LI << '\n'; } - } else if (!VNI->def.isDef()) { - report("Non-PHI, non-early clobber def must be at a DEF slot", MF); + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MF); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LI << '\n'; } @@ -1137,32 +1211,76 @@ void MachineVerifier::verifyLiveIntervals() { *OS << " in " << LI << '\n'; continue; } - if (I->end != LiveInts->getMBBEndIdx(EndMBB)) { - // The live segment is ending inside EndMBB - const MachineInstr *MI = - LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); - if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB); + + // No more checks for live-out segments. 
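// A compact sketch of the slot layout the new value-number checks rely on.
// Each instruction position carries several consecutive slots; early-clobber
// defs live at the early-clobber slot, ordinary (non-PHI) defs at the register
// slot.  This enum-based model only illustrates the rule verified above and is
// not the real SlotIndex implementation.
enum SlotKind { BlockSlot, EarlyClobberSlot, RegisterSlot, DeadSlot };

struct Slot {
  unsigned InstrIdx;
  SlotKind Kind;
};

// Verify a def slot against the operand flags, as verifyLiveIntervals now does.
bool defSlotIsConsistent(const Slot &Def, bool IsEarlyClobberDef) {
  if (IsEarlyClobberDef)
    return Def.Kind == EarlyClobberSlot;  // EC defs must sit at the EC slot
  return Def.Kind == RegisterSlot;        // other defs at the register slot
}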
+ if (I->end == LiveInts->getMBBEndIdx(EndMBB)) + continue; + + // The live segment is ending inside EndMBB + const MachineInstr *MI = + LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + if (!MI) { + report("Live segment doesn't end at a valid instruction", EndMBB); I->print(*OS); *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; - } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) && - !MI->readsVirtualRegister(LI.reg)) { - // A live range can end with either a redefinition, a kill flag on a - // use, or a dead flag on a def. - // FIXME: Should we check for each of these? - bool hasDeadDef = false; - for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && MOI->isDead()) { - hasDeadDef = true; - break; - } - } + << MBBStartIdx << '\n'; + continue; + } + + // The block slot must refer to a basic block boundary. + if (I->end.isBlock()) { + report("Live segment ends at B slot of an instruction", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + + if (I->end.isDead()) { + // Segment ends on the dead slot. + // That means there must be a dead def. + if (!SlotIndex::isSameInstr(I->start, I->end)) { + report("Live segment ending at dead slot spans instructions", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + } + + // A live segment can only end at an early-clobber slot if it is being + // redefined by an early-clobber def. + if (I->end.isEarlyClobber()) { + if (I+1 == E || (I+1)->start != I->end) { + report("Live segment ending at early clobber slot must be " + "redefined by an EC def in the same instruction", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + } + + // The following checks only apply to virtual registers. Physreg liveness + // is too weird to check. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + // A live range can end with either a redefinition, a kill flag on a + // use, or a dead flag on a def. + bool hasRead = false; + bool hasDeadDef = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || MOI->getReg() != LI.reg) + continue; + if (MOI->readsReg()) + hasRead = true; + if (MOI->isDef() && MOI->isDead()) + hasDeadDef = true; + } + if (I->end.isDead()) { if (!hasDeadDef) { - report("Instruction killing live segment neither defines nor reads " - "register", MI); + report("Instruction doesn't have a dead def operand", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + } else { + if (!hasRead) { + report("Instruction ending live range doesn't read the register", + MI); I->print(*OS); *OS << " in " << LI << '\n'; } @@ -1192,8 +1310,8 @@ void MachineVerifier::verifyLiveIntervals() { // Check that VNI is live-out of all predecessors. 
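// A self-contained restatement of the end-of-segment rules added above for
// virtual registers: a live segment may not end at a block-boundary slot, a
// segment ending at a dead slot must be a single-instruction dead def, and
// otherwise the ending instruction must read the register.  The flags struct
// is illustrative only; the real checks walk ConstMIBundleOperands.
#include <cstdio>

struct EndCheck {
  bool EndsAtBlockSlot;     // I->end.isBlock()
  bool EndsAtDeadSlot;      // I->end.isDead()
  bool SameInstrAsStart;    // SlotIndex::isSameInstr(I->start, I->end)
  bool HasDeadDef;          // ending instruction has a dead def of the register
  bool HasRead;             // ending instruction reads the register
};

void checkSegmentEnd(const EndCheck &C) {
  if (C.EndsAtBlockSlot)
    std::printf("live segment ends at a block (B) slot\n");
  if (C.EndsAtDeadSlot) {
    if (!C.SameInstrAsStart)
      std::printf("segment ending at dead slot spans instructions\n");
    if (!C.HasDeadDef)
      std::printf("instruction doesn't have a dead def operand\n");
  } else if (!C.HasRead) {
    std::printf("instruction ending live range doesn't read the register\n");
  }
}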
for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), PE = MFI->pred_end(); PI != PE; ++PI) { - SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot(); - const VNInfo *PVNI = LI.getVNInfoAt(PEnd); + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) continue; @@ -1201,7 +1319,7 @@ void MachineVerifier::verifyLiveIntervals() { if (!PVNI) { report("Register not marked live out of predecessor", *PI); *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at " + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " << PEnd << " in " << LI << '\n'; continue; } diff --git a/lib/CodeGen/ObjectCodeEmitter.cpp b/lib/CodeGen/ObjectCodeEmitter.cpp deleted file mode 100644 index cf05275d7a31..000000000000 --- a/lib/CodeGen/ObjectCodeEmitter.cpp +++ /dev/null @@ -1,141 +0,0 @@ -//===-- llvm/CodeGen/ObjectCodeEmitter.cpp -------------------- -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/BinaryObject.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineRelocation.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" - -//===----------------------------------------------------------------------===// -// ObjectCodeEmitter Implementation -//===----------------------------------------------------------------------===// - -namespace llvm { - -ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {} -ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {} -ObjectCodeEmitter::~ObjectCodeEmitter() {} - -/// setBinaryObject - set the BinaryObject we are writting to -void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; } - -/// emitByte - This callback is invoked when a byte needs to be -/// written to the data stream, without buffer overflow testing. -void ObjectCodeEmitter::emitByte(uint8_t B) { - BO->emitByte(B); -} - -/// emitWordLE - This callback is invoked when a 32-bit word needs to be -/// written to the data stream in little-endian format. -void ObjectCodeEmitter::emitWordLE(uint32_t W) { - BO->emitWordLE(W); -} - -/// emitWordBE - This callback is invoked when a 32-bit word needs to be -/// written to the data stream in big-endian format. -void ObjectCodeEmitter::emitWordBE(uint32_t W) { - BO->emitWordBE(W); -} - -/// emitDWordLE - This callback is invoked when a 64-bit word needs to be -/// written to the data stream in little-endian format. -void ObjectCodeEmitter::emitDWordLE(uint64_t W) { - BO->emitDWordLE(W); -} - -/// emitDWordBE - This callback is invoked when a 64-bit word needs to be -/// written to the data stream in big-endian format. -void ObjectCodeEmitter::emitDWordBE(uint64_t W) { - BO->emitDWordBE(W); -} - -/// emitAlignment - Align 'BO' to the necessary alignment boundary. -void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */, - uint8_t fill /* 0 */) { - BO->emitAlignment(Alignment, fill); -} - -/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be -/// written to the data stream. 
-void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) { - BO->emitULEB128Bytes(Value); -} - -/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be -/// written to the data stream. -void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) { - BO->emitSLEB128Bytes(Value); -} - -/// emitString - This callback is invoked when a String needs to be -/// written to the data stream. -void ObjectCodeEmitter::emitString(const std::string &String) { - BO->emitString(String); -} - -/// getCurrentPCValue - This returns the address that the next emitted byte -/// will be output to. -uintptr_t ObjectCodeEmitter::getCurrentPCValue() const { - return BO->getCurrentPCOffset(); -} - -/// getCurrentPCOffset - Return the offset from the start of the emitted -/// buffer that we are currently writing to. -uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const { - return BO->getCurrentPCOffset(); -} - -/// addRelocation - Whenever a relocatable address is needed, it should be -/// noted with this interface. -void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) { - BO->addRelocation(relocation); -} - -/// StartMachineBasicBlock - This should be called by the target when a new -/// basic block is about to be emitted. This way the MCE knows where the -/// start of the block is, and can implement getMachineBasicBlockAddress. -void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) { - if (MBBLocations.size() <= (unsigned)MBB->getNumber()) - MBBLocations.resize((MBB->getNumber()+1)*2); - MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); -} - -/// getMachineBasicBlockAddress - Return the address of the specified -/// MachineBasicBlock, only usable after the label for the MBB has been -/// emitted. -uintptr_t -ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(MBBLocations.size() > (unsigned)MBB->getNumber() && - MBBLocations[MBB->getNumber()] && "MBB not emitted!"); - return MBBLocations[MBB->getNumber()]; -} - -/// getJumpTableEntryAddress - Return the address of the jump table with index -/// 'Index' in the function that last called initJumpTableInfo. -uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const { - assert(JTLocations.size() > Index && "JT not emitted!"); - return JTLocations[Index]; -} - -/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in -/// the constant pool that was last emitted with the emitConstantPool method. -uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const { - assert(CPLocations.size() > Index && "CP not emitted!"); - return CPLocations[Index]; -} - -/// getConstantPoolEntrySection - Return the section of the 'Index' entry in -/// the constant pool that was last emitted with the emitConstantPool method. 
-uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const { - assert(CPSections.size() > Index && "CP not emitted!"); - return CPSections[Index]; -} - -} // end namespace llvm - diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index c05be130ec61..6da313e632af 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -56,11 +56,10 @@ namespace { } char OptimizePHIs::ID = 0; +char &llvm::OptimizePHIsID = OptimizePHIs::ID; INITIALIZE_PASS(OptimizePHIs, "opt-phis", "Optimize machine instruction PHIs", false, false) -FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); } - bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { MRI = &Fn.getRegInfo(); TII = Fn.getTarget().getInstrInfo(); @@ -165,7 +164,11 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { InstrSet PHIsInCycle; if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) && SingleValReg != 0) { - MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg); + unsigned OldReg = MI->getOperand(0).getReg(); + if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg))) + continue; + + MRI->replaceRegWith(OldReg, SingleValReg); MI->eraseFromParent(); ++NumPHICycles; Changed = true; diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 6994aa58fbd5..0ed4c34bb105 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -92,11 +92,15 @@ STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split"); STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; -INITIALIZE_PASS(PHIElimination, "phi-node-elimination", - "Eliminate PHI nodes for register allocation", false, false) - char& llvm::PHIEliminationID = PHIElimination::ID; +INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination", + "Eliminate PHI nodes for register allocation", + false, false) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) +INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination", + "Eliminate PHI nodes for register allocation", false, false) + void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<LiveVariables>(); AU.addPreserved<MachineDominatorTree>(); @@ -241,7 +245,6 @@ void PHIElimination::LowerAtomicPHINode( LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); // Increment use count of the newly created virtual register. - VI.NumUses++; LV->setPHIJoin(IncomingReg); // When we are reusing the incoming register, it may already have been @@ -410,7 +413,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, return false; // Quick exit for basic blocks without PHIs. 
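// A standalone model of the guard added to OptimizePHIs above: before rewriting
// all uses of the PHI's result to the single incoming register, make sure that
// register's class can be constrained to satisfy the old register's class.
// Here a "register class" is just a set of allowed registers and constraining
// means intersecting; the real MRI->constrainRegClass works on the
// TargetRegisterClass hierarchy, so this is only an illustration.
#include <algorithm>
#include <iterator>
#include <set>

typedef std::set<unsigned> RegClass;

// Returns false (and leaves RC untouched) when the classes are incompatible.
bool constrainRegClass(RegClass &RC, const RegClass &Required) {
  RegClass Common;
  std::set_intersection(RC.begin(), RC.end(), Required.begin(), Required.end(),
                        std::inserter(Common, Common.begin()));
  if (Common.empty())
    return false;
  RC = Common;
  return true;
}

// Usage mirroring the pass: only replace OldReg by SingleValReg when the
// constraint succeeds; otherwise leave the PHI cycle alone.
bool tryReplace(RegClass &SingleValClass, const RegClass &OldRegClass) {
  if (!constrainRegClass(SingleValClass, OldRegClass))
    return false;   // incompatible classes: skip this PHI
  // ... replaceRegWith(OldReg, SingleValReg) and erase the PHI would go here ...
  return true;
}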
bool Changed = false; - for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end(); + for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end(); BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 315aedddb9ef..53d1fcf7377a 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -12,62 +12,617 @@ // //===---------------------------------------------------------------------===// -#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; +static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden, + cl::desc("Disable Post Regalloc")); +static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, + cl::desc("Disable branch folding")); +static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, + cl::desc("Disable tail duplication")); +static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden, + cl::desc("Disable pre-register allocation tail duplication")); +static cl::opt<bool> EnableBlockPlacement("enable-block-placement", + cl::Hidden, cl::desc("Enable probability-driven block placement")); +static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats", + cl::Hidden, cl::desc("Collect probability-driven block placement stats")); +static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden, + cl::desc("Disable code placement")); +static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden, + cl::desc("Disable Stack Slot Coloring")); +static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden, + cl::desc("Disable Machine Dead Code Elimination")); +static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, + cl::desc("Disable Machine LICM")); +static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, + cl::desc("Disable Machine Common Subexpression Elimination")); +static cl::opt<cl::boolOrDefault> +OptimizeRegAlloc("optimize-regalloc", cl::Hidden, + cl::desc("Enable optimized register allocation compilation path.")); +static cl::opt<cl::boolOrDefault> +EnableMachineSched("enable-misched", cl::Hidden, + cl::desc("Enable the machine instruction scheduling pass.")); +static cl::opt<bool> EnableStrongPHIElim("strong-phi-elim", cl::Hidden, + cl::desc("Use strong PHI elimination.")); +static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", + cl::Hidden, + cl::desc("Disable Machine LICM")); +static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden, + cl::desc("Disable Machine Sinking")); +static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden, + cl::desc("Disable Loop Strength Reduction Pass")); +static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden, + cl::desc("Disable Codegen Prepare")); +static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden, + cl::desc("Disable Copy Propagation 
pass")); +static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden, + cl::desc("Print LLVM IR produced by the loop-reduce pass")); +static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, + cl::desc("Print LLVM IR input to isel pass")); +static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, + cl::desc("Dump garbage collector data")); +static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, + cl::desc("Verify generated machine code"), + cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); + +/// Allow standard passes to be disabled by command line options. This supports +/// simple binary flags that either suppress the pass or do nothing. +/// i.e. -disable-mypass=false has no effect. +/// These should be converted to boolOrDefault in order to use applyOverride. +static AnalysisID applyDisable(AnalysisID ID, bool Override) { + if (Override) + return &NoPassID; + return ID; +} + +/// Allow Pass selection to be overriden by command line options. This supports +/// flags with ternary conditions. TargetID is passed through by default. The +/// pass is suppressed when the option is false. When the option is true, the +/// StandardID is selected if the target provides no default. +static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override, + AnalysisID StandardID) { + switch (Override) { + case cl::BOU_UNSET: + return TargetID; + case cl::BOU_TRUE: + if (TargetID != &NoPassID) + return TargetID; + if (StandardID == &NoPassID) + report_fatal_error("Target cannot enable pass"); + return StandardID; + case cl::BOU_FALSE: + return &NoPassID; + } + llvm_unreachable("Invalid command line option state"); +} + +/// Allow standard passes to be disabled by the command line, regardless of who +/// is adding the pass. +/// +/// StandardID is the pass identified in the standard pass pipeline and provided +/// to addPass(). It may be a target-specific ID in the case that the target +/// directly adds its own pass, but in that case we harmlessly fall through. +/// +/// TargetID is the pass that the target has configured to override StandardID. +/// +/// StandardID may be a pseudo ID. In that case TargetID is the name of the real +/// pass to run. This allows multiple options to control a single pass depending +/// on where in the pipeline that pass is added. 
+static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) { + if (StandardID == &PostRASchedulerID) + return applyDisable(TargetID, DisablePostRA); + + if (StandardID == &BranchFolderPassID) + return applyDisable(TargetID, DisableBranchFold); + + if (StandardID == &TailDuplicateID) + return applyDisable(TargetID, DisableTailDuplicate); + + if (StandardID == &TargetPassConfig::EarlyTailDuplicateID) + return applyDisable(TargetID, DisableEarlyTailDup); + + if (StandardID == &MachineBlockPlacementID) + return applyDisable(TargetID, DisableCodePlace); + + if (StandardID == &CodePlacementOptID) + return applyDisable(TargetID, DisableCodePlace); + + if (StandardID == &StackSlotColoringID) + return applyDisable(TargetID, DisableSSC); + + if (StandardID == &DeadMachineInstructionElimID) + return applyDisable(TargetID, DisableMachineDCE); + + if (StandardID == &MachineLICMID) + return applyDisable(TargetID, DisableMachineLICM); + + if (StandardID == &MachineCSEID) + return applyDisable(TargetID, DisableMachineCSE); + + if (StandardID == &MachineSchedulerID) + return applyOverride(TargetID, EnableMachineSched, StandardID); + + if (StandardID == &TargetPassConfig::PostRAMachineLICMID) + return applyDisable(TargetID, DisablePostRAMachineLICM); + + if (StandardID == &MachineSinkingID) + return applyDisable(TargetID, DisableMachineSink); + + if (StandardID == &MachineCopyPropagationID) + return applyDisable(TargetID, DisableCopyProp); + + return TargetID; +} + //===---------------------------------------------------------------------===// +/// TargetPassConfig +//===---------------------------------------------------------------------===// + +INITIALIZE_PASS(TargetPassConfig, "targetpassconfig", + "Target Pass Configuration", false, false) +char TargetPassConfig::ID = 0; + +static char NoPassIDAnchor = 0; +char &llvm::NoPassID = NoPassIDAnchor; + +// Pseudo Pass IDs. +char TargetPassConfig::EarlyTailDuplicateID = 0; +char TargetPassConfig::PostRAMachineLICMID = 0; + +namespace llvm { +class PassConfigImpl { +public: + // List of passes explicitly substituted by this target. Normally this is + // empty, but it is a convenient way to suppress or replace specific passes + // that are part of a standard pass pipeline without overridding the entire + // pipeline. This mechanism allows target options to inherit a standard pass's + // user interface. For example, a target may disable a standard pass by + // default by substituting NoPass, and the user may still enable that standard + // pass with an explicit command line option. + DenseMap<AnalysisID,AnalysisID> TargetPasses; +}; +} // namespace llvm + +// Out of line virtual method. +TargetPassConfig::~TargetPassConfig() { + delete Impl; +} + +// Out of line constructor provides default values for pass options and +// registers all common codegen passes. +TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) + : ImmutablePass(ID), TM(tm), PM(pm), Impl(0), Initialized(false), + DisableVerify(false), + EnableTailMerge(true) { + + Impl = new PassConfigImpl(); + + // Register all target independent codegen passes to activate their PassIDs, + // including this pass itself. + initializeCodeGen(*PassRegistry::getPassRegistry()); + + // Substitute Pseudo Pass IDs for real ones. + substitutePass(EarlyTailDuplicateID, TailDuplicateID); + substitutePass(PostRAMachineLICMID, MachineLICMID); + + // Temporarily disable experimental passes. 
+ substitutePass(MachineSchedulerID, NoPassID); +} + +/// createPassConfig - Create a pass configuration object to be used by +/// addPassToEmitX methods for generating a pipeline of CodeGen passes. +/// +/// Targets may override this to extend TargetPassConfig. +TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) { + return new TargetPassConfig(this, PM); +} + +TargetPassConfig::TargetPassConfig() + : ImmutablePass(ID), PM(*(PassManagerBase*)0) { + llvm_unreachable("TargetPassConfig should not be constructed on-the-fly"); +} + +// Helper to verify the analysis is really immutable. +void TargetPassConfig::setOpt(bool &Opt, bool Val) { + assert(!Initialized && "PassConfig is immutable"); + Opt = Val; +} + +void TargetPassConfig::substitutePass(char &StandardID, char &TargetID) { + Impl->TargetPasses[&StandardID] = &TargetID; +} + +AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const { + DenseMap<AnalysisID, AnalysisID>::const_iterator + I = Impl->TargetPasses.find(ID); + if (I == Impl->TargetPasses.end()) + return ID; + return I->second; +} + +/// Add a CodeGen pass at this point in the pipeline after checking for target +/// and command line overrides. +AnalysisID TargetPassConfig::addPass(char &ID) { + assert(!Initialized && "PassConfig is immutable"); + + AnalysisID TargetID = getPassSubstitution(&ID); + AnalysisID FinalID = overridePass(&ID, TargetID); + if (FinalID == &NoPassID) + return FinalID; + + Pass *P = Pass::createPass(FinalID); + if (!P) + llvm_unreachable("Pass ID not registered"); + PM.add(P); + return FinalID; +} + +void TargetPassConfig::printAndVerify(const char *Banner) const { + if (TM->shouldPrintMachineCode()) + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); + + if (VerifyMachineCode) + PM.add(createMachineVerifierPass(Banner)); +} + +/// Add common target configurable passes that perform LLVM IR to IR transforms +/// following machine independent optimization. +void TargetPassConfig::addIRPasses() { + // Basic AliasAnalysis support. + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. + PM.add(createTypeBasedAliasAnalysisPass()); + PM.add(createBasicAliasAnalysisPass()); + + // Before running any passes, run the verifier to determine if the input + // coming from the front-end and/or optimizer is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + + // Run loop strength reduction before anything else. + if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { + PM.add(createLoopStrengthReducePass(getTargetLowering())); + if (PrintLSR) + PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); + } + + PM.add(createGCLoweringPass()); + + // Make sure that no unreachable blocks are instruction selected. + PM.add(createUnreachableBlockEliminationPass()); +} + +/// Add common passes that perform LLVM IR to IR transforms in preparation for +/// instruction selection. +void TargetPassConfig::addISelPrepare() { + if (getOptLevel() != CodeGenOpt::None && !DisableCGP) + PM.add(createCodeGenPreparePass(getTargetLowering())); + + PM.add(createStackProtectorPass(getTargetLowering())); + + addPreISel(); + + if (PrintISelInput) + PM.add(createPrintFunctionPass("\n\n" + "*** Final LLVM Code input to ISel ***\n", + &dbgs())); + + // All passes which modify the LLVM IR are now complete; run the verifier + // to ensure that the IR is valid. 
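// A compact, standalone model of the addPass flow shown above: the requested
// standard ID is first mapped through the target's substitution table, then
// through the command-line override layer, and only a surviving ID is turned
// into a pass and handed to the pass manager.  The map, callbacks, and sentinel
// below are placeholders, not the real PassRegistry/PassManagerBase machinery.
#include <functional>
#include <map>

typedef const void *PassID;
static const char NoPassTag = 0;
static const PassID NoPass = &NoPassTag;   // stands in for &NoPassID

struct PassConfigSketch {
  std::map<PassID, PassID> TargetPasses;            // substitutePass table
  std::function<PassID(PassID, PassID)> Override;   // command-line layer
  std::function<void(PassID)> AddToManager;         // PM.add(createPass(ID))

  void substitutePass(PassID StandardID, PassID TargetID) {
    TargetPasses[StandardID] = TargetID;
  }

  PassID getPassSubstitution(PassID ID) const {
    std::map<PassID, PassID>::const_iterator I = TargetPasses.find(ID);
    return I == TargetPasses.end() ? ID : I->second;
  }

  PassID addPass(PassID ID) {
    PassID TargetID = getPassSubstitution(ID);
    PassID FinalID = Override(ID, TargetID);
    if (FinalID == NoPass)
      return FinalID;        // disabled: nothing reaches the manager
    AddToManager(FinalID);
    return FinalID;          // callers compare against the sentinel to decide
  }                          // whether to printAndVerify after this pass
};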
+ if (!DisableVerify) + PM.add(createVerifierPass()); +} + +/// Add the complete set of target-independent postISel code generator passes. /// -/// RegisterRegAlloc class - Track the registration of register allocators. +/// This can be read as the standard order of major LLVM CodeGen stages. Stages +/// with nontrivial configuration or multiple passes are broken out below in +/// add%Stage routines. /// +/// Any TargetPassConfig::addXX routine may be overriden by the Target. The +/// addPre/Post methods with empty header implementations allow injecting +/// target-specific fixups just before or after major stages. Additionally, +/// targets have the flexibility to change pass order within a stage by +/// overriding default implementation of add%Stage routines below. Each +/// technique has maintainability tradeoffs because alternate pass orders are +/// not well supported. addPre/Post works better if the target pass is easily +/// tied to a common pass. But if it has subtle dependencies on multiple passes, +/// the target should override the stage instead. +/// +/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection +/// before/after any target-independent pass. But it's currently overkill. +void TargetPassConfig::addMachinePasses() { + // Print the instruction selected machine code... + printAndVerify("After Instruction Selection"); + + // Expand pseudo-instructions emitted by ISel. + addPass(ExpandISelPseudosID); + + // Add passes that optimize machine instructions in SSA form. + if (getOptLevel() != CodeGenOpt::None) { + addMachineSSAOptimization(); + } + else { + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + addPass(LocalStackSlotAllocationID); + } + + // Run pre-ra passes. + if (addPreRegAlloc()) + printAndVerify("After PreRegAlloc passes"); + + // Run register allocation and passes that are tightly coupled with it, + // including phi elimination and scheduling. + if (getOptimizeRegAlloc()) + addOptimizedRegAlloc(createRegAllocPass(true)); + else + addFastRegAlloc(createRegAllocPass(false)); + + // Run post-ra passes. + if (addPostRegAlloc()) + printAndVerify("After PostRegAlloc passes"); + + // Insert prolog/epilog code. Eliminate abstract frame index references... + addPass(PrologEpilogCodeInserterID); + printAndVerify("After PrologEpilogCodeInserter"); + + /// Add passes that optimize machine instructions after register allocation. + if (getOptLevel() != CodeGenOpt::None) + addMachineLateOptimization(); + + // Expand pseudo instructions before second scheduling pass. + addPass(ExpandPostRAPseudosID); + printAndVerify("After ExpandPostRAPseudos"); + + // Run pre-sched2 passes. + if (addPreSched2()) + printAndVerify("After PreSched2 passes"); + + // Second pass scheduler. + if (getOptLevel() != CodeGenOpt::None) { + addPass(PostRASchedulerID); + printAndVerify("After PostRAScheduler"); + } + + // GC + addPass(GCMachineCodeAnalysisID); + if (PrintGCInfo) + PM.add(createGCInfoPrinter(dbgs())); + + // Basic block placement. + if (getOptLevel() != CodeGenOpt::None) + addBlockPlacement(); + + if (addPreEmitPass()) + printAndVerify("After PreEmit passes"); +} + +/// Add passes that optimize machine instructions in SSA form. +void TargetPassConfig::addMachineSSAOptimization() { + // Pre-ra tail duplication. 
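// A sketch of how a backend is expected to plug into the add%Stage and
// addPre*/addPost* hooks described above.  These are simplified stand-in
// classes, not the real TargetPassConfig interface; "MyTargetPassConfig" and
// the pass it claims to insert are hypothetical.
#include <cstdio>

struct PassConfigBase {
  virtual ~PassConfigBase() {}
  // Hook points with empty default implementations; returning true tells the
  // driver something was added so it can print/verify afterwards.
  virtual bool addPreRegAlloc()  { return false; }
  virtual bool addPostRegAlloc() { return false; }
  virtual bool addPreSched2()    { return false; }
  virtual bool addPreEmitPass()  { return false; }
};

// A hypothetical target that schedules one extra fix-up pass before register
// allocation and otherwise keeps the standard pipeline.
struct MyTargetPassConfig : PassConfigBase {
  bool addPreRegAlloc() override {
    std::printf("adding MyTargetLoadStoreFixup before regalloc\n");
    return true;   // driver will emit "After PreRegAlloc passes"
  }
};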
+ if (addPass(EarlyTailDuplicateID) != &NoPassID) + printAndVerify("After Pre-RegAlloc TailDuplicate"); + + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + addPass(OptimizePHIsID); + + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + addPass(LocalStackSlotAllocationID); + + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + addPass(DeadMachineInstructionElimID); + printAndVerify("After codegen DCE pass"); + + addPass(MachineLICMID); + addPass(MachineCSEID); + addPass(MachineSinkingID); + printAndVerify("After Machine LICM, CSE and Sinking passes"); + + addPass(PeepholeOptimizerID); + printAndVerify("After codegen peephole optimization pass"); +} + +//===---------------------------------------------------------------------===// +/// Register Allocation Pass Configuration //===---------------------------------------------------------------------===// + +bool TargetPassConfig::getOptimizeRegAlloc() const { + switch (OptimizeRegAlloc) { + case cl::BOU_UNSET: return getOptLevel() != CodeGenOpt::None; + case cl::BOU_TRUE: return true; + case cl::BOU_FALSE: return false; + } + llvm_unreachable("Invalid optimize-regalloc state"); +} + +/// RegisterRegAlloc's global Registry tracks allocator registration. MachinePassRegistry RegisterRegAlloc::Registry; -static FunctionPass *createDefaultRegisterAllocator() { return 0; } +/// A dummy default pass factory indicates whether the register allocator is +/// overridden on the command line. +static FunctionPass *useDefaultRegisterAllocator() { return 0; } static RegisterRegAlloc defaultRegAlloc("default", "pick register allocator based on -O option", - createDefaultRegisterAllocator); + useDefaultRegisterAllocator); -//===---------------------------------------------------------------------===// -/// -/// RegAlloc command line options. -/// -//===---------------------------------------------------------------------===// +/// -regalloc=... command line option. static cl::opt<RegisterRegAlloc::FunctionPassCtor, false, RegisterPassParser<RegisterRegAlloc> > RegAlloc("regalloc", - cl::init(&createDefaultRegisterAllocator), + cl::init(&useDefaultRegisterAllocator), cl::desc("Register allocator to use")); -//===---------------------------------------------------------------------===// +/// Instantiate the default register allocator pass for this target for either +/// the optimized or unoptimized allocation path. This will be added to the pass +/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc +/// in the optimized case. /// -/// createRegisterAllocator - choose the appropriate register allocator. +/// A target that uses the standard regalloc pass order for fast or optimized +/// allocation may still override this for per-target regalloc +/// selection. But -regalloc=... always takes precedence. +FunctionPass *TargetPassConfig::createTargetRegisterAllocator(bool Optimized) { + if (Optimized) + return createGreedyRegisterAllocator(); + else + return createFastRegisterAllocator(); +} + +/// Find and instantiate the register allocation pass requested by this target +/// at the current optimization level. 
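// A standalone sketch of the allocator-selection logic above: the command-line
// registry holds a factory, and the dummy "use the default allocator" factory
// is how the code detects that -regalloc= was not given, in which case the
// target supplies its own greedy/fast choice.  Plain function pointers replace
// the real RegisterRegAlloc registry here.
typedef int Allocator;                         // stand-in for FunctionPass*
typedef Allocator (*AllocFactory)(bool Optimized);

static Allocator useDefault(bool)  { return 0; }   // sentinel, never selected
static Allocator greedyAlloc(bool) { return 1; }   // stand-ins for the real
static Allocator fastAlloc(bool)   { return 2; }   // allocator factories

Allocator createRegAllocPass(AllocFactory CommandLineCtor, bool Optimized) {
  if (CommandLineCtor && CommandLineCtor != &useDefault)
    return CommandLineCtor(Optimized);   // explicit -regalloc=... always wins
  // No override: ask the "target" for its default at this optimization level.
  return Optimized ? greedyAlloc(Optimized) : fastAlloc(Optimized);
}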
Different register allocators are +/// defined as separate passes because they may require different analysis. /// -//===---------------------------------------------------------------------===// -FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { +/// This helper ensures that the regalloc= option is always available, +/// even for targets that override the default allocator. +/// +/// FIXME: When MachinePassRegistry register pass IDs instead of function ptrs, +/// this can be folded into addPass. +FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); + // Initialize the global default. if (!Ctor) { Ctor = RegAlloc; RegisterRegAlloc::setDefault(RegAlloc); } + if (Ctor != useDefaultRegisterAllocator) + return Ctor(); - // This forces linking of the linear scan register allocator, - // so -regalloc=linearscan still works in clang. - if (Ctor == createLinearScanRegisterAllocator) - return createLinearScanRegisterAllocator(); + // With no -regalloc= override, ask the target for a regalloc pass. + return createTargetRegisterAllocator(Optimized); +} - if (Ctor != createDefaultRegisterAllocator) - return Ctor(); +/// Add the minimum set of target-independent passes that are required for +/// register allocation. No coalescing or scheduling. +void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { + addPass(PHIEliminationID); + addPass(TwoAddressInstructionPassID); - // When the 'default' allocator is requested, pick one based on OptLevel. - switch (OptLevel) { - case CodeGenOpt::None: - return createFastRegisterAllocator(); - default: - return createGreedyRegisterAllocator(); + PM.add(RegAllocPass); + printAndVerify("After Register Allocation"); +} + +/// Add standard target-independent passes that are tightly coupled with +/// optimized register allocation, including coalescing, machine instruction +/// scheduling, and register allocation itself. +void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + // LiveVariables currently requires pure SSA form. + // + // FIXME: Once TwoAddressInstruction pass no longer uses kill flags, + // LiveVariables can be removed completely, and LiveIntervals can be directly + // computed. (We still either need to regenerate kill flags after regalloc, or + // preferably fix the scavenger to not depend on them). + addPass(LiveVariablesID); + + // Add passes that move from transformed SSA into conventional SSA. This is a + // "copy coalescing" problem. + // + if (!EnableStrongPHIElim) { + // Edge splitting is smarter with machine loop info. + addPass(MachineLoopInfoID); + addPass(PHIEliminationID); + } + addPass(TwoAddressInstructionPassID); + + // FIXME: Either remove this pass completely, or fix it so that it works on + // SSA form. We could modify LiveIntervals to be independent of this pass, But + // it would be even better to simply eliminate *all* IMPLICIT_DEFs before + // leaving SSA. + addPass(ProcessImplicitDefsID); + + if (EnableStrongPHIElim) + addPass(StrongPHIEliminationID); + + addPass(RegisterCoalescerID); + + // PreRA instruction scheduling. + if (addPass(MachineSchedulerID) != &NoPassID) + printAndVerify("After Machine Scheduling"); + + // Add the selected register allocation pass. 
+ PM.add(RegAllocPass); + printAndVerify("After Register Allocation"); + + // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature, + // but eventually, all users of it should probably be moved to addPostRA and + // it can go away. Currently, it's the intended place for targets to run + // FinalizeMachineBundles, because passes other than MachineScheduling an + // RegAlloc itself may not be aware of bundles. + if (addFinalizeRegAlloc()) + printAndVerify("After RegAlloc finalization"); + + // Perform stack slot coloring and post-ra machine LICM. + // + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. + addPass(StackSlotColoringID); + + // Run post-ra machine LICM to hoist reloads / remats. + // + // FIXME: can this move into MachineLateOptimization? + addPass(PostRAMachineLICMID); + + printAndVerify("After StackSlotColoring and postra Machine LICM"); +} + +//===---------------------------------------------------------------------===// +/// Post RegAlloc Pass Configuration +//===---------------------------------------------------------------------===// + +/// Add passes that optimize machine instructions after register allocation. +void TargetPassConfig::addMachineLateOptimization() { + // Branch folding must be run after regalloc and prolog/epilog insertion. + if (addPass(BranchFolderPassID) != &NoPassID) + printAndVerify("After BranchFolding"); + + // Tail duplication. + if (addPass(TailDuplicateID) != &NoPassID) + printAndVerify("After TailDuplicate"); + + // Copy propagation. + if (addPass(MachineCopyPropagationID) != &NoPassID) + printAndVerify("After copy propagation pass"); +} + +/// Add standard basic block placement passes. +void TargetPassConfig::addBlockPlacement() { + AnalysisID ID = &NoPassID; + if (EnableBlockPlacement) { + // MachineBlockPlacement is an experimental pass which is disabled by + // default currently. Eventually it should subsume CodePlacementOpt, so + // when enabled, the other is disabled. + ID = addPass(MachineBlockPlacementID); + } else { + ID = addPass(CodePlacementOptID); + } + if (ID != &NoPassID) { + // Run a separate pass to collect block placement statistics. 
+ if (EnableBlockPlacementStats) + addPass(MachineBlockPlacementStatsID); + + printAndVerify("After machine block placement."); } } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index bbc7ce2d0a42..9c5c029000c0 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -39,7 +39,7 @@ // => // v1 = bitcast v0 // = v0 -// +// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "peephole-opt" @@ -68,7 +68,7 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); STATISTIC(NumCmps, "Number of compares eliminated"); -STATISTIC(NumImmFold, "Number of move immediate foled"); +STATISTIC(NumImmFold, "Number of move immediate folded"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -109,22 +109,19 @@ namespace { } char PeepholeOptimizer::ID = 0; +char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", "Peephole Optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", "Peephole Optimizations", false, false) -FunctionPass *llvm::createPeepholeOptimizerPass() { - return new PeepholeOptimizer(); -} - /// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads /// a single register and writes a single register and it does not modify the /// source, and if the source value is preserved as a sub-register of the /// result, then replace all reachable uses of the source with the subreg of the /// result. -/// +/// /// Do not generate an EXTRACT that is used only in a debug use, as this changes /// the code. Since this code does not currently share EXTRACTs, just ignore all /// debug uses. @@ -134,7 +131,7 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, unsigned SrcReg, DstReg, SubIdx; if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) return false; - + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; @@ -240,6 +237,10 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, if (PHIBBs.count(UseMBB)) continue; + // About to add uses of DstReg, clear DstReg's kill flags. 
+ if (!Changed) + MRI->clearKillFlags(DstReg); + unsigned NewVR = MRI->createVirtualRegister(RC); BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVR) @@ -292,7 +293,7 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI, assert(Def && Src && "Malformed bitcast instruction!"); MachineInstr *DefMI = MRI->getVRegDef(Src); - if (!DefMI || !DefMI->getDesc().isBitcast()) + if (!DefMI || !DefMI->isBitcast()) return false; unsigned SrcSrc = 0; @@ -353,7 +354,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isMoveImmediate()) + if (!MI->isMoveImmediate()) return false; if (MCID.getNumDefs() != 1) return false; @@ -363,7 +364,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, ImmDefRegs.insert(Reg); return true; } - + return false; } @@ -395,7 +396,7 @@ bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (DisablePeephole) return false; - + TM = &MF.getTarget(); TII = TM->getInstrInfo(); MRI = &MF.getRegInfo(); @@ -408,7 +409,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { DenseMap<unsigned, MachineInstr*> ImmDefMIs; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; - + bool SeenMoveImm = false; LocalMIs.clear(); ImmDefRegs.clear(); @@ -428,17 +429,15 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - const MCInstrDesc &MCID = MI->getDesc(); - - if (MCID.isBitcast()) { + if (MI->isBitcast()) { if (OptimizeBitcastInstr(MI, MBB)) { // MI is deleted. LocalMIs.erase(MI); Changed = true; MII = First ? I->begin() : llvm::next(PMII); continue; - } - } else if (MCID.isCompare()) { + } + } else if (MI->isCompare()) { if (OptimizeCmpInstr(MI, MBB)) { // MI is deleted. 
LocalMIs.erase(MI); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index c73e87733cb4..24d3e5ab0c9d 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -23,7 +23,6 @@ #include "AggressiveAntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" #include "RegisterClassInfo.h" -#include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/SchedulerRegistry.h" @@ -32,6 +31,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetLowering.h" @@ -45,7 +45,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" -#include <set> using namespace llvm; STATISTIC(NumNoops, "Number of noops inserted"); @@ -82,16 +81,15 @@ namespace { AliasAnalysis *AA; const TargetInstrInfo *TII; RegisterClassInfo RegClassInfo; - CodeGenOpt::Level OptLevel; public: static char ID; - PostRAScheduler(CodeGenOpt::Level ol) : - MachineFunctionPass(ID), OptLevel(ol) {} + PostRAScheduler() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetPassConfig>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); @@ -99,10 +97,6 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const { - return "Post RA top-down list latency scheduler"; - } - bool runOnMachineFunction(MachineFunction &Fn); }; char PostRAScheduler::ID = 0; @@ -130,36 +124,49 @@ namespace { /// AA - AliasAnalysis for making memory reference queries. AliasAnalysis *AA; - /// KillIndices - The index of the most recent kill (proceding bottom-up), - /// or ~0u if the register is not live. - std::vector<unsigned> KillIndices; + /// LiveRegs - true if the register is live. + BitVector LiveRegs; + + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector<SUnit*> Sequence; public: SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, AliasAnalysis *AA, const RegisterClassInfo&, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, - SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs); + SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs); ~SchedulePostRATDList(); - /// StartBlock - Initialize register live-range state for scheduling in + /// startBlock - Initialize register live-range state for scheduling in /// this block. /// - void StartBlock(MachineBasicBlock *BB); + void startBlock(MachineBasicBlock *BB); + + /// Initialize the scheduler state for the next scheduling region. + virtual void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount); + + /// Notify that the scheduler has finished scheduling the current region. + virtual void exitRegion(); /// Schedule - Schedule the instruction range using list scheduling. /// - void Schedule(); + void schedule(); + + void EmitSchedule(); /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. 
/// void Observe(MachineInstr *MI, unsigned Count); - /// FinishBlock - Clean up register live-range state. + /// finishBlock - Clean up register live-range state. /// - void FinishBlock(); + void finishBlock(); /// FixupKills - Fix register kill flags that have been made /// invalid due to scheduling @@ -177,16 +184,23 @@ namespace { // adjustments may be made to the instruction if necessary. Return // true if the operand has been deleted, false if not. bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO); + + void dumpSchedule() const; }; } +char &llvm::PostRASchedulerID = PostRAScheduler::ID; + +INITIALIZE_PASS(PostRAScheduler, "post-RA-sched", + "Post RA top-down list latency scheduler", false, false) + SchedulePostRATDList::SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, AliasAnalysis *AA, const RegisterClassInfo &RCI, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, - SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs) - : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA), - KillIndices(TRI->getNumRegs()) + SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs) + : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), Topo(SUnits), AA(AA), + LiveRegs(TRI->getNumRegs()) { const TargetMachine &TM = MF.getTarget(); const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); @@ -204,16 +218,48 @@ SchedulePostRATDList::~SchedulePostRATDList() { delete AntiDepBreak; } +/// Initialize state associated with the next scheduling region. +void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) { + ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + Sequence.clear(); +} + +/// Print the schedule before exiting the region. +void SchedulePostRATDList::exitRegion() { + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); + ScheduleDAGInstrs::exitRegion(); +} + +/// dumpSchedule - dump the scheduled Sequence. +void SchedulePostRATDList::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} + bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { TII = Fn.getTarget().getInstrInfo(); MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); + RegClassInfo.runOnMachineFunction(Fn); // Check for explicit enable/disable of post-ra scheduling. - TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE; - SmallVector<TargetRegisterClass*, 4> CriticalPathRCs; + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = + TargetSubtargetInfo::ANTIDEP_NONE; + SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs; if (EnablePostRAScheduler.getPosition() > 0) { if (!EnablePostRAScheduler) return false; @@ -221,7 +267,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { // Check that post-RA scheduling is enabled for this target. // This may upgrade the AntiDepMode. 
const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>(); - if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs)) + if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode, + CriticalPathRCs)) return false; } @@ -248,13 +295,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { static int bbcnt = 0; if (bbcnt++ % DebugDiv != DebugMod) continue; - dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() << - ":BB#" << MBB->getNumber() << " ***\n"; + dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getName() + << ":BB#" << MBB->getNumber() << " ***\n"; } #endif // Initialize register live-range state for scheduling in this block. - Scheduler.StartBlock(MBB); + Scheduler.startBlock(MBB); // Schedule each sequence of instructions not interrupted by a label // or anything else that effectively needs to shut down scheduling. @@ -262,8 +309,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { unsigned Count = MBB->size(), CurrentCount = Count; for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { MachineInstr *MI = llvm::prior(I); - if (TII->isSchedulingBoundary(MI, MBB, Fn)) { - Scheduler.Run(MBB, I, Current, CurrentCount); + // Calls are not scheduling boundaries before register allocation, but + // post-ra we don't gain anything by scheduling across calls since we + // don't need to worry about register pressure. + if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) { + Scheduler.enterRegion(MBB, I, Current, CurrentCount); + Scheduler.schedule(); + Scheduler.exitRegion(); Scheduler.EmitSchedule(); Current = MI; CurrentCount = Count - 1; @@ -271,15 +323,19 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { } I = MI; --Count; + if (MI->isBundle()) + Count -= MI->getBundleSize(); } assert(Count == 0 && "Instruction count mismatch!"); assert((MBB->begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); - Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.schedule(); + Scheduler.exitRegion(); Scheduler.EmitSchedule(); // Clean up register live-range state. - Scheduler.FinishBlock(); + Scheduler.finishBlock(); // Update register kills Scheduler.FixupKills(MBB); @@ -291,9 +347,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { /// StartBlock - Initialize register live-range state for scheduling in /// this block. /// -void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { +void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) { // Call the superclass. - ScheduleDAGInstrs::StartBlock(BB); + ScheduleDAGInstrs::startBlock(BB); // Reset the hazard recognizer and anti-dep breaker. HazardRec->Reset(); @@ -303,14 +359,14 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { /// Schedule - Schedule the instruction range using list scheduling. /// -void SchedulePostRATDList::Schedule() { +void SchedulePostRATDList::schedule() { // Build the scheduling graph. - BuildSchedGraph(AA); + buildSchedGraph(AA); if (AntiDepBreak != NULL) { unsigned Broken = - AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos, - InsertPosIndex, DbgValues); + AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd, + EndIndex, DbgValues); if (Broken != 0) { // We made changes. Update the dependency graph. 
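The rewritten PostRAScheduler::runOnMachineFunction above walks each block bottom-up and closes a scheduling region at every call or target scheduling boundary, running enterRegion/schedule/exitRegion (and EmitSchedule) on each piece. A rough, self-contained illustration of that region-carving walk over a plain list of opcode strings; the instruction encoding and boundary test are invented for the sketch:

// Toy illustration of carving a block into scheduling regions, scanning
// bottom-up and cutting at "boundary" instructions such as calls.
#include <iostream>
#include <string>
#include <vector>

static bool isBoundary(const std::string &Op) {
  return Op == "call" || Op == "label";
}

int main() {
  std::vector<std::string> Block = {"load", "add", "call", "mul", "store"};
  size_t End = Block.size();
  // Scan from the bottom; every boundary closes the region above it, and the
  // boundary instruction itself is left out of the scheduled region.
  for (size_t I = Block.size(); I > 0; --I) {
    if (isBoundary(Block[I - 1])) {
      std::cout << "schedule region [" << I << ", " << End << ")\n";
      End = I - 1;
    }
  }
  std::cout << "schedule region [0, " << End << ")\n";
  return 0;
}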
@@ -319,11 +375,8 @@ void SchedulePostRATDList::Schedule() { // the def's anti-dependence *and* output-dependence edges due to // that register, and add new anti-dependence and output-dependence // edges based on the next live range of the register. - SUnits.clear(); - Sequence.clear(); - EntrySU = SUnit(); - ExitSU = SUnit(); - BuildSchedGraph(AA); + ScheduleDAG::clearDAG(); + buildSchedGraph(AA); NumFixedAnti += Broken; } @@ -343,38 +396,36 @@ void SchedulePostRATDList::Schedule() { /// void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) { if (AntiDepBreak != NULL) - AntiDepBreak->Observe(MI, Count, InsertPosIndex); + AntiDepBreak->Observe(MI, Count, EndIndex); } /// FinishBlock - Clean up register live-range state. /// -void SchedulePostRATDList::FinishBlock() { +void SchedulePostRATDList::finishBlock() { if (AntiDepBreak != NULL) AntiDepBreak->FinishBlock(); // Call the superclass. - ScheduleDAGInstrs::FinishBlock(); + ScheduleDAGInstrs::finishBlock(); } /// StartBlockForKills - Initialize register live-range state for updating kills /// void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { - // Initialize the indices to indicate that no registers are live. - for (unsigned i = 0; i < TRI->getNumRegs(); ++i) - KillIndices[i] = ~0u; + // Start with no live registers. + LiveRegs.reset(); // Determine the live-out physregs for this block. - if (!BB->empty() && BB->back().getDesc().isReturn()) { + if (!BB->empty() && BB->back().isReturn()) { // In a return block, examine the function live-out regs. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), E = MRI.liveout_end(); I != E; ++I) { unsigned Reg = *I; - KillIndices[Reg] = BB->size(); + LiveRegs.set(Reg); // Repeat, for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - KillIndices[*Subreg] = BB->size(); - } + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + LiveRegs.set(*Subreg); } } else { @@ -384,12 +435,11 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; - KillIndices[Reg] = BB->size(); + LiveRegs.set(Reg); // Repeat, for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - KillIndices[*Subreg] = BB->size(); - } + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + LiveRegs.set(*Subreg); } } } @@ -404,7 +454,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, } // If MO itself is live, clear the kill flag... 
- if (KillIndices[MO.getReg()] != ~0u) { + if (LiveRegs.test(MO.getReg())) { MO.setIsKill(false); return false; } @@ -414,9 +464,9 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, MO.setIsKill(false); bool AllDead = true; const unsigned SuperReg = MO.getReg(); - for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg); + for (const uint16_t *Subreg = TRI->getSubRegisters(SuperReg); *Subreg; ++Subreg) { - if (KillIndices[*Subreg] != ~0u) { + if (LiveRegs.test(*Subreg)) { MI->addOperand(MachineOperand::CreateReg(*Subreg, true /*IsDef*/, true /*IsImp*/, @@ -437,7 +487,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); - std::set<unsigned> killedRegs; + BitVector killedRegs(TRI->getNumRegs()); BitVector ReservedRegs = TRI->getReservedRegs(MF); StartBlockForKills(MBB); @@ -455,6 +505,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { // are completely defined. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + LiveRegs.clearBitsNotInMask(MO.getRegMask()); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; @@ -462,19 +514,18 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { // Ignore two-addr defs. if (MI->isRegTiedToUseOperand(i)) continue; - KillIndices[Reg] = ~0u; + LiveRegs.reset(Reg); // Repeat for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - KillIndices[*Subreg] = ~0u; - } + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + LiveRegs.reset(*Subreg); } // Examine all used registers and set/clear kill flag. When a // register is used multiple times we only set the kill flag on // the first use. - killedRegs.clear(); + killedRegs.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; @@ -482,12 +533,12 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { if ((Reg == 0) || ReservedRegs.test(Reg)) continue; bool kill = false; - if (killedRegs.find(Reg) == killedRegs.end()) { + if (!killedRegs.test(Reg)) { kill = true; // A register is not killed if any subregs are live... 
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { - if (KillIndices[*Subreg] != ~0u) { + if (LiveRegs.test(*Subreg)) { kill = false; break; } @@ -496,7 +547,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { // If subreg is not live, then register is killed if it became // live in this instruction if (kill) - kill = (KillIndices[Reg] == ~0u); + kill = !LiveRegs.test(Reg); } if (MO.isKill() != kill) { @@ -506,7 +557,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { DEBUG(MI->dump()); } - killedRegs.insert(Reg); + killedRegs.set(Reg); } // Mark any used register (that is not using undef) and subregs as @@ -517,12 +568,11 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { unsigned Reg = MO.getReg(); if ((Reg == 0) || ReservedRegs.test(Reg)) continue; - KillIndices[Reg] = Count; + LiveRegs.set(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - KillIndices[*Subreg] = Count; - } + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + LiveRegs.set(*Subreg); } } } @@ -585,7 +635,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { ReleaseSuccessors(SU); SU->isScheduled = true; - AvailableQueue.ScheduledNode(SU); + AvailableQueue.scheduledNode(SU); } /// ListScheduleTopDown - The main loop of list scheduling for top-down @@ -699,14 +749,46 @@ void SchedulePostRATDList::ListScheduleTopDown() { } #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/false); -#endif + unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +#endif // NDEBUG } -//===----------------------------------------------------------------------===// -// Public Constructor Functions -//===----------------------------------------------------------------------===// +// EmitSchedule - Emit the machine code in scheduled order. +void SchedulePostRATDList::EmitSchedule() { + RegionBegin = RegionEnd; + + // If first instruction was a DBG_VALUE then put it back. + if (FirstDbgValue) + BB->splice(RegionEnd, BB, FirstDbgValue); + + // Then re-insert them according to the given schedule. + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + BB->splice(RegionEnd, BB, SU->getInstr()); + else + // Null SUnit* is a noop. + TII->insertNoop(*BB, RegionEnd); + + // Update the Begin iterator, as the first instruction in the block + // may have been scheduled later. + if (i == 0) + RegionBegin = prior(RegionEnd); + } -FunctionPass *llvm::createPostRAScheduler(CodeGenOpt::Level OptLevel) { - return new PostRAScheduler(OptLevel); + // Reinsert any remaining debug_values. 
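The FixupKills hunks above replace the KillIndices vector and its ~0u "not live" sentinel with a BitVector of live registers maintained bottom-up: a def clears the bit, a use whose bit is still clear is the last use and deserves the kill flag, and the bit is then set. A self-contained sketch of that bookkeeping on toy instructions; the instruction and register encodings are invented, and subregisters are ignored:

// Toy bottom-up liveness walk: scanning from the bottom of a block, a use is
// the last use (a "kill") if its register is not already live below it.
// This mirrors the BitVector bookkeeping, not the real MachineInstr API.
#include <iostream>
#include <vector>

struct Inst {
  std::vector<unsigned> Defs;
  std::vector<unsigned> Uses;
};

int main() {
  const unsigned NumRegs = 8;
  std::vector<bool> LiveRegs(NumRegs, false); // nothing live out of the block

  // A tiny block, listed top to bottom: r1 = ...; r2 = r1 + r1; ... = r2
  std::vector<Inst> Block = { {{1}, {}}, {{2}, {1, 1}}, {{}, {2}} };

  for (unsigned I = Block.size(); I > 0; --I) {
    const Inst &MI = Block[I - 1];
    for (unsigned R : MI.Defs)
      LiveRegs[R] = false;   // the live range starts here; not live above the def
    for (unsigned R : MI.Uses) {
      if (!LiveRegs[R])
        std::cout << "inst " << I - 1 << ": kill of r" << R << "\n";
      LiveRegs[R] = true;    // live from this use up to its def
    }
  }
  return 0;
}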
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); + MachineInstr *DbgValue = P.first; + MachineBasicBlock::iterator OrigPrivMI = P.second; + BB->splice(++OrigPrivMI, BB, DbgValue); + } + DbgValues.clear(); + FirstDbgValue = NULL; } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index b1d8c9760225..1ad3479afb4c 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -26,6 +26,8 @@ using namespace llvm; char ProcessImplicitDefs::ID = 0; +char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID; + INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs", "Process Implicit Definitions", false, false) INITIALIZE_PASS_DEPENDENCY(LiveVariables) @@ -36,7 +38,6 @@ void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved<AliasAnalysis>(); AU.addPreserved<LiveVariables>(); - AU.addRequired<LiveVariables>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); AU.addPreservedID(TwoAddressInstructionPassID); @@ -50,10 +51,10 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, SmallSet<unsigned, 8> &ImpDefRegs) { switch(OpIdx) { case 1: - return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 || + return MI->isCopy() && (!MI->getOperand(0).readsReg() || ImpDefRegs.count(MI->getOperand(0).getReg())); case 2: - return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 || + return MI->isSubregToReg() && (!MI->getOperand(0).readsReg() || ImpDefRegs.count(MI->getOperand(0).getReg())); default: return false; } @@ -66,7 +67,7 @@ static bool isUndefCopy(MachineInstr *MI, unsigned Reg, MachineOperand &MO1 = MI->getOperand(1); if (MO1.getReg() != Reg) return false; - if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg())) + if (!MO0.readsReg() || ImpDefRegs.count(MO0.getReg())) return true; return false; } @@ -87,7 +88,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { TII = fn.getTarget().getInstrInfo(); TRI = fn.getTarget().getRegisterInfo(); MRI = &fn.getRegInfo(); - LV = &getAnalysis<LiveVariables>(); + LV = getAnalysisIfAvailable<LiveVariables>(); SmallSet<unsigned, 8> ImpDefRegs; SmallVector<MachineInstr*, 8> ImpDefMIs; @@ -105,23 +106,24 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MachineInstr *MI = &*I; ++I; if (MI->isImplicitDef()) { - if (MI->getOperand(0).getSubReg()) + ImpDefMIs.push_back(MI); + // Is this a sub-register read-modify-write? + if (MI->getOperand(0).readsReg()) continue; unsigned Reg = MI->getOperand(0).getReg(); ImpDefRegs.insert(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS) ImpDefRegs.insert(*SS); } - ImpDefMIs.push_back(MI); continue; } // Eliminate %reg1032:sub<def> = COPY undef. 
- if (MI->isCopy() && MI->getOperand(0).getSubReg()) { + if (MI->isCopy() && MI->getOperand(0).readsReg()) { MachineOperand &MO = MI->getOperand(1); if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) { - if (MO.isKill()) { + if (LV && MO.isKill()) { LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg()); vi.removeKill(MI); } @@ -140,7 +142,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { bool ChangedToImpDef = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef()) + if (!MO.isReg() || !MO.readsReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) @@ -155,8 +157,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MI->RemoveOperand(j); if (isKill) { ImpDefRegs.erase(Reg); - LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); - vi.removeKill(MI); + if (LV) { + LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); + vi.removeKill(MI); + } } ChangedToImpDef = true; Changed = true; @@ -172,10 +176,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { continue; } if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { - // Make sure other uses of + // Make sure other reads of Reg are also marked <undef>. for (unsigned j = i+1; j != e; ++j) { MachineOperand &MOJ = MI->getOperand(j); - if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg) + if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg()) MOJ.setIsUndef(); } ImpDefRegs.erase(Reg); @@ -265,7 +269,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { } // Update LiveVariables varinfo if the instruction is a kill. - if (isKill) { + if (LV && isKill) { LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); vi.removeKill(RMI); } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 32c932552bed..458915ea5d93 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -45,24 +45,22 @@ using namespace llvm; char PEI::ID = 0; +char &llvm::PrologEpilogCodeInserterID = PEI::ID; INITIALIZE_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_END(PEI, "prologepilog", - "Prologue/Epilogue Insertion", false, false) + "Prologue/Epilogue Insertion & Frame Finalization", + false, false) STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered"); STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); STATISTIC(NumBytesStackSpace, "Number of bytes used for stack in all functions"); -/// createPrologEpilogCodeInserter - This function returns a pass that inserts -/// prolog and epilog code, and eliminates abstract frame references. -/// -FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } - /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. /// @@ -71,6 +69,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs"); + RS = TRI->requiresRegisterScavenging(Fn) ? 
new RegScavenger() : NULL; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); @@ -125,6 +125,9 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) scavengeFrameVirtualRegs(Fn); + // Clear any vregs created by virtual scavenging. + Fn.getRegInfo().clearVirtRegs(); + delete RS; clearAllSets(); return true; @@ -207,7 +210,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { MachineFrameInfo *MFI = Fn.getFrameInfo(); // Get the callee saved register list... - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); // These are used to keep track the callee-save area. Initialize them. MinCSFrameIndex = INT_MAX; @@ -224,17 +227,9 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (Fn.getRegInfo().isPhysRegUsed(Reg)) { + if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { // If the reg is modified, save it! CSI.push_back(CalleeSavedInfo(Reg)); - } else { - for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); - *AliasSet; ++AliasSet) { // Check alias registers too. - if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) { - CSI.push_back(CalleeSavedInfo(Reg)); - break; - } - } } } @@ -332,7 +327,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Skip over all terminator instructions, which are part of the return // sequence. MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + while (I2 != MBB->begin() && (--I2)->isTerminator()) I = I2; bool AtStart = I == MBB->begin(); @@ -426,11 +421,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Skip over all terminator instructions, which are part of the // return sequence. - if (! I->getDesc().isTerminator()) { + if (! I->isTerminator()) { ++I; } else { MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + while (I2 != MBB->begin() && (--I2)->isTerminator()) I = I2; } } @@ -698,7 +693,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // Add epilogue to restore the callee-save registers in each exiting block for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().getDesc().isReturn()) + if (!I->empty() && I->back().isReturn()) TFI.emitEpilogue(Fn, *I); } @@ -706,7 +701,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // we've been asked for it. This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. - if (EnableSegmentedStacks) + if (Fn.getTarget().Options.EnableSegmentedStacks) TFI.adjustForSegmentedStacks(Fn); } @@ -813,6 +808,10 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { /// scavengeFrameVirtualRegs - Replace all frame index virtual registers /// with physical registers. Use the register scavenger to find an /// appropriate register to use. +/// +/// FIXME: Iterating over the instruction stream is unnecessary. We can simply +/// iterate over the vreg use list, which at this point only contains machine +/// operands for which eliminateFrameIndex need a new scratch reg. 
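The calculateCalleeSavedRegisters hunk above folds the explicit alias loop into a single isPhysRegOrOverlapUsed() query: a callee-saved register needs a save/restore if the register itself or any overlapping register is used in the function. A small standalone model of that decision, with a hand-written overlap table standing in for TargetRegisterInfo::getOverlaps():

// Toy model: decide which callee-saved registers need saving, treating a
// register as used if it or any overlapping (sub/super) register is used.
// The register names and overlap table are invented for the example.
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Overlap sets include the register itself, like getOverlaps().
  std::map<std::string, std::vector<std::string> > Overlaps = {
    {"RBX", {"RBX", "EBX", "BX", "BL"}},
    {"R12", {"R12", "R12D", "R12W", "R12B"}},
  };
  std::set<std::string> UsedInFunction = {"EBX"}; // only the 32-bit half is written

  std::vector<std::string> CalleeSaved = {"RBX", "R12"};
  for (const std::string &CSR : CalleeSaved) {
    bool MustSave = false;
    for (const std::string &R : Overlaps[CSR])
      if (UsedInFunction.count(R))
        MustSave = true;
    std::cout << CSR << (MustSave ? " must be saved\n" : " is untouched\n");
  }
  return 0;
}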
void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Run through the instructions and find any virtual registers. for (MachineFunction::iterator BB = Fn.begin(), diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index e2391591ad06..0d140a9bb481 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -40,10 +40,6 @@ namespace llvm { initializePEIPass(*PassRegistry::getPassRegistry()); } - const char *getPassName() const { - return "Prolog/Epilog Insertion & Frame Finalization"; - } - virtual void getAnalysisUsage(AnalysisUsage &AU) const; /// runOnMachineFunction - Insert prolog/epilog code and replace abstract diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 73b66d868f3d..49599b3ab980 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -87,7 +87,6 @@ bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const { this == getJumpTable()) return true; llvm_unreachable("Unknown PseudoSourceValue!"); - return false; } bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { @@ -97,7 +96,6 @@ bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { this == getJumpTable()) return false; llvm_unreachable("Unknown PseudoSourceValue!"); - return true; } bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const { diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp new file mode 100644 index 000000000000..b00eceb17f11 --- /dev/null +++ b/lib/CodeGen/RegAllocBase.cpp @@ -0,0 +1,280 @@ +//===-- RegAllocBase.cpp - Register Allocator Base Class ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RegAllocBase class which provides comon functionality +// for LiveIntervalUnion-based register allocators. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "RegAllocBase.h" +#include "Spiller.h" +#include "VirtRegMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#ifndef NDEBUG +#include "llvm/ADT/SparseBitVector.h" +#endif +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" + +using namespace llvm; + +STATISTIC(NumAssigned , "Number of registers assigned"); +STATISTIC(NumUnassigned , "Number of registers unassigned"); +STATISTIC(NumNewQueued , "Number of new live ranges queued"); + +// Temporary verification option until we can put verification inside +// MachineVerifier. +static cl::opt<bool, true> +VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), + cl::desc("Verify during register allocation")); + +const char *RegAllocBase::TimerGroupName = "Register Allocation"; +bool RegAllocBase::VerifyEnabled = false; + +#ifndef NDEBUG +// Verify each LiveIntervalUnion. 
+void RegAllocBase::verify() { + LiveVirtRegBitSet VisitedVRegs; + OwningArrayPtr<LiveVirtRegBitSet> + unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]); + + // Verify disjoint unions. + for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { + DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI)); + LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg]; + PhysReg2LiveUnion[PhysReg].verify(VRegs); + // Union + intersection test could be done efficiently in one pass, but + // don't add a method to SparseBitVector unless we really need it. + assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions"); + VisitedVRegs |= VRegs; + } + + // Verify vreg coverage. + for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end(); + liItr != liEnd; ++liItr) { + unsigned reg = liItr->first; + if (TargetRegisterInfo::isPhysicalRegister(reg)) continue; + if (!VRM->hasPhys(reg)) continue; // spilled? + unsigned PhysReg = VRM->getPhys(reg); + if (!unionVRegs[PhysReg].test(reg)) { + dbgs() << "LiveVirtReg " << reg << " not in union " << + TRI->getName(PhysReg) << "\n"; + llvm_unreachable("unallocated live vreg"); + } + } + // FIXME: I'm not sure how to verify spilled intervals. +} +#endif //!NDEBUG + +//===----------------------------------------------------------------------===// +// RegAllocBase Implementation +//===----------------------------------------------------------------------===// + +// Instantiate a LiveIntervalUnion for each physical register. +void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator, + unsigned NRegs) { + NumRegs = NRegs; + Array = + static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs)); + for (unsigned r = 0; r != NRegs; ++r) + new(Array + r) LiveIntervalUnion(r, allocator); +} + +void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { + NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled); + TRI = &vrm.getTargetRegInfo(); + MRI = &vrm.getRegInfo(); + VRM = &vrm; + LIS = &lis; + MRI->freezeReservedRegs(vrm.getMachineFunction()); + RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); + + const unsigned NumRegs = TRI->getNumRegs(); + if (NumRegs != PhysReg2LiveUnion.numRegs()) { + PhysReg2LiveUnion.init(UnionAllocator, NumRegs); + // Cache an interferece query for each physical reg + Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); + } +} + +void RegAllocBase::LiveUnionArray::clear() { + if (!Array) + return; + for (unsigned r = 0; r != NumRegs; ++r) + Array[r].~LiveIntervalUnion(); + free(Array); + NumRegs = 0; + Array = 0; +} + +void RegAllocBase::releaseMemory() { + for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r) + PhysReg2LiveUnion[r].clear(); +} + +// Visit all the live registers. If they are already assigned to a physical +// register, unify them with the corresponding LiveIntervalUnion, otherwise push +// them on the priority queue for later assignment. 
+void RegAllocBase::seedLiveRegs() { + NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); + for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { + unsigned RegNum = I->first; + LiveInterval &VirtReg = *I->second; + if (TargetRegisterInfo::isPhysicalRegister(RegNum)) + PhysReg2LiveUnion[RegNum].unify(VirtReg); + else + enqueue(&VirtReg); + } +} + +void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) + << " to " << PrintReg(PhysReg, TRI) << '\n'); + assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); + VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + MRI->setPhysRegUsed(PhysReg); + PhysReg2LiveUnion[PhysReg].unify(VirtReg); + ++NumAssigned; +} + +void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) + << " from " << PrintReg(PhysReg, TRI) << '\n'); + assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign"); + PhysReg2LiveUnion[PhysReg].extract(VirtReg); + VRM->clearVirt(VirtReg.reg); + ++NumUnassigned; +} + +// Top-level driver to manage the queue of unassigned VirtRegs and call the +// selectOrSplit implementation. +void RegAllocBase::allocatePhysRegs() { + seedLiveRegs(); + + // Continue assigning vregs one at a time to available physical registers. + while (LiveInterval *VirtReg = dequeue()) { + assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned"); + + // Unused registers can appear when the spiller coalesces snippets. + if (MRI->reg_nodbg_empty(VirtReg->reg)) { + DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); + LIS->removeInterval(VirtReg->reg); + continue; + } + + // Invalidate all interference queries, live ranges could have changed. + invalidateVirtRegs(); + + // selectOrSplit requests the allocator to return an available physical + // register if possible and populate a list of new live intervals that + // result from splitting. + DEBUG(dbgs() << "\nselectOrSplit " + << MRI->getRegClass(VirtReg->reg)->getName() + << ':' << *VirtReg << '\n'); + typedef SmallVector<LiveInterval*, 4> VirtRegVec; + VirtRegVec SplitVRegs; + unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); + + if (AvailablePhysReg == ~0u) { + // selectOrSplit failed to find a register! + const char *Msg = "ran out of registers during register allocation"; + // Probably caused by an inline asm. + MachineInstr *MI; + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); + (MI = I.skipInstruction());) + if (MI->isInlineAsm()) + break; + if (MI) + MI->emitError(Msg); + else + report_fatal_error(Msg); + // Keep going after reporting the error. 
+ VRM->assignVirt2Phys(VirtReg->reg, + RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); + continue; + } + + if (AvailablePhysReg) + assign(*VirtReg, AvailablePhysReg); + + for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); + I != E; ++I) { + LiveInterval *SplitVirtReg = *I; + assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); + if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { + DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); + LIS->removeInterval(SplitVirtReg->reg); + continue; + } + DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); + assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && + "expect split value in virtual register"); + enqueue(SplitVirtReg); + ++NumNewQueued; + } + } +} + +// Check if this live virtual register interferes with a physical register. If +// not, then check for interference on each register that aliases with the +// physical register. Return the interfering register. +unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) + if (query(VirtReg, *AliasI).checkInterference()) + return *AliasI; + return 0; +} + +// Add newly allocated physical registers to the MBB live in sets. +void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { + NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); + SlotIndexes *Indexes = LIS->getSlotIndexes(); + if (MF->size() <= 1) + return; + + LiveIntervalUnion::SegmentIter SI; + for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { + LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; + if (LiveUnion.empty()) + continue; + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:"); + MachineFunction::iterator MBB = llvm::next(MF->begin()); + MachineFunction::iterator MFE = MF->end(); + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(MBB); + SI.setMap(LiveUnion.getMap()); + SI.find(Start); + while (SI.valid()) { + if (SI.start() <= Start) { + if (!MBB->isLiveIn(PhysReg)) + MBB->addLiveIn(PhysReg); + DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':' + << PrintReg(SI.value()->reg, TRI)); + } else if (SI.start() > Stop) + MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex()); + if (++MBB == MFE) + break; + tie(Start, Stop) = Indexes->getMBBRange(MBB); + SI.advanceTo(Start); + } + DEBUG(dbgs() << '\n'); + } +} + diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index 031642117efc..072fe2bdb656 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -49,11 +49,6 @@ class VirtRegMap; class LiveIntervals; class Spiller; -// Forward declare a priority queue of live virtual registers. If an -// implementation needs to prioritize by anything other than spill weight, then -// this will become an abstract base class with virtual calls to push/get. -class LiveVirtRegQueue; - /// RegAllocBase provides the register allocation driver and interface that can /// be extended to add interesting heuristics. /// @@ -67,7 +62,6 @@ class RegAllocBase { // registers may have changed. unsigned UserTag; -protected: // Array of LiveIntervalUnions indexed by physical register. 
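allocatePhysRegs() above is a generic driver: dequeue an unassigned virtual register, ask selectOrSplit() for a physical register, record the assignment if one comes back, and requeue whatever new intervals splitting produced. A stripped-down, self-contained sketch of that loop; the VReg type and the selection rule are stand-ins, not the RegAllocBase interface:

// Toy driver in the spirit of allocatePhysRegs(): keep assigning queued
// virtual registers; anything selectOrSplit() reports as "new" goes back on
// the queue. Types and the selection heuristic are invented.
#include <deque>
#include <iostream>
#include <vector>

struct VReg { int Id; };

// Pretend selector: even ids get a physical register immediately, odd ids are
// "split" into two new virtual registers once, which are then assignable.
static unsigned selectOrSplit(const VReg &V, std::vector<VReg> &NewVRegs) {
  if (V.Id % 2 == 0 || V.Id >= 100)
    return 1 + (V.Id % 4);            // some physical register number
  NewVRegs.push_back({V.Id + 100});   // model the products of a split
  NewVRegs.push_back({V.Id + 200});
  return 0;                           // nothing assigned; new vregs carry the value
}

int main() {
  std::deque<VReg> Queue = {{0}, {1}, {2}};
  while (!Queue.empty()) {
    VReg V = Queue.front();
    Queue.pop_front();
    std::vector<VReg> NewVRegs;
    unsigned PhysReg = selectOrSplit(V, NewVRegs);
    if (PhysReg)
      std::cout << "vreg " << V.Id << " -> physreg " << PhysReg << "\n";
    for (const VReg &NV : NewVRegs)
      Queue.push_back(NV);            // newly created intervals get queued
  }
  return 0;
}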
class LiveUnionArray { unsigned NumRegs; @@ -88,17 +82,19 @@ protected: } }; - const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; - VirtRegMap *VRM; - LiveIntervals *LIS; - RegisterClassInfo RegClassInfo; LiveUnionArray PhysReg2LiveUnion; // Current queries, one per physreg. They must be reinitialized each time we // query on a new live virtual register. OwningArrayPtr<LiveIntervalUnion::Query> Queries; +protected: + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + VirtRegMap *VRM; + LiveIntervals *LIS; + RegisterClassInfo RegClassInfo; + RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {} virtual ~RegAllocBase() {} @@ -115,16 +111,17 @@ protected: return Queries[PhysReg]; } + // Get direct access to the underlying LiveIntervalUnion for PhysReg. + LiveIntervalUnion &getLiveUnion(unsigned PhysReg) { + return PhysReg2LiveUnion[PhysReg]; + } + // Invalidate all cached information about virtual registers - live ranges may // have changed. void invalidateVirtRegs() { ++UserTag; } // The top-level driver. The output is a VirtRegMap that us updated with // physical register assignments. - // - // If an implementation wants to override the LiveInterval comparator, we - // should modify this interface to allow passing in an instance derived from - // LiveVirtRegQueue. void allocatePhysRegs(); // Get a temporary reference to a Spiller instance. @@ -160,12 +157,6 @@ protected: /// allocation is making progress. void unassign(LiveInterval &VirtReg, unsigned PhysReg); - // Helper for spilling all live virtual registers currently unified under preg - // that interfere with the most recently queried lvr. Return true if spilling - // was successful, and append any new spilled/split intervals to splitLVRs. - bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs); - /// addMBBLiveIns - Add physreg liveins to basic blocks. 
void addMBBLiveIns(MachineFunction *); @@ -183,9 +174,6 @@ public: private: void seedLiveRegs(); - - void spillReg(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs); }; } // end namespace llvm diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 5496d69fd3df..77ee3148f31a 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -1,4 +1,4 @@ -//===-- RegAllocBasic.cpp - basic register allocator ----------------------===// +//===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===// // // The LLVM Compiler Infrastructure // @@ -15,18 +15,15 @@ #define DEBUG_TYPE "regalloc" #include "RegAllocBase.h" #include "LiveDebugVariables.h" -#include "LiveIntervalUnion.h" -#include "LiveRangeEdit.h" #include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -37,35 +34,17 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#ifndef NDEBUG -#include "llvm/ADT/SparseBitVector.h" -#endif #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Timer.h" #include <cstdlib> #include <queue> using namespace llvm; -STATISTIC(NumAssigned , "Number of registers assigned"); -STATISTIC(NumUnassigned , "Number of registers unassigned"); -STATISTIC(NumNewQueued , "Number of new live ranges queued"); - static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator", createBasicRegisterAllocator); -// Temporary verification option until we can put verification inside -// MachineVerifier. -static cl::opt<bool, true> -VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), - cl::desc("Verify during register allocation")); - -const char *RegAllocBase::TimerGroupName = "Register Allocation"; -bool RegAllocBase::VerifyEnabled = false; - namespace { struct CompSpillWeight { bool operator()(LiveInterval *A, LiveInterval *B) const { @@ -93,6 +72,11 @@ class RABasic : public MachineFunctionPass, public RegAllocBase std::auto_ptr<Spiller> SpillerInstance; std::priority_queue<LiveInterval*, std::vector<LiveInterval*>, CompSpillWeight> Queue; + + // Scratch space. Allocated here to avoid repeated malloc calls in + // selectOrSplit(). + BitVector UsableRegs; + public: RABasic(); @@ -128,6 +112,15 @@ public: /// Perform register allocation. virtual bool runOnMachineFunction(MachineFunction &mf); + // Helper for spilling all live virtual registers currently unified under preg + // that interfere with the most recently queried lvr. Return true if spilling + // was successful, and append any new spilled/split intervals to splitLVRs. 
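RABasic keeps its work list in a std::priority_queue ordered by the CompSpillWeight comparator shown above, so the live interval with the largest spill weight is handed to selectOrSplit() first. A short standalone illustration of that ordering, with made-up interval ids and weights:

// Toy priority queue keyed by spill weight: a "less than" comparator makes
// top() the interval with the largest weight, which is allocated first.
#include <iostream>
#include <queue>
#include <vector>

struct Interval { int Id; float Weight; };

struct CompSpillWeight {
  bool operator()(const Interval &A, const Interval &B) const {
    return A.Weight < B.Weight;
  }
};

int main() {
  std::priority_queue<Interval, std::vector<Interval>, CompSpillWeight> Q;
  Q.push({10, 2.5f});
  Q.push({11, 0.5f});
  Q.push({12, 7.0f});
  while (!Q.empty()) {
    std::cout << "allocate vreg " << Q.top().Id
              << " (weight " << Q.top().Weight << ")\n";
    Q.pop();
  }
  return 0; // order: 12, 10, 11 -- heaviest first
}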
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs); + + void spillReg(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs); + static char ID; }; @@ -139,8 +132,8 @@ RABasic::RABasic(): MachineFunctionPass(ID) { initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); @@ -157,9 +150,6 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveDebugVariables>(); AU.addPreserved<LiveDebugVariables>(); - if (StrongPHIElim) - AU.addRequiredID(StrongPHIEliminationID); - AU.addRequiredTransitiveID(RegisterCoalescerPassID); AU.addRequired<CalculateSpillWeights>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); @@ -178,204 +168,10 @@ void RABasic::releaseMemory() { RegAllocBase::releaseMemory(); } -#ifndef NDEBUG -// Verify each LiveIntervalUnion. -void RegAllocBase::verify() { - LiveVirtRegBitSet VisitedVRegs; - OwningArrayPtr<LiveVirtRegBitSet> - unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]); - - // Verify disjoint unions. - for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { - DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI)); - LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg]; - PhysReg2LiveUnion[PhysReg].verify(VRegs); - // Union + intersection test could be done efficiently in one pass, but - // don't add a method to SparseBitVector unless we really need it. - assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions"); - VisitedVRegs |= VRegs; - } - - // Verify vreg coverage. - for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end(); - liItr != liEnd; ++liItr) { - unsigned reg = liItr->first; - if (TargetRegisterInfo::isPhysicalRegister(reg)) continue; - if (!VRM->hasPhys(reg)) continue; // spilled? - unsigned PhysReg = VRM->getPhys(reg); - if (!unionVRegs[PhysReg].test(reg)) { - dbgs() << "LiveVirtReg " << reg << " not in union " << - TRI->getName(PhysReg) << "\n"; - llvm_unreachable("unallocated live vreg"); - } - } - // FIXME: I'm not sure how to verify spilled intervals. -} -#endif //!NDEBUG - -//===----------------------------------------------------------------------===// -// RegAllocBase Implementation -//===----------------------------------------------------------------------===// - -// Instantiate a LiveIntervalUnion for each physical register. 
-void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator, - unsigned NRegs) { - NumRegs = NRegs; - Array = - static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs)); - for (unsigned r = 0; r != NRegs; ++r) - new(Array + r) LiveIntervalUnion(r, allocator); -} - -void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { - NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled); - TRI = &vrm.getTargetRegInfo(); - MRI = &vrm.getRegInfo(); - VRM = &vrm; - LIS = &lis; - RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); - - const unsigned NumRegs = TRI->getNumRegs(); - if (NumRegs != PhysReg2LiveUnion.numRegs()) { - PhysReg2LiveUnion.init(UnionAllocator, NumRegs); - // Cache an interferece query for each physical reg - Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); - } -} - -void RegAllocBase::LiveUnionArray::clear() { - if (!Array) - return; - for (unsigned r = 0; r != NumRegs; ++r) - Array[r].~LiveIntervalUnion(); - free(Array); - NumRegs = 0; - Array = 0; -} - -void RegAllocBase::releaseMemory() { - for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r) - PhysReg2LiveUnion[r].clear(); -} - -// Visit all the live registers. If they are already assigned to a physical -// register, unify them with the corresponding LiveIntervalUnion, otherwise push -// them on the priority queue for later assignment. -void RegAllocBase::seedLiveRegs() { - NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); - for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { - unsigned RegNum = I->first; - LiveInterval &VirtReg = *I->second; - if (TargetRegisterInfo::isPhysicalRegister(RegNum)) - PhysReg2LiveUnion[RegNum].unify(VirtReg); - else - enqueue(&VirtReg); - } -} - -void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) - << " to " << PrintReg(PhysReg, TRI) << '\n'); - assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); - VRM->assignVirt2Phys(VirtReg.reg, PhysReg); - MRI->setPhysRegUsed(PhysReg); - PhysReg2LiveUnion[PhysReg].unify(VirtReg); - ++NumAssigned; -} - -void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) - << " from " << PrintReg(PhysReg, TRI) << '\n'); - assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign"); - PhysReg2LiveUnion[PhysReg].extract(VirtReg); - VRM->clearVirt(VirtReg.reg); - ++NumUnassigned; -} - -// Top-level driver to manage the queue of unassigned VirtRegs and call the -// selectOrSplit implementation. -void RegAllocBase::allocatePhysRegs() { - seedLiveRegs(); - - // Continue assigning vregs one at a time to available physical registers. - while (LiveInterval *VirtReg = dequeue()) { - assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned"); - - // Unused registers can appear when the spiller coalesces snippets. - if (MRI->reg_nodbg_empty(VirtReg->reg)) { - DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); - LIS->removeInterval(VirtReg->reg); - continue; - } - - // Invalidate all interference queries, live ranges could have changed. - invalidateVirtRegs(); - - // selectOrSplit requests the allocator to return an available physical - // register if possible and populate a list of new live intervals that - // result from splitting. 
- DEBUG(dbgs() << "\nselectOrSplit " - << MRI->getRegClass(VirtReg->reg)->getName() - << ':' << *VirtReg << '\n'); - typedef SmallVector<LiveInterval*, 4> VirtRegVec; - VirtRegVec SplitVRegs; - unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); - - if (AvailablePhysReg == ~0u) { - // selectOrSplit failed to find a register! - const char *Msg = "ran out of registers during register allocation"; - // Probably caused by an inline asm. - MachineInstr *MI; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); - (MI = I.skipInstruction());) - if (MI->isInlineAsm()) - break; - if (MI) - MI->emitError(Msg); - else - report_fatal_error(Msg); - // Keep going after reporting the error. - VRM->assignVirt2Phys(VirtReg->reg, - RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); - continue; - } - - if (AvailablePhysReg) - assign(*VirtReg, AvailablePhysReg); - - for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); - I != E; ++I) { - LiveInterval *SplitVirtReg = *I; - assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); - if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { - DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); - LIS->removeInterval(SplitVirtReg->reg); - continue; - } - DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); - assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && - "expect split value in virtual register"); - enqueue(SplitVirtReg); - ++NumNewQueued; - } - } -} - -// Check if this live virtual register interferes with a physical register. If -// not, then check for interference on each register that aliases with the -// physical register. Return the interfering register. -unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg, - unsigned PhysReg) { - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) - if (query(VirtReg, *AliasI).checkInterference()) - return *AliasI; - return 0; -} - -// Helper for spillInteferences() that spills all interfering vregs currently +// Helper for spillInterferences() that spills all interfering vregs currently // assigned to this physical register. -void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs) { +void RABasic::spillReg(LiveInterval& VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs) { LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg); assert(Q.seenAllInterferences() && "need collectInterferences()"); const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs(); @@ -391,7 +187,7 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, unassign(SpilledVReg, PhysReg); // Spill the extracted interval. - LiveRangeEdit LRE(SpilledVReg, SplitVRegs, 0, &PendingSpills); + LiveRangeEdit LRE(SpilledVReg, SplitVRegs, *MF, *LIS, VRM); spiller().spill(LRE); } // After extracting segments, the query's results are invalid. But keep the @@ -402,14 +198,13 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, // Spill or split all live virtual registers currently unified under PhysReg // that interfere with VirtReg. The newly spilled or split live intervals are // returned by appending them to SplitVRegs. 
-bool -RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, +bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, SmallVectorImpl<LiveInterval*> &SplitVRegs) { // Record each interference and determine if all are spillable before mutating // either the union or live intervals. unsigned NumInterferences = 0; // Collect interferences assigned to any alias of the physical register. - for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) { + for (const uint16_t *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) { LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI); NumInterferences += QAlias.collectInterferingVRegs(); if (QAlias.seenUnspillableVReg()) { @@ -421,52 +216,11 @@ RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, assert(NumInterferences > 0 && "expect interference"); // Spill each interfering vreg allocated to PhysReg or an alias. - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) + for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) spillReg(VirtReg, *AliasI, SplitVRegs); return true; } -// Add newly allocated physical registers to the MBB live in sets. -void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { - NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); - SlotIndexes *Indexes = LIS->getSlotIndexes(); - if (MF->size() <= 1) - return; - - LiveIntervalUnion::SegmentIter SI; - for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { - LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; - if (LiveUnion.empty()) - continue; - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:"); - MachineFunction::iterator MBB = llvm::next(MF->begin()); - MachineFunction::iterator MFE = MF->end(); - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(MBB); - SI.setMap(LiveUnion.getMap()); - SI.find(Start); - while (SI.valid()) { - if (SI.start() <= Start) { - if (!MBB->isLiveIn(PhysReg)) - MBB->addLiveIn(PhysReg); - DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':' - << PrintReg(SI.value()->reg, TRI)); - } else if (SI.start() > Stop) - MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex()); - if (++MBB == MFE) - break; - tie(Start, Stop) = Indexes->getMBBRange(MBB); - SI.advanceTo(Start); - } - DEBUG(dbgs() << '\n'); - } -} - - -//===----------------------------------------------------------------------===// -// RABasic Implementation -//===----------------------------------------------------------------------===// - // Driver for the register assignment and splitting heuristics. // Manages iteration over the LiveIntervalUnions. // @@ -481,6 +235,10 @@ void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { // selectOrSplit(). unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &SplitVRegs) { + // Check for register mask interference. When live ranges cross calls, the + // set of usable registers is reduced to the callee-saved ones. + bool CrossRegMasks = LIS->checkRegMaskInterference(VirtReg, UsableRegs); + // Populate a list of physical register spill candidates. SmallVector<unsigned, 8> PhysRegSpillCands; @@ -491,6 +249,11 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, ++I) { unsigned PhysReg = *I; + // If PhysReg is clobbered by a register mask, it isn't useful for + // allocation or spilling. 
+ if (CrossRegMasks && !UsableRegs.test(PhysReg)) + continue; + // Check interference and as a side effect, intialize queries for this // VirtReg and its aliases. unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg); @@ -498,9 +261,9 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, // Found an available register. return PhysReg; } - Queries[interfReg].collectInterferingVRegs(1); - LiveInterval *interferingVirtReg = - Queries[interfReg].interferingVRegs().front(); + LiveIntervalUnion::Query &IntfQ = query(VirtReg, interfReg); + IntfQ.collectInterferingVRegs(1); + LiveInterval *interferingVirtReg = IntfQ.interferingVRegs().front(); // The current VirtReg must either be spillable, or one of its interferences // must have less spill weight. @@ -524,7 +287,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); if (!VirtReg.isSpillable()) return ~0u; - LiveRangeEdit LRE(VirtReg, SplitVRegs); + LiveRangeEdit LRE(VirtReg, SplitVRegs, *MF, *LIS, VRM); spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell @@ -579,7 +342,10 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { // Write out new DBG_VALUE instructions. getAnalysis<LiveDebugVariables>().emitDebugValues(VRM); - // The pass output is in VirtRegMap. Release all the transient data. + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers and release all the transient data. + VRM->clearAllVirt(); + MRI->clearVirtRegs(); releaseMemory(); return true; diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index b36a445291b7..e09b7f8d26be 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -32,6 +32,7 @@ #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include <algorithm> @@ -49,10 +50,7 @@ namespace { public: static char ID; RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), - isBulkSpilling(false) { - initializePHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); - } + isBulkSpilling(false) {} private: const TargetMachine *TM; MachineFunction *MF; @@ -71,16 +69,20 @@ namespace { // Everything we know about a live virtual register. struct LiveReg { MachineInstr *LastUse; // Last instr to use reg. + unsigned VirtReg; // Virtual register number. unsigned PhysReg; // Currently held here. unsigned short LastOpNum; // OpNum on LastUse. bool Dirty; // Register needs spill. - LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0), - Dirty(false) {} + explicit LiveReg(unsigned v) + : LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {} + + unsigned getSparseSetKey() const { + return TargetRegisterInfo::virtReg2Index(VirtReg); + } }; - typedef DenseMap<unsigned, LiveReg> LiveRegMap; - typedef LiveRegMap::value_type LiveRegEntry; + typedef SparseSet<LiveReg> LiveRegMap; // LiveVirtRegs - This map contains entries for each virtual register // that is currently available in a physical register. 
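The RegAllocFast hunks above replace the DenseMap-based LiveVirtRegs with a SparseSet keyed by the virtual register index (see LiveReg::getSparseSetKey here and the LiveVirtRegs.setUniverse(MRI->getNumVirtRegs()) call in runOnMachineFunction further down). As a minimal, self-contained sketch of the Briggs-Torczon sparse-set idea behind that container — the class name, member names, and the universe size below are hypothetical and simplified, not LLVM's actual SparseSet API:

// Hypothetical, simplified sketch of a sparse set over keys in [0, Universe).
// Illustrates the idea only; it is not llvm::SparseSet.
#include <cassert>
#include <cstdint>
#include <vector>

class SparseSetSketch {
  std::vector<uint32_t> Sparse; // Key -> index into Dense (may be stale).
  std::vector<uint32_t> Dense;  // Keys currently in the set, unordered.
public:
  // Must be called before use, e.g. with the number of virtual registers.
  void setUniverse(uint32_t U) { Sparse.assign(U, 0); Dense.clear(); }

  bool contains(uint32_t Key) const {
    uint32_t Idx = Sparse[Key];
    return Idx < Dense.size() && Dense[Idx] == Key;
  }

  void insert(uint32_t Key) {
    if (contains(Key))
      return;
    Sparse[Key] = static_cast<uint32_t>(Dense.size());
    Dense.push_back(Key);
  }

  void erase(uint32_t Key) {
    if (!contains(Key))
      return;
    uint32_t Idx = Sparse[Key];
    Dense[Idx] = Dense.back();   // Move the last element into the hole.
    Sparse[Dense[Idx]] = Idx;    // Fix its sparse slot.
    Dense.pop_back();
  }

  // O(1): stale Sparse entries are harmless because contains() re-validates.
  void clear() { Dense.clear(); }
};

int main() {
  SparseSetSketch LiveVirt;
  LiveVirt.setUniverse(1024);    // e.g. number of virtual registers
  LiveVirt.insert(5);
  LiveVirt.insert(7);
  assert(LiveVirt.contains(7) && !LiveVirt.contains(6));
  LiveVirt.erase(5);
  assert(!LiveVirt.contains(5));
  LiveVirt.clear();
  assert(!LiveVirt.contains(7));
  return 0;
}

The property the patch relies on is that membership tests, insertion, and clearing are constant time over a fixed universe of virtual register indices, which is why the allocator sets the universe once per function and can cheaply reset the map between basic blocks.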
@@ -137,8 +139,6 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequiredID(PHIEliminationID); - AU.addRequiredID(TwoAddressInstructionPassID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -159,14 +159,23 @@ namespace { void usePhysReg(MachineOperand&); void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState); unsigned calcSpillCost(unsigned PhysReg) const; - void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg); - void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint); + void assignVirtToPhysReg(LiveReg&, unsigned PhysReg); + LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) { + return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + } + LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const { + return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + } + LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg); + LiveRegMap::iterator allocVirtReg(MachineInstr *MI, LiveRegMap::iterator, + unsigned Hint); LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); void spillAll(MachineInstr *MI); bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg); + void addRetOperands(MachineBasicBlock *MBB); }; char RAFast::ID = 0; } @@ -222,10 +231,10 @@ void RAFast::addKillFlag(const LiveReg &LR) { /// killVirtReg - Mark virtreg as no longer available. void RAFast::killVirtReg(LiveRegMap::iterator LRI) { - addKillFlag(LRI->second); - const LiveReg &LR = LRI->second; - assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping"); - PhysRegState[LR.PhysReg] = regFree; + addKillFlag(*LRI); + assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg && + "Broken RegState mapping"); + PhysRegState[LRI->PhysReg] = regFree; // Erase from LiveVirtRegs unless we're spilling in bulk. if (!isBulkSpilling) LiveVirtRegs.erase(LRI); @@ -235,7 +244,7 @@ void RAFast::killVirtReg(LiveRegMap::iterator LRI) { void RAFast::killVirtReg(unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); - LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg); + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); if (LRI != LiveVirtRegs.end()) killVirtReg(LRI); } @@ -245,7 +254,7 @@ void RAFast::killVirtReg(unsigned VirtReg) { void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); - LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg); + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register"); spillVirtReg(MI, LRI); } @@ -253,18 +262,18 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { /// spillVirtReg - Do the actual work of spilling. void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator LRI) { - LiveReg &LR = LRI->second; - assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping"); + LiveReg &LR = *LRI; + assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping"); if (LR.Dirty) { // If this physreg is used by the instruction, we want to kill it on the // instruction, not on the spill. 
bool SpillKill = LR.LastUse != MI; LR.Dirty = false; - DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI) + DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI) << " in " << PrintReg(LR.PhysReg, TRI)); - const TargetRegisterClass *RC = MRI->getRegClass(LRI->first); - int FI = getStackSpaceFor(LRI->first, RC); + const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg); + int FI = getStackSpaceFor(LRI->VirtReg, RC); DEBUG(dbgs() << " to stack slot #" << FI << "\n"); TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI); ++NumStores; // Update statistics @@ -272,7 +281,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, // If this register is used by DBG_VALUE then insert new DBG_VALUE to // identify spilled location as the place to find corresponding variable's // value. - SmallVector<MachineInstr *, 4> &LRIDbgValues = LiveDbgValueMap[LRI->first]; + SmallVector<MachineInstr *, 4> &LRIDbgValues = + LiveDbgValueMap[LRI->VirtReg]; for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { MachineInstr *DBG = LRIDbgValues[li]; const MDNode *MDPtr = @@ -295,8 +305,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); } } - // Now this register is spilled there is should not be any DBG_VALUE pointing - // to this register because they are all pointing to spilled value now. + // Now that this register is spilled, there should not be any DBG_VALUE + // pointing to this register because they are all pointing to the spilled + // value now. LRIDbgValues.clear(); if (SpillKill) LR.LastUse = 0; // Don't kill register again @@ -343,7 +354,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { } // Maybe a superregister is reserved? - for (const unsigned *AS = TRI->getAliasSet(PhysReg); + for (const uint16_t *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { switch (PhysRegState[Alias]) { case regDisabled: @@ -397,7 +408,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // This is a disabled register, disable all aliases. PhysRegState[PhysReg] = NewState; - for (const unsigned *AS = TRI->getAliasSet(PhysReg); + for (const uint16_t *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: @@ -435,14 +446,17 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding " << PrintReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; - default: - return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; + default: { + LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); + assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); + return I->Dirty ? spillDirty : spillClean; + } } // This is a disabled register, add up cost of aliases. DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n"); unsigned Cost = 0; - for (const unsigned *AS = TRI->getAliasSet(PhysReg); + for (const uint16_t *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { if (UsedInInstr.test(Alias)) return spillImpossible; @@ -454,10 +468,13 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { break; case regReserved: return spillImpossible; - default: - Cost += LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; + default: { + LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); + assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); + Cost += I->Dirty ?
spillDirty : spillClean; break; } + } } return Cost; } @@ -467,17 +484,27 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { /// that PhysReg is the proper container for VirtReg now. The physical /// register must not be used for anything else when this is called. /// -void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) { - DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to " +void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) { + DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to " << PrintReg(PhysReg, TRI) << "\n"); - PhysRegState[PhysReg] = LRE.first; - assert(!LRE.second.PhysReg && "Already assigned a physreg"); - LRE.second.PhysReg = PhysReg; + PhysRegState[PhysReg] = LR.VirtReg; + assert(!LR.PhysReg && "Already assigned a physreg"); + LR.PhysReg = PhysReg; +} + +RAFast::LiveRegMap::iterator +RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared"); + assignVirtToPhysReg(*LRI, PhysReg); + return LRI; } /// allocVirtReg - Allocate a physical register for VirtReg. -void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { - const unsigned VirtReg = LRE.first; +RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, + LiveRegMap::iterator LRI, + unsigned Hint) { + const unsigned VirtReg = LRI->VirtReg; assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); @@ -496,7 +523,9 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { if (Cost < spillDirty) { if (Cost) definePhysReg(MI, Hint, regFree); - return assignVirtToPhysReg(LRE, Hint); + // definePhysReg may kill virtual registers and modify LiveVirtRegs. + // That invalidates LRI, so run a new lookup for VirtReg. + return assignVirtToPhysReg(VirtReg, Hint); } } @@ -505,8 +534,10 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { // First try to find a completely free register. for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { unsigned PhysReg = *I; - if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) - return assignVirtToPhysReg(LRE, PhysReg); + if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) { + assignVirtToPhysReg(*LRI, PhysReg); + return LRI; + } } DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from " @@ -519,21 +550,25 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { DEBUG(dbgs() << "\tCost: " << Cost << "\n"); DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); // Cost is 0 when all aliases are already disabled. - if (Cost == 0) - return assignVirtToPhysReg(LRE, *I); + if (Cost == 0) { + assignVirtToPhysReg(*LRI, *I); + return LRI; + } if (Cost < BestCost) BestReg = *I, BestCost = Cost; } if (BestReg) { definePhysReg(MI, BestReg, regFree); - return assignVirtToPhysReg(LRE, BestReg); + // definePhysReg may kill virtual registers and modify LiveVirtRegs. + // That invalidates LRI, so run a new lookup for VirtReg. + return assignVirtToPhysReg(VirtReg, BestReg); } // Nothing we can do. Report an error and keep going with a bad allocation. 
MI->emitError("ran out of registers during register allocation"); definePhysReg(MI, *AO.begin(), regFree); - assignVirtToPhysReg(LRE, *AO.begin()); + return assignVirtToPhysReg(VirtReg, *AO.begin()); } /// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. @@ -544,8 +579,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, "Not a virtual register"); LiveRegMap::iterator LRI; bool New; - tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); - LiveReg &LR = LRI->second; + tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); if (New) { // If there is no hint, peek at the only use of this register. if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && @@ -555,18 +589,18 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, if (UseMI.isCopyLike()) Hint = UseMI.getOperand(0).getReg(); } - allocVirtReg(MI, *LRI, Hint); - } else if (LR.LastUse) { + LRI = allocVirtReg(MI, LRI, Hint); + } else if (LRI->LastUse) { // Redefining a live register - kill at the last use, unless it is this // instruction defining VirtReg multiple times. - if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse()) - addKillFlag(LR); + if (LRI->LastUse != MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse()) + addKillFlag(*LRI); } - assert(LR.PhysReg && "Register not assigned"); - LR.LastUse = MI; - LR.LastOpNum = OpNum; - LR.Dirty = true; - UsedInInstr.set(LR.PhysReg); + assert(LRI->PhysReg && "Register not assigned"); + LRI->LastUse = MI; + LRI->LastOpNum = OpNum; + LRI->Dirty = true; + UsedInInstr.set(LRI->PhysReg); return LRI; } @@ -578,18 +612,17 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, "Not a virtual register"); LiveRegMap::iterator LRI; bool New; - tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); - LiveReg &LR = LRI->second; + tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); MachineOperand &MO = MI->getOperand(OpNum); if (New) { - allocVirtReg(MI, *LRI, Hint); + LRI = allocVirtReg(MI, LRI, Hint); const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into " - << PrintReg(LR.PhysReg, TRI) << "\n"); - TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI); + << PrintReg(LRI->PhysReg, TRI) << "\n"); + TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI); ++NumLoads; - } else if (LR.Dirty) { + } else if (LRI->Dirty) { if (isLastUseOfLocalReg(MO)) { DEBUG(dbgs() << "Killing last use: " << MO << "\n"); if (MO.isUse()) @@ -614,10 +647,10 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n"); MO.setIsDead(false); } - assert(LR.PhysReg && "Register not assigned"); - LR.LastUse = MI; - LR.LastOpNum = OpNum; - UsedInInstr.set(LR.PhysReg); + assert(LRI->PhysReg && "Register not assigned"); + LRI->LastUse = MI; + LRI->LastOpNum = OpNum; + UsedInInstr.set(LRI->PhysReg); return LRI; } @@ -674,7 +707,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, UsedInInstr.set(Reg); if (ThroughRegs.count(PhysRegState[Reg])) definePhysReg(MI, Reg, regFree); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { UsedInInstr.set(*AS); if (ThroughRegs.count(PhysRegState[*AS])) definePhysReg(MI, *AS, regFree); @@ -682,7 +715,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, } SmallVector<unsigned, 8> PartialDefs; - DEBUG(dbgs() << 
"Allocating tied uses and early clobbers.\n"); + DEBUG(dbgs() << "Allocating tied uses.\n"); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -694,7 +727,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand " << DefIdx << ".\n"); LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); - unsigned PhysReg = LRI->second.PhysReg; + unsigned PhysReg = LRI->PhysReg; setPhysReg(MI, i, PhysReg); // Note: we don't update the def operand yet. That would cause the normal // def-scan to attempt spilling. @@ -703,16 +736,25 @@ void RAFast::handleThroughOperands(MachineInstr *MI, // Reload the register, but don't assign to the operand just yet. // That would confuse the later phys-def processing pass. LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); - PartialDefs.push_back(LRI->second.PhysReg); - } else if (MO.isEarlyClobber()) { - // Note: defineVirtReg may invalidate MO. - LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); - unsigned PhysReg = LRI->second.PhysReg; - if (setPhysReg(MI, i, PhysReg)) - VirtDead.push_back(Reg); + PartialDefs.push_back(LRI->PhysReg); } } + DEBUG(dbgs() << "Allocating early clobbers.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + if (!MO.isEarlyClobber()) + continue; + // Note: defineVirtReg may invalidate MO. + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->PhysReg; + if (setPhysReg(MI, i, PhysReg)) + VirtDead.push_back(Reg); + } + // Restore UsedInInstr to a state usable for allocating normal virtual uses. UsedInInstr.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -730,32 +772,66 @@ void RAFast::handleThroughOperands(MachineInstr *MI, UsedInInstr.set(PartialDefs[i]); } -void RAFast::AllocateBasicBlock() { - DEBUG(dbgs() << "\nAllocating " << *MBB); +/// addRetOperand - ensure that a return instruction has an operand for each +/// value live out of the function. +/// +/// Things marked both call and return are tail calls; do not do this for them. +/// The tail callee need not take the same registers as input that it produces +/// as output, and there are dependencies for its input registers elsewhere. +/// +/// FIXME: This should be done as part of instruction selection, and this helper +/// should be deleted. Until then, we use custom logic here to create the proper +/// operand under all circumstances. We can't use addRegisterKilled because that +/// doesn't make sense for undefined values. We can't simply avoid calling it +/// for undefined values, because we must ensure that the operand always exists. +void RAFast::addRetOperands(MachineBasicBlock *MBB) { + if (MBB->empty() || !MBB->back().isReturn() || MBB->back().isCall()) + return; + + MachineInstr *MI = &MBB->back(); + + for (MachineRegisterInfo::liveout_iterator + I = MBB->getParent()->getRegInfo().liveout_begin(), + E = MBB->getParent()->getRegInfo().liveout_end(); I != E; ++I) { + unsigned Reg = *I; + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Cannot have a live-out virtual register."); + + bool hasDef = PhysRegState[Reg] == regReserved; + + // Check if this register already has an operand. 
+ bool Found = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + + unsigned OperReg = MO.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(OperReg)) + continue; - // FIXME: This should probably be added by instruction selection instead? - // If the last instruction in the block is a return, make sure to mark it as - // using all of the live-out values in the function. Things marked both call - // and return are tail calls; do not do this for them. The tail callee need - // not take the same registers as input that it produces as output, and there - // are dependencies for its input registers elsewhere. - if (!MBB->empty() && MBB->back().getDesc().isReturn() && - !MBB->back().getDesc().isCall()) { - MachineInstr *Ret = &MBB->back(); - - for (MachineRegisterInfo::liveout_iterator - I = MF->getRegInfo().liveout_begin(), - E = MF->getRegInfo().liveout_end(); I != E; ++I) { - assert(TargetRegisterInfo::isPhysicalRegister(*I) && - "Cannot have a live-out virtual register."); - - // Add live-out registers as implicit uses. - Ret->addRegisterKilled(*I, TRI, true); + if (OperReg == Reg || TRI->isSuperRegister(OperReg, Reg)) { + // If the ret already has an operand for this physreg or a superset, + // don't duplicate it. Set the kill flag if the value is defined. + if (hasDef && !MO.isKill()) + MO.setIsKill(); + Found = true; + break; + } } + if (!Found) + MI->addOperand(MachineOperand::CreateReg(Reg, + false /*IsDef*/, + true /*IsImp*/, + hasDef/*IsKill*/)); } +} + +void RAFast::AllocateBasicBlock() { + DEBUG(dbgs() << "\nAllocating " << *MBB); PhysRegState.assign(TRI->getNumRegs(), regDisabled); - assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?"); + assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); MachineBasicBlock::iterator MII = MBB->begin(); @@ -783,25 +859,26 @@ void RAFast::AllocateBasicBlock() { case regReserved: dbgs() << "*"; break; - default: + default: { dbgs() << '=' << PrintReg(PhysRegState[Reg]); - if (LiveVirtRegs[PhysRegState[Reg]].Dirty) + LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]); + assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); + if (I->Dirty) dbgs() << "*"; - assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg && - "Bad inverse map"); + assert(I->PhysReg == Reg && "Bad inverse map"); break; } + } } dbgs() << '\n'; // Check that LiveVirtRegs is the inverse. 
for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); i != e; ++i) { - assert(TargetRegisterInfo::isVirtualRegister(i->first) && + assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) && "Bad map key"); - assert(TargetRegisterInfo::isPhysicalRegister(i->second.PhysReg) && + assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) && "Bad map value"); - assert(PhysRegState[i->second.PhysReg] == i->first && - "Bad inverse map"); + assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); } }); @@ -815,10 +892,9 @@ void RAFast::AllocateBasicBlock() { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - LiveDbgValueMap[Reg].push_back(MI); - LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); + LiveRegMap::iterator LRI = findLiveVirtReg(Reg); if (LRI != LiveVirtRegs.end()) - setPhysReg(MI, i, LRI->second.PhysReg); + setPhysReg(MI, i, LRI->PhysReg); else { int SS = StackSlotForVirtReg[Reg]; if (SS == -1) { @@ -849,6 +925,7 @@ void RAFast::AllocateBasicBlock() { } } } + LiveDbgValueMap[Reg].push_back(MI); } } // Next instruction. @@ -932,7 +1009,7 @@ void RAFast::AllocateBasicBlock() { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isUse()) { LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst); - unsigned PhysReg = LRI->second.PhysReg; + unsigned PhysReg = LRI->PhysReg; CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; if (setPhysReg(MI, i, PhysReg)) killVirtReg(LRI); @@ -953,13 +1030,13 @@ void RAFast::AllocateBasicBlock() { // Look for physreg defs and tied uses. if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; UsedInInstr.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) UsedInInstr.set(*AS); } } unsigned DefOpEnd = MI->getNumOperands(); - if (MCID.isCall()) { + if (MI->isCall()) { // Spill all virtregs before a call. This serves two purposes: 1. If an // exception is thrown, the landing pad is going to expect to find // registers in their spill slots, and 2. we don't have to wade through @@ -988,7 +1065,7 @@ void RAFast::AllocateBasicBlock() { continue; } LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc); - unsigned PhysReg = LRI->second.PhysReg; + unsigned PhysReg = LRI->PhysReg; if (setPhysReg(MI, i, PhysReg)) { VirtDead.push_back(Reg); CopyDst = 0; // cancel coalescing; @@ -1024,6 +1101,9 @@ void RAFast::AllocateBasicBlock() { MBB->erase(Coalesced[i]); NumCopies += Coalesced.size(); + // addRetOperands must run after we've seen all defs in this block. + addRetOperands(MBB); + DEBUG(MBB->dump()); } @@ -1038,12 +1118,16 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { TM = &Fn.getTarget(); TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); + MRI->freezeReservedRegs(Fn); RegClassInfo.runOnMachineFunction(Fn); UsedInInstr.resize(TRI->getNumRegs()); + assert(!MRI->isSSA() && "regalloc requires leaving SSA"); + // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers StackSlotForVirtReg.resize(MRI->getNumVirtRegs()); + LiveVirtRegs.setUniverse(MRI->getNumVirtRegs()); // Loop over all of the basic blocks, eliminating virtual register references for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end(); @@ -1052,16 +1136,17 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { AllocateBasicBlock(); } - // Make sure the set of used physregs is closed under subreg operations. 
- MRI->closePhysRegsUsed(*TRI); - // Add the clobber lists for all the instructions we skipped earlier. for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) - if (const unsigned *Defs = (*I)->getImplicitDefs()) + if (const uint16_t *Defs = (*I)->getImplicitDefs()) while (*Defs) MRI->setPhysRegUsed(*Defs++); + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers. + MRI->clearVirtRegs(); + SkippedInstrs.clear(); StackSlotForVirtReg.clear(); LiveDbgValueMap.clear(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index f54a2c85d100..3f2a617100c3 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -16,7 +16,6 @@ #include "AllocationOrder.h" #include "InterferenceCache.h" #include "LiveDebugVariables.h" -#include "LiveRangeEdit.h" #include "RegAllocBase.h" #include "Spiller.h" #include "SpillPlacement.h" @@ -29,6 +28,7 @@ #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -168,6 +168,19 @@ class RAGreedy : public MachineFunctionPass, } }; + // Register mask interference. The current VirtReg is checked for register + // mask interference on entry to selectOrSplit(). If there is no + // interference, UsableRegs is left empty. If there is interference, + // UsableRegs has a bit mask of registers that can be used without register + // mask interference. + BitVector UsableRegs; + + /// clobberedByRegMask - Returns true if PhysReg is not directly usable + /// because of register mask clobbers. + bool clobberedByRegMask(unsigned PhysReg) const { + return !UsableRegs.empty() && !UsableRegs.test(PhysReg); + } + // splitting state. 
std::auto_ptr<SplitAnalysis> SA; std::auto_ptr<SplitEditor> SE; @@ -248,7 +261,6 @@ public: static char ID; private: - void LRE_WillEraseInstruction(MachineInstr*); bool LRE_CanEraseVirtReg(unsigned); void LRE_WillShrinkVirtReg(unsigned); void LRE_DidCloneVirtReg(unsigned, unsigned); @@ -308,8 +320,8 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); @@ -328,9 +340,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveDebugVariables>(); AU.addPreserved<LiveDebugVariables>(); - if (StrongPHIElim) - AU.addRequiredID(StrongPHIEliminationID); - AU.addRequiredTransitiveID(RegisterCoalescerPassID); AU.addRequired<CalculateSpillWeights>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); @@ -350,11 +359,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { // LiveRangeEdit delegate methods //===----------------------------------------------------------------------===// -void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) { - // LRE itself will remove from SlotIndexes and parent basic block. - VRM->RemoveMachineInstrFromMaps(MI); -} - bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { if (unsigned PhysReg = VRM->getPhys(VirtReg)) { unassign(LIS->getInterval(VirtReg), PhysReg); @@ -424,13 +428,13 @@ void RAGreedy::enqueue(LiveInterval *LI) { Prio |= (1u << 30); } - Queue.push(std::make_pair(Prio, Reg)); + Queue.push(std::make_pair(Prio, ~Reg)); } LiveInterval *RAGreedy::dequeue() { if (Queue.empty()) return 0; - LiveInterval *LI = &LIS->getInterval(Queue.top().second); + LiveInterval *LI = &LIS->getInterval(~Queue.top().second); Queue.pop(); return LI; } @@ -446,9 +450,12 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &NewVRegs) { Order.rewind(); unsigned PhysReg; - while ((PhysReg = Order.next())) + while ((PhysReg = Order.next())) { + if (clobberedByRegMask(PhysReg)) + continue; if (!checkPhysRegInterference(VirtReg, PhysReg)) break; + } if (!PhysReg || Order.isHint(PhysReg)) return PhysReg; @@ -457,7 +464,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // If we missed a simple hint, try to cheaply evict interference from the // preferred register. if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) - if (Order.isHint(Hint)) { + if (Order.isHint(Hint) && !clobberedByRegMask(Hint)) { DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); EvictionCost MaxCost(1); if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { @@ -532,7 +539,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, Cascade = NextCascade; EvictionCost Cost; - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { + for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); // If there is 10 or more interferences, chances are one is heavier. 
if (Q.collectInterferingVRegs(10) >= 10) @@ -590,7 +597,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) << " interference: Cascade " << Cascade << '\n'); - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { + for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); assert(Q.seenAllInterferences() && "Didn't check all interfererences."); for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { @@ -629,6 +636,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, Order.rewind(); while (unsigned PhysReg = Order.next()) { + if (clobberedByRegMask(PhysReg)) + continue; if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. @@ -1118,6 +1127,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, } --NumCands; GlobalCand[Worst] = GlobalCand[NumCands]; + if (BestCand == NumCands) + BestCand = Worst; } if (GlobalCand.size() <= NumCands) @@ -1172,7 +1183,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; // Prepare split editor. - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); // Assign all edge bundles to the preferred candidate, or NoCand. @@ -1220,7 +1231,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); unsigned Reg = VirtReg.reg; bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { @@ -1268,7 +1279,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, SmallVectorImpl<float> &GapWeight) { assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); - const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; + ArrayRef<SlotIndex> Uses = SA->getUseSlots(); const unsigned NumGaps = Uses.size()-1; // Start and end points for the interference check. @@ -1280,7 +1291,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, GapWeight.assign(NumGaps, 0.0f); // Add interference from each overlapping register. - for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { + for (const uint16_t *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI) .checkInterference()) continue; @@ -1292,7 +1303,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, // surrounding the instruction. The exception is interference before // StartIdx and after StopIdx. // - LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx); + LiveIntervalUnion::SegmentIter IntI = getLiveUnion(*AI).find(StartIdx); for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { // Skip the gaps before IntI. while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) @@ -1329,7 +1340,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // that the interval is continuous from FirstInstr to LastInstr. 
We should // make sure that we don't do anything illegal to such an interval, though. - const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; + ArrayRef<SlotIndex> Uses = SA->getUseSlots(); if (Uses.size() <= 2) return 0; const unsigned NumGaps = Uses.size()-1; @@ -1337,10 +1348,40 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG({ dbgs() << "tryLocalSplit: "; for (unsigned i = 0, e = Uses.size(); i != e; ++i) - dbgs() << ' ' << SA->UseSlots[i]; + dbgs() << ' ' << Uses[i]; dbgs() << '\n'; }); + // If VirtReg is live across any register mask operands, compute a list of + // gaps with register masks. + SmallVector<unsigned, 8> RegMaskGaps; + if (!UsableRegs.empty()) { + // Get regmask slots for the whole block. + ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); + DEBUG(dbgs() << RMS.size() << " regmasks in block:"); + // Constrain to VirtReg's live range. + unsigned ri = std::lower_bound(RMS.begin(), RMS.end(), + Uses.front().getRegSlot()) - RMS.begin(); + unsigned re = RMS.size(); + for (unsigned i = 0; i != NumGaps && ri != re; ++i) { + // Look for Uses[i] <= RMS <= Uses[i+1]. + assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i])); + if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri])) + continue; + // Skip a regmask on the same instruction as the last use. It doesn't + // overlap the live range. + if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps) + break; + DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]); + RegMaskGaps.push_back(i); + // Advance ri to the next gap. A regmask on one of the uses counts in + // both gaps. + while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1])) + ++ri; + } + DEBUG(dbgs() << '\n'); + } + // Since we allow local split results to be split again, there is a risk of // creating infinite loops. It is tempting to require that the new live // ranges have less instructions than the original. That would guarantee @@ -1375,6 +1416,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1]. calcGapWeights(PhysReg, GapWeight); + // Remove any gaps with regmask clobbers. + if (clobberedByRegMask(PhysReg)) + for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) + GapWeight[RegMaskGaps[i]] = HUGE_VALF; + // Try to find the best sequence of gaps to close. // The new spill weight must be larger than any gap interference. @@ -1466,7 +1512,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, << '-' << Uses[BestAfter] << ", " << BestDiff << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit); SE->openIntv(); @@ -1553,6 +1599,11 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &NewVRegs) { + // Check if VirtReg is live across any calls. + UsableRegs.clear(); + if (LIS->checkRegMaskInterference(VirtReg, UsableRegs)) + DEBUG(dbgs() << "Live across regmasks.\n"); + // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) @@ -1593,7 +1644,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // Finally spill VirtReg itself. 
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(VirtReg, NewVRegs, this); + LiveRangeEdit LRE(VirtReg, NewVRegs, *MF, *LIS, VRM, this); spiller().spill(LRE); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); @@ -1628,7 +1679,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; - IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI); + IntfCache.init(MF, &getLiveUnion(0), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. allocatePhysRegs(); @@ -1647,7 +1698,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DebugVars->emitDebugValues(VRM); } - // The pass output is in VirtRegMap. Release all the transient data. + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers and release all the transient data. + VRM->clearAllVirt(); + MRI->clearVirtRegs(); releaseMemory(); return true; diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp deleted file mode 100644 index ce3fb90b1126..000000000000 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ /dev/null @@ -1,1543 +0,0 @@ -//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a linear scan register allocator. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "regalloc" -#include "LiveDebugVariables.h" -#include "LiveRangeEdit.h" -#include "VirtRegMap.h" -#include "VirtRegRewriter.h" -#include "RegisterClassInfo.h" -#include "Spiller.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/EquivalenceClasses.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <queue> -#include <memory> -#include <cmath> - -using namespace llvm; - -STATISTIC(NumIters , "Number of iterations performed"); -STATISTIC(NumBacktracks, "Number of times we had to backtrack"); -STATISTIC(NumCoalesce, "Number of copies coalesced"); -STATISTIC(NumDowngrade, "Number of registers downgraded"); - -static cl::opt<bool> -NewHeuristic("new-spilling-heuristic", - cl::desc("Use new spilling heuristic"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -TrivCoalesceEnds("trivial-coalesce-ends", - cl::desc("Attempt trivial coalescing of interval ends"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -AvoidWAWHazard("avoid-waw-hazard", - cl::desc("Avoid write-write hazards for some register classes"), - cl::init(false), 
cl::Hidden); - -static RegisterRegAlloc -linearscanRegAlloc("linearscan", "linear scan register allocator", - createLinearScanRegisterAllocator); - -namespace { - // When we allocate a register, add it to a fixed-size queue of - // registers to skip in subsequent allocations. This trades a small - // amount of register pressure and increased spills for flexibility in - // the post-pass scheduler. - // - // Note that in a the number of registers used for reloading spills - // will be one greater than the value of this option. - // - // One big limitation of this is that it doesn't differentiate between - // different register classes. So on x86-64, if there is xmm register - // pressure, it can caused fewer GPRs to be held in the queue. - static cl::opt<unsigned> - NumRecentlyUsedRegs("linearscan-skip-count", - cl::desc("Number of registers for linearscan to remember" - "to skip."), - cl::init(0), - cl::Hidden); - - struct RALinScan : public MachineFunctionPass { - static char ID; - RALinScan() : MachineFunctionPass(ID) { - initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); - initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerPass( - *PassRegistry::getPassRegistry()); - initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); - initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - - // Initialize the queue to record recently-used registers. - if (NumRecentlyUsedRegs > 0) - RecentRegs.resize(NumRecentlyUsedRegs, 0); - RecentNext = RecentRegs.begin(); - avoidWAW_ = 0; - } - - typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr; - typedef SmallVector<IntervalPtr, 32> IntervalPtrs; - private: - /// RelatedRegClasses - This structure is built the first time a function is - /// compiled, and keeps track of which register classes have registers that - /// belong to multiple classes or have aliases that are in other classes. - EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses; - DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg; - - // NextReloadMap - For each register in the map, it maps to the another - // register which is defined by a reload from the same stack slot and - // both reloads are in the same basic block. - DenseMap<unsigned, unsigned> NextReloadMap; - - // DowngradedRegs - A set of registers which are being "downgraded", i.e. - // un-favored for allocation. - SmallSet<unsigned, 8> DowngradedRegs; - - // DowngradeMap - A map from virtual registers to physical registers being - // downgraded for the virtual registers. - DenseMap<unsigned, unsigned> DowngradeMap; - - MachineFunction* mf_; - MachineRegisterInfo* mri_; - const TargetMachine* tm_; - const TargetRegisterInfo* tri_; - const TargetInstrInfo* tii_; - BitVector allocatableRegs_; - BitVector reservedRegs_; - LiveIntervals* li_; - MachineLoopInfo *loopInfo; - RegisterClassInfo RegClassInfo; - - /// handled_ - Intervals are added to the handled_ set in the order of their - /// start value. This is uses for backtracking. - std::vector<LiveInterval*> handled_; - - /// fixed_ - Intervals that correspond to machine registers. 
- /// - IntervalPtrs fixed_; - - /// active_ - Intervals that are currently being processed, and which have a - /// live range active for the current point. - IntervalPtrs active_; - - /// inactive_ - Intervals that are currently being processed, but which have - /// a hold at the current point. - IntervalPtrs inactive_; - - typedef std::priority_queue<LiveInterval*, - SmallVector<LiveInterval*, 64>, - greater_ptr<LiveInterval> > IntervalHeap; - IntervalHeap unhandled_; - - /// regUse_ - Tracks register usage. - SmallVector<unsigned, 32> regUse_; - SmallVector<unsigned, 32> regUseBackUp_; - - /// vrm_ - Tracks register assignments. - VirtRegMap* vrm_; - - std::auto_ptr<VirtRegRewriter> rewriter_; - - std::auto_ptr<Spiller> spiller_; - - // The queue of recently-used registers. - SmallVector<unsigned, 4> RecentRegs; - SmallVector<unsigned, 4>::iterator RecentNext; - - // Last write-after-write register written. - unsigned avoidWAW_; - - // Record that we just picked this register. - void recordRecentlyUsed(unsigned reg) { - assert(reg != 0 && "Recently used register is NOREG!"); - if (!RecentRegs.empty()) { - *RecentNext++ = reg; - if (RecentNext == RecentRegs.end()) - RecentNext = RecentRegs.begin(); - } - } - - public: - virtual const char* getPassName() const { - return "Linear Scan Register Allocator"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); - AU.addPreserved<AliasAnalysis>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<SlotIndexes>(); - if (StrongPHIElim) - AU.addRequiredID(StrongPHIEliminationID); - // Make sure PassManager knows which analyses to make available - // to coalescing and which analyses coalescing invalidates. - AU.addRequiredTransitiveID(RegisterCoalescerPassID); - AU.addRequired<CalculateSpillWeights>(); - AU.addRequiredID(LiveStacksID); - AU.addPreservedID(LiveStacksID); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); - AU.addRequired<VirtRegMap>(); - AU.addPreserved<VirtRegMap>(); - AU.addRequired<LiveDebugVariables>(); - AU.addPreserved<LiveDebugVariables>(); - AU.addRequiredID(MachineDominatorsID); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - /// runOnMachineFunction - register allocate the whole function - bool runOnMachineFunction(MachineFunction&); - - // Determine if we skip this register due to its being recently used. - bool isRecentlyUsed(unsigned reg) const { - return reg == avoidWAW_ || - std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end(); - } - - private: - /// linearScan - the linear scan algorithm - void linearScan(); - - /// initIntervalSets - initialize the interval sets. - /// - void initIntervalSets(); - - /// processActiveIntervals - expire old intervals and move non-overlapping - /// ones to the inactive list. - void processActiveIntervals(SlotIndex CurPoint); - - /// processInactiveIntervals - expire old intervals and move overlapping - /// ones to the active list. - void processInactiveIntervals(SlotIndex CurPoint); - - /// hasNextReloadInterval - Return the next liveinterval that's being - /// defined by a reload from the same SS as the specified one. - LiveInterval *hasNextReloadInterval(LiveInterval *cur); - - /// DowngradeRegister - Downgrade a register for allocation. - void DowngradeRegister(LiveInterval *li, unsigned Reg); - - /// UpgradeRegister - Upgrade a register for allocation. 
- void UpgradeRegister(unsigned Reg); - - /// assignRegOrStackSlotAtInterval - assign a register if one - /// is available, or spill. - void assignRegOrStackSlotAtInterval(LiveInterval* cur); - - void updateSpillWeights(std::vector<float> &Weights, - unsigned reg, float weight, - const TargetRegisterClass *RC); - - /// findIntervalsToSpill - Determine the intervals to spill for the - /// specified interval. It's passed the physical registers whose spill - /// weight is the lowest among all the registers whose live intervals - /// conflict with the interval. - void findIntervalsToSpill(LiveInterval *cur, - std::vector<std::pair<unsigned,float> > &Candidates, - unsigned NumCands, - SmallVector<LiveInterval*, 8> &SpillIntervals); - - /// attemptTrivialCoalescing - If a simple interval is defined by a copy, - /// try to allocate the definition to the same register as the source, - /// if the register is not defined during the life time of the interval. - /// This eliminates a copy, and is used to coalesce copies which were not - /// coalesced away before allocation either due to dest and src being in - /// different register classes or because the coalescer was overly - /// conservative. - unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg); - - /// - /// Register usage / availability tracking helpers. - /// - - void initRegUses() { - regUse_.resize(tri_->getNumRegs(), 0); - regUseBackUp_.resize(tri_->getNumRegs(), 0); - } - - void finalizeRegUses() { -#ifndef NDEBUG - // Verify all the registers are "freed". - bool Error = false; - for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) { - if (regUse_[i] != 0) { - dbgs() << tri_->getName(i) << " is still in use!\n"; - Error = true; - } - } - if (Error) - llvm_unreachable(0); -#endif - regUse_.clear(); - regUseBackUp_.clear(); - } - - void addRegUse(unsigned physReg) { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - ++regUse_[physReg]; - for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) - ++regUse_[*as]; - } - - void delRegUse(unsigned physReg) { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - assert(regUse_[physReg] != 0); - --regUse_[physReg]; - for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) { - assert(regUse_[*as] != 0); - --regUse_[*as]; - } - } - - bool isRegAvail(unsigned physReg) const { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - return regUse_[physReg] == 0; - } - - void backUpRegUses() { - regUseBackUp_ = regUse_; - } - - void restoreRegUses() { - regUse_ = regUseBackUp_; - } - - /// - /// Register handling helpers. - /// - - /// getFreePhysReg - return a free physical register for this virtual - /// register interval if we have one, otherwise return 0. - unsigned getFreePhysReg(LiveInterval* cur); - unsigned getFreePhysReg(LiveInterval* cur, - const TargetRegisterClass *RC, - unsigned MaxInactiveCount, - SmallVector<unsigned, 256> &inactiveCounts, - bool SkipDGRegs); - - /// getFirstNonReservedPhysReg - return the first non-reserved physical - /// register in the register class. 
- unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) { - ArrayRef<unsigned> O = RegClassInfo.getOrder(RC); - assert(!O.empty() && "All registers reserved?!"); - return O.front(); - } - - void ComputeRelatedRegClasses(); - - template <typename ItTy> - void printIntervals(const char* const str, ItTy i, ItTy e) const { - DEBUG({ - if (str) - dbgs() << str << " intervals:\n"; - - for (; i != e; ++i) { - dbgs() << '\t' << *i->first << " -> "; - - unsigned reg = i->first->reg; - if (TargetRegisterInfo::isVirtualRegister(reg)) - reg = vrm_->getPhys(reg); - - dbgs() << tri_->getName(reg) << '\n'; - } - }); - } - }; - char RALinScan::ID = 0; -} - -INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) -INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights) -INITIALIZE_PASS_DEPENDENCY(LiveStacks) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(VirtRegMap) -INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) - -void RALinScan::ComputeRelatedRegClasses() { - // First pass, add all reg classes to the union, and determine at least one - // reg class that each register is in. - bool HasAliases = false; - for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(), - E = tri_->regclass_end(); RCI != E; ++RCI) { - RelatedRegClasses.insert(*RCI); - for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end(); - I != E; ++I) { - HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0; - - const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I]; - if (PRC) { - // Already processed this register. Just make sure we know that - // multiple register classes share a register. - RelatedRegClasses.unionSets(PRC, *RCI); - } else { - PRC = *RCI; - } - } - } - - // Second pass, now that we know conservatively what register classes each reg - // belongs to, add info about aliases. We don't need to do this for targets - // without register aliases. - if (HasAliases) - for (DenseMap<unsigned, const TargetRegisterClass*>::iterator - I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end(); - I != E; ++I) - for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) { - const TargetRegisterClass *AliasClass = - OneClassForEachPhysReg.lookup(*AS); - if (AliasClass) - RelatedRegClasses.unionSets(I->second, AliasClass); - } -} - -/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try -/// allocate the definition the same register as the source register if the -/// register is not defined during live time of the interval. If the interval is -/// killed by a copy, try to use the destination register. This eliminates a -/// copy. This is used to coalesce copies which were not coalesced away before -/// allocation either due to dest and src being in different register classes or -/// because the coalescer was overly conservative. -unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { - unsigned Preference = vrm_->getRegAllocPref(cur.reg); - if ((Preference && Preference == Reg) || !cur.containsOneValue()) - return Reg; - - // We cannot handle complicated live ranges. Simple linear stuff only. 
- if (cur.ranges.size() != 1) - return Reg; - - const LiveRange &range = cur.ranges.front(); - - VNInfo *vni = range.valno; - if (vni->isUnused() || !vni->def.isValid()) - return Reg; - - unsigned CandReg; - { - MachineInstr *CopyMI; - if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy()) - // Defined by a copy, try to extend SrcReg forward - CandReg = CopyMI->getOperand(1).getReg(); - else if (TrivCoalesceEnds && - (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) && - CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg()) - // Only used by a copy, try to extend DstReg backwards - CandReg = CopyMI->getOperand(0).getReg(); - else - return Reg; - - // If the target of the copy is a sub-register then don't coalesce. - if(CopyMI->getOperand(0).getSubReg()) - return Reg; - } - - if (TargetRegisterInfo::isVirtualRegister(CandReg)) { - if (!vrm_->isAssignedReg(CandReg)) - return Reg; - CandReg = vrm_->getPhys(CandReg); - } - if (Reg == CandReg) - return Reg; - - const TargetRegisterClass *RC = mri_->getRegClass(cur.reg); - if (!RC->contains(CandReg)) - return Reg; - - if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg)) - return Reg; - - // Try to coalesce. - DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg) - << '\n'); - vrm_->clearVirt(cur.reg); - vrm_->assignVirt2Phys(cur.reg, CandReg); - - ++NumCoalesce; - return CandReg; -} - -bool RALinScan::runOnMachineFunction(MachineFunction &fn) { - mf_ = &fn; - mri_ = &fn.getRegInfo(); - tm_ = &fn.getTarget(); - tri_ = tm_->getRegisterInfo(); - tii_ = tm_->getInstrInfo(); - allocatableRegs_ = tri_->getAllocatableSet(fn); - reservedRegs_ = tri_->getReservedRegs(fn); - li_ = &getAnalysis<LiveIntervals>(); - loopInfo = &getAnalysis<MachineLoopInfo>(); - RegClassInfo.runOnMachineFunction(fn); - - // We don't run the coalescer here because we have no reason to - // interact with it. If the coalescer requires interaction, it - // won't do anything. If it doesn't require interaction, we assume - // it was run as a separate pass. - - // If this is the first function compiled, compute the related reg classes. - if (RelatedRegClasses.empty()) - ComputeRelatedRegClasses(); - - // Also resize register usage trackers. - initRegUses(); - - vrm_ = &getAnalysis<VirtRegMap>(); - if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter()); - - spiller_.reset(createSpiller(*this, *mf_, *vrm_)); - - initIntervalSets(); - - linearScan(); - - // Rewrite spill code and update the PhysRegsUsed set. - rewriter_->runOnMachineFunction(*mf_, *vrm_, li_); - - // Write out new DBG_VALUE instructions. - getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_); - - assert(unhandled_.empty() && "Unhandled live intervals remain!"); - - finalizeRegUses(); - - fixed_.clear(); - active_.clear(); - inactive_.clear(); - handled_.clear(); - NextReloadMap.clear(); - DowngradedRegs.clear(); - DowngradeMap.clear(); - spiller_.reset(0); - - return true; -} - -/// initIntervalSets - initialize the interval sets. 
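The removed attemptTrivialCoalescing above handles the case where a single-valued interval is defined (or only killed) by a copy: if the copy's other register is legal for the interval's class and is not redefined across the interval, the interval simply takes that register and the copy becomes dead. As an illustration, a minimal standalone sketch of the same check over invented stand-in types (nothing below is LLVM's LiveInterval or VirtRegMap API; the real code additionally rejects sub-register copies and conflicts with already-assigned physregs, which the sketch folds into two boolean flags):

    // Illustrative sketch only; SimpleInterval and its fields are made-up
    // stand-ins, not LLVM types.
    #include <cstdio>

    struct SimpleInterval {
      unsigned vreg;          // virtual register this interval belongs to
      unsigned assignedPhys;  // physical register currently assigned (0 = none)
      unsigned copySrcPhys;   // phys reg feeding the defining copy (0 = not a copy)
      bool srcLiveAcross;     // is copySrcPhys redefined inside this interval?
      bool srcInRegClass;     // is copySrcPhys legal for vreg's register class?
    };

    // Try to give the interval the same physical register as the copy source
    // so the copy becomes redundant.  Returns the final physical register.
    unsigned attemptTrivialCoalescing(SimpleInterval &LI) {
      if (!LI.copySrcPhys)                // not defined by a copy
        return LI.assignedPhys;
      if (LI.copySrcPhys == LI.assignedPhys)
        return LI.assignedPhys;           // already coalesced
      if (!LI.srcInRegClass || LI.srcLiveAcross)
        return LI.assignedPhys;           // would be illegal or would clobber
      LI.assignedPhys = LI.copySrcPhys;   // reassign; the copy is now a no-op
      return LI.assignedPhys;
    }

    int main() {
      SimpleInterval LI = {/*vreg*/5, /*assigned*/2, /*copySrc*/7,
                           /*srcLiveAcross*/false, /*srcInRegClass*/true};
      std::printf("vreg5 -> phys%u\n", attemptTrivialCoalescing(LI)); // phys7
    }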
-/// -void RALinScan::initIntervalSets() -{ - assert(unhandled_.empty() && fixed_.empty() && - active_.empty() && inactive_.empty() && - "interval sets should be empty on initialization"); - - handled_.reserve(li_->getNumIntervals()); - - for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { - if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) { - if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) { - mri_->setPhysRegUsed(i->second->reg); - fixed_.push_back(std::make_pair(i->second, i->second->begin())); - } - } else { - if (i->second->empty()) { - assignRegOrStackSlotAtInterval(i->second); - } - else - unhandled_.push(i->second); - } - } -} - -void RALinScan::linearScan() { - // linear scan algorithm - DEBUG({ - dbgs() << "********** LINEAR SCAN **********\n" - << "********** Function: " - << mf_->getFunction()->getName() << '\n'; - printIntervals("fixed", fixed_.begin(), fixed_.end()); - }); - - while (!unhandled_.empty()) { - // pick the interval with the earliest start point - LiveInterval* cur = unhandled_.top(); - unhandled_.pop(); - ++NumIters; - DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n'); - - assert(!cur->empty() && "Empty interval in unhandled set."); - - processActiveIntervals(cur->beginIndex()); - processInactiveIntervals(cur->beginIndex()); - - assert(TargetRegisterInfo::isVirtualRegister(cur->reg) && - "Can only allocate virtual registers!"); - - // Allocating a virtual register. try to find a free - // physical register or spill an interval (possibly this one) in order to - // assign it one. - assignRegOrStackSlotAtInterval(cur); - - DEBUG({ - printIntervals("active", active_.begin(), active_.end()); - printIntervals("inactive", inactive_.begin(), inactive_.end()); - }); - } - - // Expire any remaining active intervals - while (!active_.empty()) { - IntervalPtr &IP = active_.back(); - unsigned reg = IP.first->reg; - DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - active_.pop_back(); - } - - // Expire any remaining inactive intervals - DEBUG({ - for (IntervalPtrs::reverse_iterator - i = inactive_.rbegin(); i != inactive_.rend(); ++i) - dbgs() << "\tinterval " << *i->first << " expired\n"; - }); - inactive_.clear(); - - // Add live-ins to every BB except for entry. Also perform trivial coalescing. - MachineFunction::iterator EntryMBB = mf_->begin(); - SmallVector<MachineBasicBlock*, 8> LiveInMBBs; - for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { - LiveInterval &cur = *i->second; - unsigned Reg = 0; - bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg); - if (isPhys) - Reg = cur.reg; - else if (vrm_->isAssignedReg(cur.reg)) - Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg)); - if (!Reg) - continue; - // Ignore splited live intervals. 
- if (!isPhys && vrm_->getPreSplitReg(cur.reg)) - continue; - - for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end(); - I != E; ++I) { - const LiveRange &LR = *I; - if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) { - for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i) - if (LiveInMBBs[i] != EntryMBB) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - "Adding a virtual register to livein set?"); - LiveInMBBs[i]->addLiveIn(Reg); - } - LiveInMBBs.clear(); - } - } - } - - DEBUG(dbgs() << *vrm_); - - // Look for physical registers that end up not being allocated even though - // register allocator had to spill other registers in its register class. - if (!vrm_->FindUnusedRegisters(li_)) - return; -} - -/// processActiveIntervals - expire old intervals and move non-overlapping ones -/// to the inactive list. -void RALinScan::processActiveIntervals(SlotIndex CurPoint) -{ - DEBUG(dbgs() << "\tprocessing active intervals:\n"); - - for (unsigned i = 0, e = active_.size(); i != e; ++i) { - LiveInterval *Interval = active_[i].first; - LiveInterval::iterator IntervalPos = active_[i].second; - unsigned reg = Interval->reg; - - IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); - - if (IntervalPos == Interval->end()) { // Remove expired intervals. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - - // Pop off the end of the list. - active_[i] = active_.back(); - active_.pop_back(); - --i; --e; - - } else if (IntervalPos->start > CurPoint) { - // Move inactive intervals to inactive list. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - // add to inactive. - inactive_.push_back(std::make_pair(Interval, IntervalPos)); - - // Pop off the end of the list. - active_[i] = active_.back(); - active_.pop_back(); - --i; --e; - } else { - // Otherwise, just update the iterator position. - active_[i].second = IntervalPos; - } - } -} - -/// processInactiveIntervals - expire old intervals and move overlapping -/// ones to the active list. -void RALinScan::processInactiveIntervals(SlotIndex CurPoint) -{ - DEBUG(dbgs() << "\tprocessing inactive intervals:\n"); - - for (unsigned i = 0, e = inactive_.size(); i != e; ++i) { - LiveInterval *Interval = inactive_[i].first; - LiveInterval::iterator IntervalPos = inactive_[i].second; - unsigned reg = Interval->reg; - - IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); - - if (IntervalPos == Interval->end()) { // remove expired intervals. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); - - // Pop off the end of the list. - inactive_[i] = inactive_.back(); - inactive_.pop_back(); - --i; --e; - } else if (IntervalPos->start <= CurPoint) { - // move re-activated intervals in active list - DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - addRegUse(reg); - // add to active - active_.push_back(std::make_pair(Interval, IntervalPos)); - - // Pop off the end of the list. - inactive_[i] = inactive_.back(); - inactive_.pop_back(); - --i; --e; - } else { - // Otherwise, just update the iterator position. 
- inactive_[i].second = IntervalPos; - } - } -} - -/// updateSpillWeights - updates the spill weights of the specifed physical -/// register and its weight. -void RALinScan::updateSpillWeights(std::vector<float> &Weights, - unsigned reg, float weight, - const TargetRegisterClass *RC) { - SmallSet<unsigned, 4> Processed; - SmallSet<unsigned, 4> SuperAdded; - SmallVector<unsigned, 4> Supers; - Weights[reg] += weight; - Processed.insert(reg); - for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) { - Weights[*as] += weight; - Processed.insert(*as); - if (tri_->isSubRegister(*as, reg) && - SuperAdded.insert(*as) && - RC->contains(*as)) { - Supers.push_back(*as); - } - } - - // If the alias is a super-register, and the super-register is in the - // register class we are trying to allocate. Then add the weight to all - // sub-registers of the super-register even if they are not aliases. - // e.g. allocating for GR32, bh is not used, updating bl spill weight. - // bl should get the same spill weight otherwise it will be chosen - // as a spill candidate since spilling bh doesn't make ebx available. - for (unsigned i = 0, e = Supers.size(); i != e; ++i) { - for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr) - if (!Processed.count(*sr)) - Weights[*sr] += weight; - } -} - -static -RALinScan::IntervalPtrs::iterator -FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) { - for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end(); - I != E; ++I) - if (I->first == LI) return I; - return IP.end(); -} - -static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, - SlotIndex Point){ - for (unsigned i = 0, e = V.size(); i != e; ++i) { - RALinScan::IntervalPtr &IP = V[i]; - LiveInterval::iterator I = std::upper_bound(IP.first->begin(), - IP.second, Point); - if (I != IP.first->begin()) --I; - IP.second = I; - } -} - -/// getConflictWeight - Return the number of conflicts between cur -/// live interval and defs and uses of Reg weighted by loop depthes. -static -float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_, - MachineRegisterInfo *mri_, - MachineLoopInfo *loopInfo) { - float Conflicts = 0; - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg), - E = mri_->reg_end(); I != E; ++I) { - MachineInstr *MI = &*I; - if (cur->liveAt(li_->getInstructionIndex(MI))) { - unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent()); - Conflicts += std::pow(10.0f, (float)loopDepth); - } - } - return Conflicts; -} - -/// findIntervalsToSpill - Determine the intervals to spill for the -/// specified interval. It's passed the physical registers whose spill -/// weight is the lowest among all the registers whose live intervals -/// conflict with the interval. -void RALinScan::findIntervalsToSpill(LiveInterval *cur, - std::vector<std::pair<unsigned,float> > &Candidates, - unsigned NumCands, - SmallVector<LiveInterval*, 8> &SpillIntervals) { - // We have figured out the *best* register to spill. But there are other - // registers that are pretty good as well (spill weight within 3%). Spill - // the one that has fewest defs and uses that conflict with cur. - float Conflicts[3] = { 0.0f, 0.0f, 0.0f }; - SmallVector<LiveInterval*, 8> SLIs[3]; - - DEBUG({ - dbgs() << "\tConsidering " << NumCands << " candidates: "; - for (unsigned i = 0; i != NumCands; ++i) - dbgs() << tri_->getName(Candidates[i].first) << " "; - dbgs() << "\n"; - }); - - // Calculate the number of conflicts of each candidate. 
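processActiveIntervals and processInactiveIntervals above are the usual linear-scan bookkeeping: when the scan reaches the start of the next unhandled interval, finished intervals expire, active intervals with a live-range hole at that point move to the inactive list, and inactive intervals that become live again move back. A rough standalone sketch of those transitions over plain [start, end) segments (no register classes, aliases, or regUse_ counts):

    // Sketch of linear-scan active/inactive maintenance over simplified
    // intervals.  An interval here is a set of disjoint [start, end) segments.
    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Seg { unsigned start, end; };             // half-open segment
    struct Interval { unsigned reg; std::vector<Seg> segs; };

    bool endsBefore(const Interval &I, unsigned point) {
      return I.segs.back().end <= point;             // fully expired at 'point'
    }
    bool liveAt(const Interval &I, unsigned point) {
      for (const Seg &s : I.segs)
        if (s.start <= point && point < s.end) return true;
      return false;
    }

    // Move intervals between 'active' and 'inactive' as the scan point advances.
    void advanceTo(std::vector<Interval> &active, std::vector<Interval> &inactive,
                   unsigned point) {
      auto expired = [&](const Interval &I) { return endsBefore(I, point); };
      active.erase(std::remove_if(active.begin(), active.end(), expired),
                   active.end());
      inactive.erase(std::remove_if(inactive.begin(), inactive.end(), expired),
                     inactive.end());
      // Active intervals with a hole at 'point' become inactive, and vice versa.
      for (auto it = active.begin(); it != active.end();) {
        if (!liveAt(*it, point)) { inactive.push_back(*it); it = active.erase(it); }
        else ++it;
      }
      for (auto it = inactive.begin(); it != inactive.end();) {
        if (liveAt(*it, point)) { active.push_back(*it); it = inactive.erase(it); }
        else ++it;
      }
    }

    int main() {
      std::vector<Interval> active = {{1, {{0, 10}, {20, 30}}}, {2, {{0, 5}}}};
      std::vector<Interval> inactive;
      advanceTo(active, inactive, 12);   // reg2 expires, reg1 goes inactive
      std::printf("active=%zu inactive=%zu\n", active.size(), inactive.size());
    }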
- for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { - unsigned Reg = i->first->reg; - unsigned PhysReg = vrm_->getPhys(Reg); - if (!cur->overlapsFrom(*i->first, i->second)) - continue; - for (unsigned j = 0; j < NumCands; ++j) { - unsigned Candidate = Candidates[j].first; - if (tri_->regsOverlap(PhysReg, Candidate)) { - if (NumCands > 1) - Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); - SLIs[j].push_back(i->first); - } - } - } - - for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){ - unsigned Reg = i->first->reg; - unsigned PhysReg = vrm_->getPhys(Reg); - if (!cur->overlapsFrom(*i->first, i->second-1)) - continue; - for (unsigned j = 0; j < NumCands; ++j) { - unsigned Candidate = Candidates[j].first; - if (tri_->regsOverlap(PhysReg, Candidate)) { - if (NumCands > 1) - Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); - SLIs[j].push_back(i->first); - } - } - } - - // Which is the best candidate? - unsigned BestCandidate = 0; - float MinConflicts = Conflicts[0]; - for (unsigned i = 1; i != NumCands; ++i) { - if (Conflicts[i] < MinConflicts) { - BestCandidate = i; - MinConflicts = Conflicts[i]; - } - } - - std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(), - std::back_inserter(SpillIntervals)); -} - -namespace { - struct WeightCompare { - private: - const RALinScan &Allocator; - - public: - WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {} - - typedef std::pair<unsigned, float> RegWeightPair; - bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const { - return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first); - } - }; -} - -static bool weightsAreClose(float w1, float w2) { - if (!NewHeuristic) - return false; - - float diff = w1 - w2; - if (diff <= 0.02f) // Within 0.02f - return true; - return (diff / w2) <= 0.05f; // Within 5%. -} - -LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) { - DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg); - if (I == NextReloadMap.end()) - return 0; - return &li_->getInterval(I->second); -} - -void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) { - for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) { - bool isNew = DowngradedRegs.insert(*AS); - (void)isNew; // Silence compiler warning. - assert(isNew && "Multiple reloads holding the same register?"); - DowngradeMap.insert(std::make_pair(li->reg, *AS)); - } - ++NumDowngrade; -} - -void RALinScan::UpgradeRegister(unsigned Reg) { - if (Reg) { - DowngradedRegs.erase(Reg); - for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) - DowngradedRegs.erase(*AS); - } -} - -namespace { - struct LISorter { - bool operator()(LiveInterval* A, LiveInterval* B) { - return A->beginIndex() < B->beginIndex(); - } - }; -} - -/// assignRegOrStackSlotAtInterval - assign a register if one is available, or -/// spill. -void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { - const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); - DEBUG(dbgs() << "\tallocating current interval from " - << RC->getName() << ": "); - - // This is an implicitly defined live interval, just assign any register. - if (cur->empty()) { - unsigned physReg = vrm_->getRegAllocPref(cur->reg); - if (!physReg) - physReg = getFirstNonReservedPhysReg(RC); - DEBUG(dbgs() << tri_->getName(physReg) << '\n'); - // Note the register is not really in use. 
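weightsAreClose and getConflictWeight above implement the spill tie-breaker used by findIntervalsToSpill: among candidate registers whose spill weights are nearly equal (within 0.02 absolutely or 5% relatively in the removed code), prefer the one whose conflicting defs and uses sit in the shallowest loops, with each conflict costing 10^loopDepth. A standalone sketch of that heuristic with invented candidates (the removed code additionally gates the closeness test behind the NewHeuristic flag):

    // Sketch of the "pick the cheapest of several near-tied spill candidates"
    // heuristic.  Candidate and Conflict are simplified stand-ins.
    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    struct Conflict { unsigned loopDepth; };            // one conflicting use/def
    struct Candidate {
      unsigned reg;
      float spillWeight;
      std::vector<Conflict> conflicts;                  // conflicts with 'cur'
    };

    bool weightsAreClose(float w1, float w2) {
      float diff = w1 - w2;
      if (diff <= 0.02f) return true;                   // close in absolute terms
      return (diff / w2) <= 0.05f;                      // or within 5%
    }

    float conflictWeight(const Candidate &C) {
      float total = 0;
      for (const Conflict &X : C.conflicts)
        total += std::pow(10.0f, (float)X.loopDepth);   // deeper loops cost more
      return total;
    }

    // Among candidates whose weight is close to the minimum, pick the one with
    // the fewest loop-depth-weighted conflicts.
    unsigned pickSpillCandidate(const std::vector<Candidate> &Cands) {
      float minWeight = Cands[0].spillWeight;
      for (const Candidate &C : Cands)
        minWeight = std::min(minWeight, C.spillWeight);
      unsigned best = Cands[0].reg;
      float bestConflicts = -1;
      for (const Candidate &C : Cands) {
        if (!weightsAreClose(C.spillWeight, minWeight)) continue;
        float w = conflictWeight(C);
        if (bestConflicts < 0 || w < bestConflicts) { bestConflicts = w; best = C.reg; }
      }
      return best;
    }

    int main() {
      std::vector<Candidate> Cands = {
        {1, 1.00f, {{2}, {2}}},   // cheapest weight, but conflicts in depth-2 loops
        {2, 1.01f, {{0}}},        // nearly as cheap, one conflict outside loops
      };
      std::printf("spill r%u\n", pickSpillCandidate(Cands));   // prints r2
    }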
- vrm_->assignVirt2Phys(cur->reg, physReg); - return; - } - - backUpRegUses(); - - std::vector<std::pair<unsigned, float> > SpillWeightsToAdd; - SlotIndex StartPosition = cur->beginIndex(); - const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - - // If start of this live interval is defined by a move instruction and its - // source is assigned a physical register that is compatible with the target - // register class, then we should try to assign it the same register. - // This can happen when the move is from a larger register class to a smaller - // one, e.g. X86::mov32to32_. These move instructions are not coalescable. - if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { - VNInfo *vni = cur->begin()->valno; - if (!vni->isUnused() && vni->def.isValid()) { - MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); - if (CopyMI && CopyMI->isCopy()) { - unsigned DstSubReg = CopyMI->getOperand(0).getSubReg(); - unsigned SrcReg = CopyMI->getOperand(1).getReg(); - unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg(); - unsigned Reg = 0; - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) - Reg = SrcReg; - else if (vrm_->isAssignedReg(SrcReg)) - Reg = vrm_->getPhys(SrcReg); - if (Reg) { - if (SrcSubReg) - Reg = tri_->getSubReg(Reg, SrcSubReg); - if (DstSubReg) - Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); - if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) - mri_->setRegAllocationHint(cur->reg, 0, Reg); - } - } - } - } - - // For every interval in inactive we overlap with, mark the - // register as not free and update spill weights. - for (IntervalPtrs::const_iterator i = inactive_.begin(), - e = inactive_.end(); i != e; ++i) { - unsigned Reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "Can only allocate virtual registers!"); - const TargetRegisterClass *RegRC = mri_->getRegClass(Reg); - // If this is not in a related reg class to the register we're allocating, - // don't check it. - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && - cur->overlapsFrom(*i->first, i->second-1)) { - Reg = vrm_->getPhys(Reg); - addRegUse(Reg); - SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight)); - } - } - - // Speculatively check to see if we can get a register right now. If not, - // we know we won't be able to by adding more constraints. If so, we can - // check to see if it is valid. Doing an exhaustive search of the fixed_ list - // is very bad (it contains all callee clobbered registers for any functions - // with a call), so we want to avoid doing that if possible. - unsigned physReg = getFreePhysReg(cur); - unsigned BestPhysReg = physReg; - if (physReg) { - // We got a register. However, if it's in the fixed_ list, we might - // conflict with it. Check to see if we conflict with it or any of its - // aliases. - SmallSet<unsigned, 8> RegAliases; - for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS) - RegAliases.insert(*AS); - - bool ConflictsWithFixed = false; - for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) { - // Okay, this reg is on the fixed list. Check to see if we actually - // conflict. 
- LiveInterval *I = IP.first; - if (I->endIndex() > StartPosition) { - LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); - IP.second = II; - if (II != I->begin() && II->start > StartPosition) - --II; - if (cur->overlapsFrom(*I, II)) { - ConflictsWithFixed = true; - break; - } - } - } - } - - // Okay, the register picked by our speculative getFreePhysReg call turned - // out to be in use. Actually add all of the conflicting fixed registers to - // regUse_ so we can do an accurate query. - if (ConflictsWithFixed) { - // For every interval in fixed we overlap with, mark the register as not - // free and update spill weights. - for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - LiveInterval *I = IP.first; - - const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg]; - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && - I->endIndex() > StartPosition) { - LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); - IP.second = II; - if (II != I->begin() && II->start > StartPosition) - --II; - if (cur->overlapsFrom(*I, II)) { - unsigned reg = I->reg; - addRegUse(reg); - SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight)); - } - } - } - - // Using the newly updated regUse_ object, which includes conflicts in the - // future, see if there are any registers available. - physReg = getFreePhysReg(cur); - } - } - - // Restore the physical register tracker, removing information about the - // future. - restoreRegUses(); - - // If we find a free register, we are done: assign this virtual to - // the free physical register and add this interval to the active - // list. - if (physReg) { - DEBUG(dbgs() << tri_->getName(physReg) << '\n'); - assert(RC->contains(physReg) && "Invalid candidate"); - vrm_->assignVirt2Phys(cur->reg, physReg); - addRegUse(physReg); - active_.push_back(std::make_pair(cur, cur->begin())); - handled_.push_back(cur); - - // Remember physReg for avoiding a write-after-write hazard in the next - // instruction. - if (AvoidWAWHazard && - tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg))) - avoidWAW_ = physReg; - - // "Upgrade" the physical register since it has been allocated. - UpgradeRegister(physReg); - if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { - // "Downgrade" physReg to try to keep physReg from being allocated until - // the next reload from the same SS is allocated. - mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg); - DowngradeRegister(cur, physReg); - } - return; - } - DEBUG(dbgs() << "no free registers\n"); - - // Compile the spill weights into an array that is better for scanning. - std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f); - for (std::vector<std::pair<unsigned, float> >::iterator - I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I) - updateSpillWeights(SpillWeights, I->first, I->second, RC); - - // for each interval in active, update spill weights. - for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end(); - i != e; ++i) { - unsigned reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - updateSpillWeights(SpillWeights, reg, i->first->weight, RC); - } - - DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n"); - - // Find a register to spill. 
- float minWeight = HUGE_VALF; - unsigned minReg = 0; - - bool Found = false; - std::vector<std::pair<unsigned,float> > RegsWeights; - ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC); - if (!minReg || SpillWeights[minReg] == HUGE_VALF) - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned reg = Order[i]; - float regWeight = SpillWeights[reg]; - // Skip recently allocated registers and reserved registers. - if (minWeight > regWeight && !isRecentlyUsed(reg)) - Found = true; - RegsWeights.push_back(std::make_pair(reg, regWeight)); - } - - // If we didn't find a register that is spillable, try aliases? - if (!Found) { - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned reg = Order[i]; - // No need to worry about if the alias register size < regsize of RC. - // We are going to spill all registers that alias it anyway. - for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) - RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as])); - } - } - - // Sort all potential spill candidates by weight. - std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this)); - minReg = RegsWeights[0].first; - minWeight = RegsWeights[0].second; - if (minWeight == HUGE_VALF) { - // All registers must have inf weight. Just grab one! - minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC); - if (cur->weight == HUGE_VALF || - li_->getApproximateInstructionCount(*cur) == 0) { - // Spill a physical register around defs and uses. - if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) { - // spillPhysRegAroundRegDefsUses may have invalidated iterator stored - // in fixed_. Reset them. - for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - LiveInterval *I = IP.first; - if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg)) - IP.second = I->advanceTo(I->begin(), StartPosition); - } - - DowngradedRegs.clear(); - assignRegOrStackSlotAtInterval(cur); - } else { - assert(false && "Ran out of registers during register allocation!"); - report_fatal_error("Ran out of registers during register allocation!"); - } - return; - } - } - - // Find up to 3 registers to consider as spill candidates. - unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1; - while (LastCandidate > 1) { - if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight)) - break; - --LastCandidate; - } - - DEBUG({ - dbgs() << "\t\tregister(s) with min weight(s): "; - - for (unsigned i = 0; i != LastCandidate; ++i) - dbgs() << tri_->getName(RegsWeights[i].first) - << " (" << RegsWeights[i].second << ")\n"; - }); - - // If the current has the minimum weight, we need to spill it and - // add any added intervals back to unhandled, and restart - // linearscan. - if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { - DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); - SmallVector<LiveInterval*, 8> added; - LiveRangeEdit LRE(*cur, added); - spiller_->spill(LRE); - - std::sort(added.begin(), added.end(), LISorter()); - if (added.empty()) - return; // Early exit if all spills were folded. - - // Merge added with unhandled. Note that we have already sorted - // intervals returned by addIntervalsForSpills by their starting - // point. - // This also update the NextReloadMap. That is, it adds mapping from a - // register defined by a reload from SS to the next reload from SS in the - // same basic block. 
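The block above makes the central spill decision of the removed allocator: compile per-register spill weights, sort the candidates, and if the current interval's own weight is no larger than the cheapest candidate's, spill the current interval rather than evicting others; HUGE_VALF marks intervals that must not be spilled. A compact sketch of just that decision over invented weights:

    // Sketch of the "spill current or spill the cheapest interfering register"
    // decision from linear scan.  Weights and registers are invented.
    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    struct RegWeight { unsigned reg; float weight; };

    // Returns 0 if the *current* interval should be spilled, otherwise the
    // physical register whose intervals should be evicted.
    unsigned chooseSpill(float curWeight, std::vector<RegWeight> cands) {
      std::sort(cands.begin(), cands.end(),
                [](const RegWeight &a, const RegWeight &b) {
                  return a.weight < b.weight;
                });
      const RegWeight &cheapest = cands.front();
      // HUGE_VALF marks intervals that must not be spilled (e.g. short reloads).
      if (curWeight != HUGE_VALF && curWeight <= cheapest.weight)
        return 0;                       // current interval is the cheapest victim
      return cheapest.reg;              // evict everything assigned to this reg
    }

    int main() {
      std::vector<RegWeight> cands = {{3, 7.5f}, {4, 2.0f}, {5, HUGE_VALF}};
      std::printf("victim: %u\n", chooseSpill(1.5f, cands));   // 0: spill current
      std::printf("victim: %u\n", chooseSpill(9.0f, cands));   // 4: evict r4
    }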
- MachineBasicBlock *LastReloadMBB = 0; - LiveInterval *LastReload = 0; - int LastReloadSS = VirtRegMap::NO_STACK_SLOT; - for (unsigned i = 0, e = added.size(); i != e; ++i) { - LiveInterval *ReloadLi = added[i]; - if (ReloadLi->weight == HUGE_VALF && - li_->getApproximateInstructionCount(*ReloadLi) == 0) { - SlotIndex ReloadIdx = ReloadLi->beginIndex(); - MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); - int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); - if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { - // Last reload of same SS is in the same MBB. We want to try to - // allocate both reloads the same register and make sure the reg - // isn't clobbered in between if at all possible. - assert(LastReload->beginIndex() < ReloadIdx); - NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); - } - LastReloadMBB = ReloadMBB; - LastReload = ReloadLi; - LastReloadSS = ReloadSS; - } - unhandled_.push(ReloadLi); - } - return; - } - - ++NumBacktracks; - - // Push the current interval back to unhandled since we are going - // to re-run at least this iteration. Since we didn't modify it it - // should go back right in the front of the list - unhandled_.push(cur); - - assert(TargetRegisterInfo::isPhysicalRegister(minReg) && - "did not choose a register to spill?"); - - // We spill all intervals aliasing the register with - // minimum weight, rollback to the interval with the earliest - // start point and let the linear scan algorithm run again - SmallVector<LiveInterval*, 8> spillIs; - - // Determine which intervals have to be spilled. - findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs); - - // Set of spilled vregs (used later to rollback properly) - SmallSet<unsigned, 8> spilled; - - // The earliest start of a Spilled interval indicates up to where - // in handled we need to roll back - assert(!spillIs.empty() && "No spill intervals?"); - SlotIndex earliestStart = spillIs[0]->beginIndex(); - - // Spill live intervals of virtual regs mapped to the physical register we - // want to clear (and its aliases). We only spill those that overlap with the - // current interval as the rest do not affect its allocation. we also keep - // track of the earliest start of all spilled live intervals since this will - // mark our rollback point. - SmallVector<LiveInterval*, 8> added; - while (!spillIs.empty()) { - LiveInterval *sli = spillIs.back(); - spillIs.pop_back(); - DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n'); - if (sli->beginIndex() < earliestStart) - earliestStart = sli->beginIndex(); - LiveRangeEdit LRE(*sli, added, 0, &spillIs); - spiller_->spill(LRE); - spilled.insert(sli->reg); - } - - // Include any added intervals in earliestStart. - for (unsigned i = 0, e = added.size(); i != e; ++i) { - SlotIndex SI = added[i]->beginIndex(); - if (SI < earliestStart) - earliestStart = SI; - } - - DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n'); - - // Scan handled in reverse order up to the earliest start of a - // spilled live interval and undo each one, restoring the state of - // unhandled. - while (!handled_.empty()) { - LiveInterval* i = handled_.back(); - // If this interval starts before t we are done. - if (!i->empty() && i->beginIndex() < earliestStart) - break; - DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n'); - handled_.pop_back(); - - // When undoing a live interval allocation we must know if it is active or - // inactive to properly update regUse_ and the VirtRegMap. 
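This stretch of the removed allocator is its backtracking step: once eviction is chosen, it records the earliest start among the spilled intervals, pops handled intervals that begin at or after that point, undoes their assignments, and requeues them so the scan can re-run from the rollback point. A stripped-down sketch of that undo loop, with the bookkeeping reduced to a single handled list (no fixed or inactive lists, hints, or regUse_ counts):

    // Sketch of linear-scan backtracking: undo allocations made at or after a
    // rollback point and requeue them.  Alloc is a simplified stand-in record.
    #include <cstdio>
    #include <vector>

    struct Alloc { unsigned vreg; unsigned start; unsigned phys; };

    void rollback(std::vector<Alloc> &handled,          // in allocation order
                  std::vector<unsigned> &unhandled,     // vregs to allocate again
                  unsigned earliestSpillStart) {
      while (!handled.empty() && handled.back().start >= earliestSpillStart) {
        Alloc A = handled.back();
        handled.pop_back();
        std::printf("undo vreg%u (was phys%u)\n", A.vreg, A.phys);
        // The real allocator also clears the VirtRegMap entry, releases the
        // register-use counts, and resets any allocation hint here.
        unhandled.push_back(A.vreg);
      }
    }

    int main() {
      std::vector<Alloc> handled = {{10, 0, 1}, {11, 4, 2}, {12, 9, 3}};
      std::vector<unsigned> unhandled;
      rollback(handled, unhandled, /*earliestSpillStart=*/5);  // undoes vreg12
      std::printf("%zu allocations kept, %zu requeued\n",
                  handled.size(), unhandled.size());
    }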
- IntervalPtrs::iterator it; - if ((it = FindIntervalInVector(active_, i)) != active_.end()) { - active_.erase(it); - assert(!TargetRegisterInfo::isPhysicalRegister(i->reg)); - if (!spilled.count(i->reg)) - unhandled_.push(i); - delRegUse(vrm_->getPhys(i->reg)); - vrm_->clearVirt(i->reg); - } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) { - inactive_.erase(it); - assert(!TargetRegisterInfo::isPhysicalRegister(i->reg)); - if (!spilled.count(i->reg)) - unhandled_.push(i); - vrm_->clearVirt(i->reg); - } else { - assert(TargetRegisterInfo::isVirtualRegister(i->reg) && - "Can only allocate virtual registers!"); - vrm_->clearVirt(i->reg); - unhandled_.push(i); - } - - DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg); - if (ii == DowngradeMap.end()) - // It interval has a preference, it must be defined by a copy. Clear the - // preference now since the source interval allocation may have been - // undone as well. - mri_->setRegAllocationHint(i->reg, 0, 0); - else { - UpgradeRegister(ii->second); - } - } - - // Rewind the iterators in the active, inactive, and fixed lists back to the - // point we reverted to. - RevertVectorIteratorsTo(active_, earliestStart); - RevertVectorIteratorsTo(inactive_, earliestStart); - RevertVectorIteratorsTo(fixed_, earliestStart); - - // Scan the rest and undo each interval that expired after t and - // insert it in active (the next iteration of the algorithm will - // put it in inactive if required) - for (unsigned i = 0, e = handled_.size(); i != e; ++i) { - LiveInterval *HI = handled_[i]; - if (!HI->expiredAt(earliestStart) && - HI->expiredAt(cur->beginIndex())) { - DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n'); - active_.push_back(std::make_pair(HI, HI->begin())); - assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg)); - addRegUse(vrm_->getPhys(HI->reg)); - } - } - - // Merge added with unhandled. - // This also update the NextReloadMap. That is, it adds mapping from a - // register defined by a reload from SS to the next reload from SS in the - // same basic block. - MachineBasicBlock *LastReloadMBB = 0; - LiveInterval *LastReload = 0; - int LastReloadSS = VirtRegMap::NO_STACK_SLOT; - std::sort(added.begin(), added.end(), LISorter()); - for (unsigned i = 0, e = added.size(); i != e; ++i) { - LiveInterval *ReloadLi = added[i]; - if (ReloadLi->weight == HUGE_VALF && - li_->getApproximateInstructionCount(*ReloadLi) == 0) { - SlotIndex ReloadIdx = ReloadLi->beginIndex(); - MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); - int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); - if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { - // Last reload of same SS is in the same MBB. We want to try to - // allocate both reloads the same register and make sure the reg - // isn't clobbered in between if at all possible. - assert(LastReload->beginIndex() < ReloadIdx); - NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); - } - LastReloadMBB = ReloadMBB; - LastReload = ReloadLi; - LastReloadSS = ReloadSS; - } - unhandled_.push(ReloadLi); - } -} - -unsigned RALinScan::getFreePhysReg(LiveInterval* cur, - const TargetRegisterClass *RC, - unsigned MaxInactiveCount, - SmallVector<unsigned, 256> &inactiveCounts, - bool SkipDGRegs) { - unsigned FreeReg = 0; - unsigned FreeRegInactiveCount = 0; - - std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg); - // Resolve second part of the hint (if possible) given the current allocation. 
- unsigned physReg = Hint.second; - if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg)) - physReg = vrm_->getPhys(physReg); - - ArrayRef<unsigned> Order; - if (Hint.first) - Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_); - else - Order = RegClassInfo.getOrder(RC); - - assert(!Order.empty() && "No allocatable register in this register class!"); - - // Scan for the first available register. - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned Reg = Order[i]; - // Ignore "downgraded" registers. - if (SkipDGRegs && DowngradedRegs.count(Reg)) - continue; - // Skip reserved registers. - if (reservedRegs_.test(Reg)) - continue; - // Skip recently allocated registers. - if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) { - FreeReg = Reg; - if (FreeReg < inactiveCounts.size()) - FreeRegInactiveCount = inactiveCounts[FreeReg]; - else - FreeRegInactiveCount = 0; - break; - } - } - - // If there are no free regs, or if this reg has the max inactive count, - // return this register. - if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) { - // Remember what register we picked so we can skip it next time. - if (FreeReg != 0) recordRecentlyUsed(FreeReg); - return FreeReg; - } - - // Continue scanning the registers, looking for the one with the highest - // inactive count. Alkis found that this reduced register pressure very - // slightly on X86 (in rev 1.94 of this file), though this should probably be - // reevaluated now. - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned Reg = Order[i]; - // Ignore "downgraded" registers. - if (SkipDGRegs && DowngradedRegs.count(Reg)) - continue; - // Skip reserved registers. - if (reservedRegs_.test(Reg)) - continue; - if (isRegAvail(Reg) && Reg < inactiveCounts.size() && - FreeRegInactiveCount < inactiveCounts[Reg] && - (!SkipDGRegs || !isRecentlyUsed(Reg))) { - FreeReg = Reg; - FreeRegInactiveCount = inactiveCounts[Reg]; - if (FreeRegInactiveCount == MaxInactiveCount) - break; // We found the one with the max inactive count. - } - } - - // Remember what register we picked so we can skip it next time. - recordRecentlyUsed(FreeReg); - - return FreeReg; -} - -/// getFreePhysReg - return a free physical register for this virtual register -/// interval if we have one, otherwise return 0. -unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { - SmallVector<unsigned, 256> inactiveCounts; - unsigned MaxInactiveCount = 0; - - const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); - const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - - for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end(); - i != e; ++i) { - unsigned reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - - // If this is not in a related reg class to the register we're allocating, - // don't check it. - const TargetRegisterClass *RegRC = mri_->getRegClass(reg); - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) { - reg = vrm_->getPhys(reg); - if (inactiveCounts.size() <= reg) - inactiveCounts.resize(reg+1); - ++inactiveCounts[reg]; - MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]); - } - } - - // If copy coalescer has assigned a "preferred" register, check if it's - // available first. 
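getFreePhysReg above scans the allocation order for an available register, skipping reserved, downgraded, and recently used ones, and then prefers the available register that overlaps the most inactive intervals, on the theory that reusing such a register keeps others free elsewhere. A single-pass approximation of that preference with invented order and counts (the removed code does this in two passes and also honours allocation hints):

    // Sketch of "prefer the free register with the highest inactive count".
    // The allocation order, reserved set and counts are invented inputs.
    #include <cstdio>
    #include <vector>

    unsigned pickFreeReg(const std::vector<unsigned> &order,
                         const std::vector<bool> &reserved,
                         const std::vector<bool> &inUse,
                         const std::vector<unsigned> &inactiveCount) {
      unsigned best = 0, bestCount = 0;
      for (unsigned reg : order) {
        if (reserved[reg] || inUse[reg])
          continue;                                  // not a candidate
        unsigned cnt = reg < inactiveCount.size() ? inactiveCount[reg] : 0;
        if (!best || cnt > bestCount) {              // keep the most "inactive" reg
          best = reg;
          bestCount = cnt;
        }
      }
      return best;                                   // 0 means no free register
    }

    int main() {
      std::vector<unsigned> order = {1, 2, 3, 4};
      std::vector<bool> reserved = {false, false, false, true, false};
      std::vector<bool> inUse    = {false, false, true, false, false};
      std::vector<unsigned> inactiveCount = {0, 1, 0, 0, 3};
      std::printf("picked r%u\n",
                  pickFreeReg(order, reserved, inUse, inactiveCount)); // r4
    }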
- unsigned Preference = vrm_->getRegAllocPref(cur->reg); - if (Preference) { - DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") "); - if (isRegAvail(Preference) && - RC->contains(Preference)) - return Preference; - } - - unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, - true); - if (FreeReg) - return FreeReg; - return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false); -} - -FunctionPass* llvm::createLinearScanRegisterAllocator() { - return new RALinScan(); -} diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 0d2cf2d6184c..a2846145bc7e 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -32,14 +32,17 @@ #define DEBUG_TYPE "regalloc" #include "RenderMachineFunction.h" -#include "Splitter.h" +#include "Spiller.h" #include "VirtRegMap.h" -#include "VirtRegRewriter.h" #include "RegisterCoalescer.h" +#include "llvm/Module.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/RegAllocPBQP.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -54,6 +57,7 @@ #include <limits> #include <memory> #include <set> +#include <sstream> #include <vector> using namespace llvm; @@ -67,10 +71,12 @@ pbqpCoalescing("pbqp-coalescing", cl::desc("Attempt coalescing during PBQP register allocation."), cl::init(false), cl::Hidden); +#ifndef NDEBUG static cl::opt<bool> -pbqpPreSplitting("pbqp-pre-splitting", - cl::desc("Pre-split before PBQP register allocation."), - cl::init(false), cl::Hidden); +pbqpDumpGraphs("pbqp-dump-graphs", + cl::desc("Dump graphs for each function/round in the compilation unit."), + cl::init(false), cl::Hidden); +#endif namespace { @@ -88,11 +94,9 @@ public: : MachineFunctionPass(ID), builder(b), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeLoopSplitterPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); } @@ -132,6 +136,7 @@ private: MachineRegisterInfo *mri; RenderMachineFunction *rmf; + std::auto_ptr<Spiller> spiller; LiveIntervals *lis; LiveStacks *lss; VirtRegMap *vrm; @@ -141,10 +146,6 @@ private: /// \brief Finds the initial set of vreg intervals to allocate. void findVRegIntervalsToAlloc(); - /// \brief Adds a stack interval if the given live interval has been - /// spilled. Used to support stack slot coloring. - void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri); - /// \brief Given a solved PBQP problem maps this solution back to a register /// assignment. 
bool mapPBQPToRegAlloc(const PBQPRAProblem &problem, @@ -170,7 +171,7 @@ PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const { VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg); assert(nodeItr != vreg2Node.end() && "No node for vreg."); return nodeItr->second; - + } const PBQPRAProblem::AllowedSet& @@ -195,9 +196,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, const RegSet &vregs) { typedef std::vector<const LiveInterval*> LIVector; - + ArrayRef<SlotIndex> regMaskSlots = lis->getRegMaskSlots(); MachineRegisterInfo *mri = &mf->getRegInfo(); - const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); + const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem()); PBQP::Graph &g = p->getGraph(); @@ -214,7 +215,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, BitVector reservedRegs = tri->getReservedRegs(*mf); - // Iterate over vregs. + // Iterate over vregs. for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end(); vregItr != vregEnd; ++vregItr) { unsigned vreg = *vregItr; @@ -224,7 +225,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, // Compute an initial allowed set for the current vreg. typedef std::vector<unsigned> VRAllowed; VRAllowed vrAllowed; - ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf); + ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf); for (unsigned i = 0; i != rawOrder.size(); ++i) { unsigned preg = rawOrder[i]; if (!reservedRegs.test(preg)) { @@ -232,7 +233,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, } } - // Remove any physical registers which overlap. + RegSet overlappingPRegs; + + // Record physical registers whose ranges overlap. for (RegSet::const_iterator pregItr = pregs.begin(), pregEnd = pregs.end(); pregItr != pregEnd; ++pregItr) { @@ -243,9 +246,41 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, continue; } - if (!vregLI->overlaps(*pregLI)) { - continue; + if (vregLI->overlaps(*pregLI)) + overlappingPRegs.insert(preg); + } + + // Record any overlaps with regmask operands. + BitVector regMaskOverlaps(tri->getNumRegs()); + for (ArrayRef<SlotIndex>::iterator rmItr = regMaskSlots.begin(), + rmEnd = regMaskSlots.end(); + rmItr != rmEnd; ++rmItr) { + SlotIndex rmIdx = *rmItr; + if (vregLI->liveAt(rmIdx)) { + MachineInstr *rmMI = lis->getInstructionFromIndex(rmIdx); + const uint32_t* regMask = 0; + for (MachineInstr::mop_iterator mopItr = rmMI->operands_begin(), + mopEnd = rmMI->operands_end(); + mopItr != mopEnd; ++mopItr) { + if (mopItr->isRegMask()) { + regMask = mopItr->getRegMask(); + break; + } + } + assert(regMask != 0 && "Couldn't find register mask."); + regMaskOverlaps.setBitsNotInMask(regMask); } + } + + for (unsigned preg = 0; preg < tri->getNumRegs(); ++preg) { + if (regMaskOverlaps.test(preg)) + overlappingPRegs.insert(preg); + } + + for (RegSet::const_iterator pregItr = overlappingPRegs.begin(), + pregEnd = overlappingPRegs.end(); + pregItr != pregEnd; ++pregItr) { + unsigned preg = *pregItr; // Remove the register from the allowed set. VRAllowed::iterator eraseItr = @@ -256,7 +291,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, } // Also remove any aliases. 
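The new PBQP builder code above now also accounts for register-mask operands: for every call whose regmask slot is live across the vreg, registers whose bit is clear in the mask are treated as clobbered (that is what setBitsNotInMask collects) and are then removed from the vreg's allowed set along with their aliases. A small sketch of turning a preserved-registers mask into a clobber set and filtering an allowed list; the "set bit means preserved" layout is the assumption here, and the register numbers and mask are invented:

    // Sketch: derive the set of registers clobbered by a call from a
    // preserved-registers bit mask (set bit = preserved), then filter an
    // allowed-register list.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    std::vector<unsigned> clobberedRegs(const uint32_t *mask, unsigned numRegs) {
      std::vector<unsigned> clobbered;
      for (unsigned reg = 0; reg < numRegs; ++reg) {
        bool preserved = mask[reg / 32] & (1u << (reg % 32));
        if (!preserved)
          clobbered.push_back(reg);       // bit clear => the call clobbers it
      }
      return clobbered;
    }

    int main() {
      // 16 registers; the "call" preserves r8..r15 only.
      uint32_t mask[1] = {0xFF00u};
      std::vector<unsigned> allowed = {1, 3, 9, 12};
      for (unsigned reg : clobberedRegs(mask, 16))
        for (auto it = allowed.begin(); it != allowed.end();)
          it = (*it == reg) ? allowed.erase(it) : it + 1;
      std::printf("%zu allowed regs survive\n", allowed.size());   // 2 (r9, r12)
    }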
- const unsigned *aliasItr = tri->getAliasSet(preg); + const uint16_t *aliasItr = tri->getAliasSet(preg); if (aliasItr != 0) { for (; *aliasItr != 0; ++aliasItr) { VRAllowed::iterator eraseItr = @@ -270,7 +305,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, } // Construct the node. - PBQP::Graph::NodeItr node = + PBQP::Graph::NodeItr node = g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0)); // Record the mapping and allowed set in the problem. @@ -371,7 +406,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( const float copyFactor = 0.5; // Cost of copy relative to load. Current // value plucked randomly out of the air. - + PBQP::PBQPNum cBenefit = copyFactor * LiveIntervals::getSpillWeight(false, true, loopInfo->getLoopDepth(mbb)); @@ -382,7 +417,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( } const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src); - unsigned pregOpt = 0; + unsigned pregOpt = 0; while (pregOpt < allowed.size() && allowed[pregOpt] != dst) { ++pregOpt; } @@ -407,7 +442,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( std::swap(allowed1, allowed2); } } - + addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2, cBenefit); } @@ -439,27 +474,29 @@ void PBQPBuilderWithCoalescing::addVirtRegCoalesce( if (preg1 == preg2) { costMat[i + 1][j + 1] += -benefit; - } + } } } } void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { + au.setPreservesCFG(); + au.addRequired<AliasAnalysis>(); + au.addPreserved<AliasAnalysis>(); au.addRequired<SlotIndexes>(); au.addPreserved<SlotIndexes>(); au.addRequired<LiveIntervals>(); //au.addRequiredID(SplitCriticalEdgesID); - au.addRequiredID(RegisterCoalescerPassID); if (customPassID) au.addRequiredID(*customPassID); au.addRequired<CalculateSpillWeights>(); au.addRequired<LiveStacks>(); au.addPreserved<LiveStacks>(); + au.addRequired<MachineDominatorTree>(); + au.addPreserved<MachineDominatorTree>(); au.addRequired<MachineLoopInfo>(); au.addPreserved<MachineLoopInfo>(); - if (pbqpPreSplitting) - au.addRequired<LoopSplitter>(); au.addRequired<VirtRegMap>(); au.addRequired<RenderMachineFunction>(); MachineFunctionPass::getAnalysisUsage(au); @@ -488,29 +525,6 @@ void RegAllocPBQP::findVRegIntervalsToAlloc() { } } -void RegAllocPBQP::addStackInterval(const LiveInterval *spilled, - MachineRegisterInfo* mri) { - int stackSlot = vrm->getStackSlot(spilled->reg); - - if (stackSlot == VirtRegMap::NO_STACK_SLOT) { - return; - } - - const TargetRegisterClass *RC = mri->getRegClass(spilled->reg); - LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC); - - VNInfo *vni; - if (stackInterval.getNumValNums() != 0) { - vni = stackInterval.getValNumInfo(0); - } else { - vni = stackInterval.getNextValue( - SlotIndex(), 0, lss->getVNInfoAllocator()); - } - - LiveInterval &rhsInterval = lis->getInterval(spilled->reg); - stackInterval.MergeRangesInAsValue(rhsInterval, vni); -} - bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, const PBQP::Solution &solution) { // Set to true if we have any spills @@ -529,28 +543,22 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, unsigned alloc = solution.getSelection(node); if (problem.isPRegOption(vreg, alloc)) { - unsigned preg = problem.getPRegForOption(vreg, alloc); + unsigned preg = problem.getPRegForOption(vreg, alloc); DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n"); assert(preg != 0 && "Invalid preg selected."); - vrm->assignVirt2Phys(vreg, 
preg); + vrm->assignVirt2Phys(vreg, preg); } else if (problem.isSpillOption(vreg, alloc)) { vregsToAlloc.erase(vreg); - const LiveInterval* spillInterval = &lis->getInterval(vreg); - double oldWeight = spillInterval->weight; - rmf->rememberUseDefs(spillInterval); - std::vector<LiveInterval*> newSpills = - lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm); - addStackInterval(spillInterval, mri); - rmf->rememberSpills(spillInterval, newSpills); - - (void) oldWeight; + SmallVector<LiveInterval*, 8> newSpills; + LiveRangeEdit LRE(lis->getInterval(vreg), newSpills, *mf, *lis, vrm); + spiller->spill(LRE); + DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: " - << oldWeight << ", New vregs: "); + << LRE.getParent().weight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to // allocate. - for (std::vector<LiveInterval*>::const_iterator - itr = newSpills.begin(), end = newSpills.end(); + for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end(); itr != end; ++itr) { assert(!(*itr)->empty() && "Empty spill range."); DEBUG(dbgs() << (*itr)->reg << " "); @@ -560,9 +568,9 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, DEBUG(dbgs() << ")\n"); // We need another round if spill intervals were added. - anotherRoundNeeded |= !newSpills.empty(); + anotherRoundNeeded |= !LRE.empty(); } else { - assert(false && "Unknown allocation option."); + llvm_unreachable("Unknown allocation option."); } } @@ -642,7 +650,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { tm = &mf->getTarget(); tri = tm->getRegisterInfo(); tii = tm->getInstrInfo(); - mri = &mf->getRegInfo(); + mri = &mf->getRegInfo(); lis = &getAnalysis<LiveIntervals>(); lss = &getAnalysis<LiveStacks>(); @@ -650,7 +658,9 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { rmf = &getAnalysis<RenderMachineFunction>(); vrm = &getAnalysis<VirtRegMap>(); + spiller.reset(createInlineSpiller(*this, MF, *vrm)); + mri->freezeReservedRegs(MF); DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); @@ -666,6 +676,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // Find the vreg intervals in need of allocation. findVRegIntervalsToAlloc(); + const Function* func = mf->getFunction(); + std::string fqn = + func->getParent()->getModuleIdentifier() + "." + + func->getName().str(); + (void)fqn; + // If there are non-empty intervals allocate them using pbqp. if (!vregsToAlloc.empty()) { @@ -677,6 +693,20 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { std::auto_ptr<PBQPRAProblem> problem = builder->build(mf, lis, loopInfo, vregsToAlloc); + +#ifndef NDEBUG + if (pbqpDumpGraphs) { + std::ostringstream rs; + rs << round; + std::string graphFileName(fqn + "." 
+ rs.str() + ".pbqpgraph"); + std::string tmp; + raw_fd_ostream os(graphFileName.c_str(), tmp); + DEBUG(dbgs() << "Dumping graph for round " << round << " to \"" + << graphFileName << "\"\n"); + problem->getGraph().dump(os); + } +#endif + PBQP::Solution solution = PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve( problem->getGraph()); @@ -698,9 +728,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); // Run rewriter - std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter()); + vrm->rewrite(lis->getSlotIndexes()); - rewriter->runOnMachineFunction(*mf, *vrm, lis); + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers. + vrm->clearAllVirt(); + mri->clearVirtRegs(); return true; } diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 786d279c2b8c..17165fa72665 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -18,12 +18,16 @@ #include "RegisterClassInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Target/TargetMachine.h" - +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt<unsigned> +StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"), + cl::desc("Limit all regclasses to N registers")); + RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0) {} @@ -39,14 +43,14 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { } // Does this MF have different CSRs? - const unsigned *CSR = TRI->getCalleeSavedRegs(MF); + const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); if (Update || CSR != CalleeSaved) { // Build a CSRNum map. Every CSR alias gets an entry pointing to the last // overlapping CSR. CSRNum.clear(); CSRNum.resize(TRI->getNumRegs(), 0); for (unsigned N = 0; unsigned Reg = CSR[N]; ++N) - for (const unsigned *AS = TRI->getOverlaps(Reg); + for (const uint16_t *AS = TRI->getOverlaps(Reg); unsigned Alias = *AS; ++AS) CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ... Update = true; @@ -81,7 +85,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { // FIXME: Once targets reserve registers instead of removing them from the // allocation order, we can simply use begin/end here. - ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(*MF); + ArrayRef<uint16_t> RawOrder = RC->getRawAllocationOrder(*MF); for (unsigned i = 0; i != RawOrder.size(); ++i) { unsigned PhysReg = RawOrder[i]; // Remove reserved registers from the allocation order. @@ -99,6 +103,10 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { // CSR aliases go after the volatile registers, preserve the target's order. std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]); + // Register allocator stress test. Clip register class to N registers. + if (StressRA && RCI.NumRegs > StressRA) + RCI.NumRegs = StressRA; + // Check if RC is a proper sub-class. if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC)) if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs) diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h index 2c1407096cd7..400e1f48ce54 100644 --- a/lib/CodeGen/RegisterClassInfo.h +++ b/lib/CodeGen/RegisterClassInfo.h @@ -49,7 +49,7 @@ class RegisterClassInfo { // Callee saved registers of last MF. 
Assumed to be valid until the next // runOnFunction() call. - const unsigned *CalleeSaved; + const uint16_t *CalleeSaved; // Map register number to CalleeSaved index + 1; SmallVector<uint8_t, 4> CSRNum; diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 9b414d6212c7..75f88cafdf01 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regcoalescing" +#define DEBUG_TYPE "regalloc" #include "RegisterCoalescer.h" #include "LiveDebugVariables.h" #include "RegisterClassInfo.h" @@ -169,10 +169,6 @@ namespace { /// it as well. bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI); - /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the - /// VNInfo copy flag for DstReg and all aliases. - void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI); - /// markAsJoined - Remember that CopyMI has already been joined. void markAsJoined(MachineInstr *CopyMI); @@ -197,7 +193,7 @@ namespace { }; } /// end anonymous namespace -char &llvm::RegisterCoalescerPassID = RegisterCoalescer::ID; +char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) @@ -205,9 +201,6 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) -INITIALIZE_PASS_DEPENDENCY(PHIElimination) -INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) @@ -379,9 +372,6 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); AU.addPreservedID(MachineDominatorsID); - AU.addPreservedID(StrongPHIEliminationID); - AU.addPreservedID(PHIEliminationID); - AU.addPreservedID(TwoAddressInstructionPassID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -423,7 +413,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); LiveInterval &IntB = LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. @@ -434,40 +424,19 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // Get the location that B is defined at. Two options: either this value has // an unknown definition point or it is defined at CopyIdx. If unknown, we // can't process it. - if (!BValNo->isDefByCopy()) return false; - assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); + if (BValNo->def != CopyIdx) return false; // AValNo is the value number in A that defines the copy, A3 in the example. - SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); + SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); // The live range might not exist after fun with physreg coalescing. 
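A little above, RegisterClassInfo::compute gains a -stress-regalloc=N option that clips every register class to its first N allocatable registers, on top of the existing policy of dropping reserved registers and placing callee-saved aliases after the volatile registers. A sketch of that ordering over invented inputs (the real code derives the callee-saved set from TargetRegisterInfo and caches the result per class):

    // Sketch of RegisterClassInfo-style allocation-order computation:
    // reserved registers are dropped, callee-saved registers go last, and an
    // optional stress limit clips the class.
    #include <cstdio>
    #include <vector>

    std::vector<unsigned> computeOrder(const std::vector<unsigned> &rawOrder,
                                       const std::vector<bool> &reserved,
                                       const std::vector<bool> &calleeSaved,
                                       unsigned stressLimit /* 0 = off */) {
      std::vector<unsigned> order, csrTail;
      for (unsigned reg : rawOrder) {
        if (reserved[reg])
          continue;                          // never allocatable
        (calleeSaved[reg] ? csrTail : order).push_back(reg);
      }
      order.insert(order.end(), csrTail.begin(), csrTail.end());
      if (stressLimit && order.size() > stressLimit)
        order.resize(stressLimit);           // -stress-regalloc=N style clipping
      return order;
    }

    int main() {
      std::vector<unsigned> raw = {1, 2, 3, 4, 5};
      std::vector<bool> reserved    = {false, false, false, true, false, false};
      std::vector<bool> calleeSaved = {false, true, false, false, false, true};
      for (unsigned reg : computeOrder(raw, reserved, calleeSaved, /*limit=*/3))
        std::printf("r%u ", reg);            // r2 r4 r1
      std::printf("\n");
    }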
if (ALR == IntA.end()) return false; VNInfo *AValNo = ALR->valno; - // If it's re-defined by an early clobber somewhere in the live range, then - // it's not safe to eliminate the copy. FIXME: This is a temporary workaround. - // See PR3149: - // 172 %ECX<def> = MOV32rr %reg1039<kill> - // 180 INLINEASM <es:subl $5,$1 - // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, - // %EAX<kill>, - // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0 - // 188 %EAX<def> = MOV32rr %EAX<kill> - // 196 %ECX<def> = MOV32rr %ECX<kill> - // 204 %ECX<def> = MOV32rr %ECX<kill> - // 212 %EAX<def> = MOV32rr %EAX<kill> - // 220 %EAX<def> = MOV32rr %EAX - // 228 %reg1039<def> = MOV32rr %ECX<kill> - // The early clobber operand ties ECX input to the ECX def. - // - // The live interval of ECX is represented as this: - // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47) - // The coalescer has no idea there was a def in the middle of [174,230]. - if (AValNo->hasRedefByEC()) - return false; // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. - if (!CP.isCoalescable(AValNo->getCopy())) + MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); + if (!CP.isCoalescable(ACopyMI)) return false; // Get the LiveRange in IntB that this value number starts with. @@ -492,7 +461,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // of its aliases is overlapping the live interval of the virtual register. // If so, do not coalesce. if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) { DEBUG({ dbgs() << "\t\tInterfere with alias "; @@ -511,8 +480,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // We are about to delete CopyMI, so need to remove it as the 'instruction // that defines this value #'. Update the valnum with the new defining // instruction #. - BValNo->def = FillerStart; - BValNo->setCopy(0); + BValNo->def = FillerStart; // Okay, we can merge them. We need to insert a new liverange: // [ValLR.end, BLR.begin) of either value number, then we merge the @@ -522,12 +490,12 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // If the IntB live range is assigned to a physical register, and if that // physreg has sub-registers, update their live intervals as well. if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) { + for (const uint16_t *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) { if (!LIS->hasInterval(*SR)) continue; LiveInterval &SRLI = LIS->getInterval(*SR); SRLI.addRange(LiveRange(FillerStart, FillerEnd, - SRLI.getNextValue(FillerStart, 0, + SRLI.getNextValue(FillerStart, LIS->getVNInfoAllocator()))); } } @@ -554,9 +522,11 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, ValLREndInst->getOperand(UIdx).setIsKill(false); } - // If the copy instruction was killing the destination register before the - // merge, find the last use and trim the live range. That will also add the - // isKill marker. + // Rewrite the copy. If the copy instruction was killing the destination + // register before the merge, find the last use and trim the live range. That + // will also add the isKill marker. 
+ CopyMI->substituteRegister(IntA.reg, IntB.reg, CP.getSubIdx(), + *TRI); if (ALR->end == CopyIdx) LIS->shrinkToUses(&IntA); @@ -625,7 +595,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, if (!LIS->hasInterval(CP.getDstReg())) return false; - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); LiveInterval &IntA = LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); @@ -635,13 +605,13 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); - if (!BValNo || !BValNo->isDefByCopy()) + if (!BValNo || BValNo->def != CopyIdx) return false; assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); // AValNo is the value number in A that defines the copy, A3 in the example. - VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex()); + VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && "COPY source not live"); // If other defs can reach uses of this def, then it's not safe to perform @@ -651,8 +621,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); if (!DefMI) return false; - const MCInstrDesc &MCID = DefMI->getDesc(); - if (!MCID.isCommutable()) + if (!DefMI->isCommutable()) return false; // If DefMI is a two-address instruction then commuting it will change the // destination register. @@ -684,7 +653,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // Abort if the aliases of IntB.reg have values that are not simply the // clobbers from the superreg. if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) - for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) if (LIS->hasInterval(*AS) && HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0)) return false; @@ -718,7 +687,8 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, return false; if (NewMI != DefMI) { LIS->ReplaceMachineInstrInMaps(DefMI, NewMI); - MBB->insert(DefMI, NewMI); + MachineBasicBlock::iterator Pos = DefMI; + MBB->insert(Pos, NewMI); MBB->erase(DefMI); } unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); @@ -747,7 +717,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, UseMO.setReg(NewReg); continue; } - SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getUseIndex(); + SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; @@ -765,7 +735,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // This copy will become a noop. If it's defining a new val#, merge it into // BValNo. - SlotIndex DefIdx = UseIdx.getDefIndex(); + SlotIndex DefIdx = UseIdx.getRegSlot(); VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); if (!DVNI) continue; @@ -779,7 +749,6 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // is updated. 
VNInfo *ValNo = BValNo; ValNo->def = AValNo->def; - ValNo->setCopy(0); for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; @@ -799,7 +768,7 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, bool preserveSrcInt, unsigned DstReg, MachineInstr *CopyMI) { - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getUseIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); assert(SrcLR != SrcInt.end() && "Live range not found!"); VNInfo *ValNo = SrcLR->valno; @@ -809,14 +778,14 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, if (!DefMI) return false; assert(DefMI && "Defining instruction disappeared"); - const MCInstrDesc &MCID = DefMI->getDesc(); - if (!MCID.isAsCheapAsAMove()) + if (!DefMI->isAsCheapAsAMove()) return false; if (!TII->isTriviallyReMaterializable(DefMI, AA)) return false; bool SawStore = false; if (!DefMI->isSafeToMove(TII, AA, SawStore)) return false; + const MCInstrDesc &MCID = DefMI->getDesc(); if (MCID.getNumDefs() != 1) return false; if (!DefMI->isImplicitDef()) { @@ -831,27 +800,52 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, return false; } - RemoveCopyFlag(DstReg, CopyMI); - MachineBasicBlock *MBB = CopyMI->getParent(); MachineBasicBlock::iterator MII = llvm::next(MachineBasicBlock::iterator(CopyMI)); TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI); MachineInstr *NewMI = prior(MII); + // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). + // We need to remember these so we can add intervals once we insert + // NewMI into SlotIndexes. + SmallVector<unsigned, 4> NewMIImplDefs; + for (unsigned i = NewMI->getDesc().getNumOperands(), + e = NewMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = NewMI->getOperand(i); + if (MO.isReg()) { + assert(MO.isDef() && MO.isImplicit() && MO.isDead() && + TargetRegisterInfo::isPhysicalRegister(MO.getReg())); + NewMIImplDefs.push_back(MO.getReg()); + } + } + // CopyMI may have implicit operands, transfer them over to the newly // rematerialized instruction. And update implicit def interval valnos. for (unsigned i = CopyMI->getDesc().getNumOperands(), e = CopyMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = CopyMI->getOperand(i); - if (MO.isReg() && MO.isImplicit()) - NewMI->addOperand(MO); - if (MO.isDef()) - RemoveCopyFlag(MO.getReg(), CopyMI); + if (MO.isReg()) { + assert(MO.isImplicit() && "No explicit operands after implict operands."); + // Discard VReg implicit defs. 
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + NewMI->addOperand(MO); + } + } } - NewMI->copyImplicitOps(CopyMI); LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); + + SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); + for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { + unsigned reg = NewMIImplDefs[i]; + LiveInterval &li = LIS->getInterval(reg); + VNInfo *DeadDefVN = li.getNextValue(NewMIIdx.getRegSlot(), + LIS->getVNInfoAllocator()); + LiveRange lr(NewMIIdx.getRegSlot(), NewMIIdx.getDeadSlot(), DeadDefVN); + li.addRange(lr); + } + CopyMI->eraseFromParent(); ReMatCopies.insert(CopyMI); ReMatDefs.insert(DefMI); @@ -887,7 +881,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, DstInt = SrcInt; SrcInt = 0; - VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getDefIndex()); + VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot()); assert(DeadVNI && "No value defined in DstInt"); DstInt->removeValNo(DeadVNI); @@ -941,13 +935,10 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { SmallVector<unsigned,8> Ops; bool Reads, Writes; tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); - bool Kills = false, Deads = false; // Replace SrcReg with DstReg in all UseMI operands. for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = UseMI->getOperand(Ops[i]); - Kills |= MO.isKill(); - Deads |= MO.isDead(); // Make sure we don't create read-modify-write defs accidentally. We // assume here that a SrcReg def cannot be joined into a live DstReg. If @@ -967,19 +958,6 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { if (JoinedCopies.count(UseMI)) continue; - if (SubIdx) { - // If UseMI was a simple SrcReg def, make sure we didn't turn it into a - // read-modify-write of DstReg. - if (Deads) - UseMI->addRegisterDead(DstReg, TRI); - else if (!Reads && Writes) - UseMI->addRegisterDefined(DstReg, TRI); - - // Kill flags apply to the whole physical register. - if (DstIsPhys && Kills) - UseMI->addRegisterKilled(DstReg, TRI); - } - DEBUG({ dbgs() << "\t\tupdated: "; if (!UseMI->isDebugValue()) @@ -996,7 +974,7 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS, const TargetRegisterInfo *TRI) { if (li.empty()) { if (TargetRegisterInfo::isPhysicalRegister(li.reg)) - for (const unsigned* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) { + for (const uint16_t* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) { if (!LIS->hasInterval(*SR)) continue; LiveInterval &sli = LIS->getInterval(*SR); @@ -1013,7 +991,7 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS, /// the val# it defines. If the live interval becomes empty, remove it as well. 
bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI) { - SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getDefIndex(); + SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getRegSlot(); LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); if (DefIdx != MLR->valno->def) return false; @@ -1021,27 +999,6 @@ bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, return removeIntervalIfEmpty(li, LIS, TRI); } -void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg, - const MachineInstr *CopyMI) { - SlotIndex DefIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); - if (LIS->hasInterval(DstReg)) { - LiveInterval &LI = LIS->getInterval(DstReg); - if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->def == DefIdx) - LR->valno->setCopy(0); - } - if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) - return; - for (const unsigned* AS = TRI->getAliasSet(DstReg); *AS; ++AS) { - if (!LIS->hasInterval(*AS)) - continue; - LiveInterval &LI = LIS->getInterval(*AS); - if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->def == DefIdx) - LR->valno->setCopy(0); - } -} - /// shouldJoinPhys - Return true if a copy involving a physreg should be joined. /// We need to be careful about coalescing a source physical register with a /// virtual register. Once the coalescing is done, it cannot be broken and these @@ -1279,7 +1236,7 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { } } - // SrcReg is guarateed to be the register whose live interval that is + // SrcReg is guaranteed to be the register whose live interval that is // being merged. LIS->removeInterval(CP.getSrcReg()); @@ -1368,9 +1325,9 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li, // FIXME: This is very conservative. For example, we don't handle // physical registers. - MachineInstr *MI = VNI->getCopy(); + MachineInstr *MI = li.getInstructionFromIndex(VNI->def); - if (!MI->isFullCopy() || CP.isPartial() || CP.isPhys()) + if (!MI || !MI->isFullCopy() || CP.isPartial() || CP.isPhys()) return false; unsigned Dst = MI->getOperand(0).getReg(); @@ -1388,11 +1345,9 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li, assert(Dst == A); VNInfo *Other = LR->valno; - if (!Other->isDefByCopy()) - return false; - const MachineInstr *OtherMI = Other->getCopy(); + const MachineInstr *OtherMI = li.getInstructionFromIndex(Other->def); - if (!OtherMI->isFullCopy()) + if (!OtherMI || !OtherMI->isFullCopy()) return false; unsigned OtherDst = OtherMI->getOperand(0).getReg(); @@ -1431,7 +1386,44 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // than the full interfeence check below. We allow overlapping live ranges // only when one is a copy of the other. if (CP.isPhys()) { - for (const unsigned *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){ + // Optimization for reserved registers like ESP. + // We can only merge with a reserved physreg if RHS has a single value that + // is a copy of CP.DstReg(). The live range of the reserved register will + // look like a set of dead defs - we don't properly track the live range of + // reserved registers. + if (RegClassInfo.isReserved(CP.getDstReg())) { + assert(CP.isFlipped() && RHS.containsOneValue() && + "Invalid join with reserved register"); + // Deny any overlapping intervals. This depends on all the reserved + // register live ranges to look like dead defs. 
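// Sketch, not part of the patch: the reserved-register join test described in
// the comment above, reduced to plain C++. Segment, Interval and
// canJoinWithReservedReg are invented stand-ins for the LiveInterval
// machinery; only the shape of the check is the point. Because a reserved
// register's live range is tracked only as dead defs, the coalescer must
// refuse the join as soon as the virtual register overlaps anything aliased
// to the reserved physreg.
#include <vector>

struct Segment { unsigned Start, End; };              // half-open [Start, End)

struct Interval {
  std::vector<Segment> Segs;                          // sorted and disjoint
  bool overlaps(const Interval &Other) const {
    for (const Segment &A : Segs)
      for (const Segment &B : Other.Segs)
        if (A.Start < B.End && B.Start < A.End)
          return true;
    return false;
  }
};

// True if the virtual register may be merged into the reserved physreg:
// none of the physreg's overlapping units may interfere with VirtLI.
bool canJoinWithReservedReg(const Interval &VirtLI,
                            const std::vector<Interval> &AliasIntervals) {
  for (const Interval &AliasLI : AliasIntervals)
    if (VirtLI.overlaps(AliasLI))
      return false;                                   // interference: abort
  return true;                                        // safe: skip range merging
}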
+ for (const uint16_t *AS = TRI->getOverlaps(CP.getDstReg()); *AS; ++AS) { + if (!LIS->hasInterval(*AS)) { + // Make sure at least DstReg itself exists before attempting a join. + if (*AS == CP.getDstReg()) + LIS->getOrCreateInterval(CP.getDstReg()); + continue; + } + if (RHS.overlaps(LIS->getInterval(*AS))) { + DEBUG(dbgs() << "\t\tInterference: " << PrintReg(*AS, TRI) << '\n'); + return false; + } + } + // Skip any value computations, we are not adding new values to the + // reserved register. Also skip merging the live ranges, the reserved + // register live range doesn't need to be accurate as long as all the + // defs are there. + return true; + } + + // Check if a register mask clobbers DstReg. + BitVector UsableRegs; + if (LIS->checkRegMaskInterference(RHS, UsableRegs) && + !UsableRegs.test(CP.getDstReg())) { + DEBUG(dbgs() << "\t\tRegister mask interference.\n"); + return false; + } + + for (const uint16_t *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){ if (!LIS->hasInterval(*AS)) continue; const LiveInterval &LHS = LIS->getInterval(*AS); @@ -1485,12 +1477,12 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? + if (VNI->isUnused() || VNI->isPHIDef()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); + assert(MI && "Missing def"); + if (!MI->isCopyLike()) // Src not defined by a copy? continue; - - // Never join with a register that has EarlyClobber redefs. - if (VNI->hasRedefByEC()) - return false; // Figure out the value # from the RHS. LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); @@ -1499,7 +1491,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // DstReg is known to be a register in the LHS interval. If the src is // from the RHS interval, we can use its value #. - MachineInstr *MI = VNI->getCopy(); if (!CP.isCoalescable(MI) && !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) continue; @@ -1512,12 +1503,12 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? + if (VNI->isUnused() || VNI->isPHIDef()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); + assert(MI && "Missing def"); + if (!MI->isCopyLike()) // Src not defined by a copy? continue; - - // Never join with a register that has EarlyClobber redefs. - if (VNI->hasRedefByEC()) - return false; // Figure out the value # from the LHS. LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); @@ -1526,7 +1517,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // DstReg is known to be a register in the RHS interval. If the src is // from the LHS interval, we can use its value #. - MachineInstr *MI = VNI->getCopy(); if (!CP.isCoalescable(MI) && !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) continue; @@ -1600,10 +1590,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { if (LHSValNoAssignments[I->valno->id] != RHSValNoAssignments[J->valno->id]) return false; - // If it's re-defined by an early clobber somewhere in the live range, - // then conservatively abort coalescing. 
- if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC()) - return false; } if (I->end < J->end) @@ -1905,8 +1891,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { unsigned Reg = MO.getReg(); if (!Reg) continue; + DeadDefs.push_back(Reg); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - DeadDefs.push_back(Reg); // Remat may also enable register class inflation. if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg))) InflateRegs.push_back(Reg); @@ -1936,7 +1922,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // Check for now unnecessary kill flags. if (LIS->isNotInMIMap(MI)) continue; - SlotIndex DefIdx = LIS->getInstructionIndex(MI).getDefIndex(); + SlotIndex DefIdx = LIS->getInstructionIndex(MI).getRegSlot(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isKill()) continue; @@ -1950,7 +1936,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // remain alive. if (!TargetRegisterInfo::isPhysicalRegister(reg)) continue; - for (const unsigned *SR = TRI->getSubRegisters(reg); + for (const uint16_t *SR = TRI->getSubRegisters(reg); unsigned S = *SR; ++SR) if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx)) MI->addRegisterDefined(S, TRI); diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index 472c48377fef..310b933cab9b 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -1,4 +1,4 @@ -//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===// +//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains the abstract interface for register coalescers, +// This file contains the abstract interface for register coalescers, // allowing them to interact with and query register allocators. // //===----------------------------------------------------------------------===// @@ -47,7 +47,7 @@ namespace llvm { /// CrossClass - True when both regs are virtual, and newRC is constrained. bool CrossClass; - /// Flipped - True when DstReg and SrcReg are reversed from the oriignal + /// Flipped - True when DstReg and SrcReg are reversed from the original /// copy instruction. bool Flipped; diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index ca02aa1b8143..03bd82e225dc 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -37,7 +37,7 @@ using namespace llvm; void RegScavenger::setUsed(unsigned Reg) { RegsAvailable.reset(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) RegsAvailable.reset(SubReg); } @@ -45,7 +45,7 @@ void RegScavenger::setUsed(unsigned Reg) { bool RegScavenger::isAliasUsed(unsigned Reg) const { if (isUsed(Reg)) return true; - for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R) + for (const uint16_t *R = TRI->getAliasSet(Reg); *R; ++R) if (isUsed(*R)) return true; return false; @@ -59,9 +59,6 @@ void RegScavenger::initRegState() { // All registers started out unused. RegsAvailable.set(); - // Reserved registers are always used. 
- RegsAvailable ^= ReservedRegs; - if (!MBB) return; @@ -86,17 +83,24 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && "Target changed?"); + // It is not possible to use the register scavenger after late optimization + // passes that don't preserve accurate liveness information. + assert(MRI->tracksLiveness() && + "Cannot use register scavenger with inaccurate liveness"); + // Self-initialize. if (!MBB) { NumPhysRegs = TRI->getNumRegs(); RegsAvailable.resize(NumPhysRegs); + KillRegs.resize(NumPhysRegs); + DefRegs.resize(NumPhysRegs); // Create reserved registers bitvector. ReservedRegs = TRI->getReservedRegs(MF); // Create callee-saved registers bitvector. CalleeSavedRegs.resize(NumPhysRegs); - const unsigned *CSRegs = TRI->getCalleeSavedRegs(); + const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF); if (CSRegs != NULL) for (unsigned i = 0; CSRegs[i]; ++i) CalleeSavedRegs.set(CSRegs[i]); @@ -110,13 +114,7 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { BV.set(Reg); - for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++) - BV.set(*R); -} - -void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) { - BV.set(Reg); - for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++) + for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++) BV.set(*R); } @@ -148,12 +146,12 @@ void RegScavenger::forward() { // predicated, conservatively assume "kill" markers do not actually kill the // register. Similarly ignores "dead" markers. bool isPred = TII->isPredicated(MI); - BitVector EarlyClobberRegs(NumPhysRegs); - BitVector KillRegs(NumPhysRegs); - BitVector DefRegs(NumPhysRegs); - BitVector DeadRegs(NumPhysRegs); + KillRegs.reset(); + DefRegs.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + (isPred ? DefRegs : KillRegs).setBitsNotInMask(MO.getRegMask()); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -164,21 +162,19 @@ void RegScavenger::forward() { // Ignore undef uses. if (MO.isUndef()) continue; - // Two-address operands implicitly kill. - if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i))) + if (!isPred && MO.isKill()) addRegWithSubRegs(KillRegs, Reg); } else { assert(MO.isDef()); if (!isPred && MO.isDead()) - addRegWithSubRegs(DeadRegs, Reg); + addRegWithSubRegs(KillRegs, Reg); else addRegWithSubRegs(DefRegs, Reg); - if (MO.isEarlyClobber()) - addRegWithAliases(EarlyClobberRegs, Reg); } } // Verify uses and defs. +#ifndef NDEBUG for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) @@ -199,17 +195,18 @@ void RegScavenger::forward() { // Ideally we would like a way to model this, but leaving the // insert_subreg around causes both correctness and performance issues. 
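// Sketch, not part of the patch: one forward() step of the scavenger with the
// KillRegs/DefRegs bookkeeping introduced above, written against invented
// Operand/stepForward names. A register-mask operand kills every register it
// does not preserve, and on a predicated instruction kills are conservatively
// treated as defs, mirroring the hunk.
#include <vector>

struct Operand {
  bool IsRegMask, IsDef, IsKill, IsDead;
  unsigned Reg;                       // valid when !IsRegMask
  std::vector<bool> Preserved;        // valid when IsRegMask, one bit per reg
};

void stepForward(std::vector<bool> &Available,      // true = register is free
                 const std::vector<Operand> &Ops, bool IsPredicated) {
  const unsigned N = Available.size();
  std::vector<bool> Kill(N, false), Def(N, false);
  for (const Operand &MO : Ops) {
    if (MO.IsRegMask) {                              // clobbers what it misses
      for (unsigned R = 0; R != N; ++R)
        if (!MO.Preserved[R])
          (IsPredicated ? Def : Kill)[R] = true;
    } else if (!MO.IsDef) {
      if (!IsPredicated && MO.IsKill)
        Kill[MO.Reg] = true;                         // last use frees the reg
    } else {
      ((!IsPredicated && MO.IsDead) ? Kill : Def)[MO.Reg] = true;
    }
  }
  for (unsigned R = 0; R != N; ++R) {                // commit the changes
    if (Kill[R]) Available[R] = true;
    if (Def[R])  Available[R] = false;
  }
}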
bool SubUsed = false; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) if (isUsed(SubReg)) { SubUsed = true; break; } - assert(SubUsed && "Using an undefined register!"); + if (!SubUsed) { + MBB->getParent()->verify(NULL, "In Register Scavenger"); + llvm_unreachable("Using an undefined register!"); + } (void)SubUsed; } - assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) && - "Using an early clobbered register!"); } else { assert(MO.isDef()); #if 0 @@ -221,18 +218,20 @@ void RegScavenger::forward() { #endif } } +#endif // NDEBUG // Commit the changes. setUnused(KillRegs); - setUnused(DeadRegs); setUsed(DefRegs); } void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { + used = RegsAvailable; + used.flip(); if (includeReserved) - used = ~RegsAvailable; + used |= ReservedRegs; else - used = ~RegsAvailable & ~ReservedRegs; + used.reset(ReservedRegs); } unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { @@ -286,6 +285,8 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, // Remove any candidates touched by instruction. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + Candidates.clearBitsNotInMask(MO.getRegMask()); if (!MO.isReg() || MO.isUndef() || !MO.getReg()) continue; if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { @@ -296,7 +297,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, continue; } Candidates.reset(MO.getReg()); - for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++) + for (const uint16_t *R = TRI->getAliasSet(MO.getReg()); *R; R++) Candidates.reset(*R); } // If we're not in a virtual reg's live range, this is a valid @@ -347,9 +348,9 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // RegsAvailable, as RegsAvailable does not take aliases into account. // That's what getRegsAvailable() is for. BitVector Available = getRegsAvailable(RC); - - if ((Candidates & Available).any()) - Candidates &= Available; + Available &= Candidates; + if (Available.any()) + Candidates = Available; // Find the register whose use is furthest away. MachineBasicBlock::iterator UseMI; diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp index 8b02ec44273a..6020908d9112 100644 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ b/lib/CodeGen/RenderMachineFunction.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----s-----------===// +//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----------------===// // // The LLVM Compiler Infrastructure // @@ -560,12 +560,13 @@ namespace llvm { // For uses/defs recorded use/def indexes override current liveness and // instruction operands (Only for the interval which records the indexes). - if (i.isUse() || i.isDef()) { + // FIXME: This is all wrong, uses and defs share the same slots. 
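// Sketch, not part of the patch: the systematic renames in this and the
// surrounding files (getDefIndex to getRegSlot, getUseIndex to
// getRegSlot(true), getStoreIndex to getDeadSlot, isLoad to isBlock) follow
// from every instruction index carrying four sub-slots. The model below uses
// invented names and is not the real SlotIndexes implementation; it only
// shows the encoding those accessors assume.
enum Slot { Block = 0, EarlyClobber = 1, Register = 2, Dead = 3 };

struct SlotIdx {
  unsigned InstrNum;   // which instruction in the function
  Slot Kind;           // which of the four per-instruction slots

  // Normal uses and defs both live at the Register slot; passing true asks
  // for the EarlyClobber slot just before it, which is what the patch
  // substitutes for the old getUseIndex().
  SlotIdx getRegSlot(bool EC = false) const {
    return SlotIdx{InstrNum, EC ? EarlyClobber : Register};
  }
  SlotIdx getDeadSlot() const { return SlotIdx{InstrNum, Dead}; }
  bool isBlock() const        { return Kind == Block; }
  bool isEarlyClobber() const { return Kind == EarlyClobber; }
  bool isRegister() const     { return Kind == Register; }
  bool isDead() const         { return Kind == Dead; }
};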
+ if (i.isEarlyClobber() || i.isRegister()) { UseDefs::const_iterator udItr = useDefs.find(li); if (udItr != useDefs.end()) { const SlotSet &slotSet = udItr->second; if (slotSet.count(i)) { - if (i.isUse()) { + if (i.isEarlyClobber()) { return Used; } // else @@ -586,9 +587,9 @@ namespace llvm { return AliveStack; } } else { - if (i.isDef() && mi->definesRegister(li->reg, tri)) { + if (i.isRegister() && mi->definesRegister(li->reg, tri)) { return Defined; - } else if (i.isUse() && mi->readsRegister(li->reg)) { + } else if (i.isEarlyClobber() && mi->readsRegister(li->reg)) { return Used; } else { if (vrm == 0 || @@ -804,7 +805,7 @@ namespace llvm { os << indent + s(2) << "<tr height=6ex>\n"; // Render the code column. - if (i.isLoad()) { + if (i.isBlock()) { MachineBasicBlock *mbb = sis->getMBBFromIndex(i); mi = sis->getInstructionFromIndex(i); @@ -823,7 +824,7 @@ namespace llvm { } os << indent + s(4) << "</td>\n"; } else { - i = i.getStoreIndex(); // <- Will be incremented to the next index. + i = i.getDeadSlot(); // <- Will be incremented to the next index. continue; } } @@ -952,10 +953,10 @@ namespace llvm { rItr != rEnd; ++rItr) { const MachineInstr *mi = &*rItr; if (mi->readsRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex()); + useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot(true)); } if (mi->definesRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex()); + useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot()); } } } diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 1e9b5c89f172..8fd64265fda6 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -31,6 +31,8 @@ static cl::opt<bool> StressSchedOpt( cl::desc("Stress test instruction scheduling")); #endif +void SchedulingPriorityQueue::anchor() { } + ScheduleDAG::ScheduleDAG(MachineFunction &mf) : TM(mf.getTarget()), TII(TM.getInstrInfo()), @@ -44,42 +46,17 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf) ScheduleDAG::~ScheduleDAG() {} -/// getInstrDesc helper to handle SDNodes. -const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { - if (!Node || !Node->isMachineOpcode()) return NULL; - return &TII->get(Node->getMachineOpcode()); -} - -/// dump - dump the schedule. -void ScheduleDAG::dumpSchedule() const { - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) - SU->dump(this); - else - dbgs() << "**** NOOP ****\n"; - } -} - - -/// Run - perform scheduling. -/// -void ScheduleDAG::Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos) { - BB = bb; - InsertPos = insertPos; - +/// Clear the DAG state (e.g. between scheduling regions). +void ScheduleDAG::clearDAG() { SUnits.clear(); - Sequence.clear(); EntrySU = SUnit(); ExitSU = SUnit(); +} - Schedule(); - - DEBUG({ - dbgs() << "*** Final schedule ***\n"; - dumpSchedule(); - dbgs() << '\n'; - }); +/// getInstrDesc helper to handle SDNodes. 
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { + if (!Node || !Node->isMachineOpcode()) return NULL; + return &TII->get(Node->getMachineOpcode()); } /// addPred - This adds the specified edge as a pred of the current node if @@ -313,13 +290,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { case SDep::Output: dbgs() << "out "; break; case SDep::Order: dbgs() << "ch "; break; } - dbgs() << "#"; - dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + dbgs() << "SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); if (I->isAssignedRegDep()) - dbgs() << " Reg=" << G->TRI->getName(I->getReg()); + dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI); dbgs() << "\n"; } } @@ -334,8 +310,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { case SDep::Output: dbgs() << "out "; break; case SDep::Order: dbgs() << "ch "; break; } - dbgs() << "#"; - dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + dbgs() << "SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); @@ -346,13 +321,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { } #ifndef NDEBUG -/// VerifySchedule - Verify that all SUnits were scheduled and that -/// their state is consistent. +/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that +/// their state is consistent. Return the number of scheduled nodes. /// -void ScheduleDAG::VerifySchedule(bool isBottomUp) { +unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) { bool AnyNotSched = false; unsigned DeadNodes = 0; - unsigned Noops = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { if (!SUnits[i].isScheduled) { if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { @@ -393,12 +367,8 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { } } } - for (unsigned i = 0, e = Sequence.size(); i != e; ++i) - if (!Sequence[i]) - ++Noops; assert(!AnyNotSched); - assert(Sequence.size() + DeadNodes - Noops == SUnits.size() && - "The number of nodes scheduled doesn't match the expected number!"); + return SUnits.size() - DeadNodes; } #endif diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp deleted file mode 100644 index f8b1bc76eb8b..000000000000 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ /dev/null @@ -1,68 +0,0 @@ -//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This implements the Emit routines for the ScheduleDAG class, which creates -// MachineInstrs according to the computed schedule. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "pre-RA-sched" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -using namespace llvm; - -void ScheduleDAG::EmitNoop() { - TII->insertNoop(*BB, InsertPos); -} - -void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, - DenseMap<SUnit*, unsigned> &VRBaseMap) { - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - if (I->getSUnit()->CopyDstRC) { - // Copy to physical register. - DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); - assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); - // Find the destination physical register. - unsigned Reg = 0; - for (SUnit::const_succ_iterator II = SU->Succs.begin(), - EE = SU->Succs.end(); II != EE; ++II) { - if (II->isCtrl()) continue; // ignore chain preds - if (II->getReg()) { - Reg = II->getReg(); - break; - } - } - BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) - .addReg(VRI->second); - } else { - // Copy from physical register. - assert(I->getReg() && "Unknown physical register!"); - unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); - bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; - (void)isNew; // Silence compiler warning. 
- assert(isNew && "Node emitted out of order - early"); - BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) - .addReg(I->getReg()); - } - break; - } -} diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 34b8ab0b47f2..6be1ab7f5b08 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -13,14 +13,15 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "sched-instrs" -#include "ScheduleDAGInstrs.h" #include "llvm/Operator.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -33,25 +34,17 @@ using namespace llvm; ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, - const MachineDominatorTree &mdt) + const MachineDominatorTree &mdt, + bool IsPostRAFlag, + LiveIntervals *lis) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), - InstrItins(mf.getTarget().getInstrItineraryData()), - Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), - LoopRegs(MLI, MDT), FirstDbgValue(0) { + InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis), + IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT), + FirstDbgValue(0) { + assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); -} - -/// Run - perform scheduling. -/// -void ScheduleDAGInstrs::Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator begin, - MachineBasicBlock::iterator end, - unsigned endcount) { - BB = bb; - Begin = begin; - InsertPosIndex = endcount; - - ScheduleDAG::Run(bb, end); + assert(!(IsPostRA && MRI.getNumVirtRegs()) && + "Virtual registers must be removed prior to PostRA scheduling"); } /// getUnderlyingObjectFromInt - This is the function that does the work of @@ -133,19 +126,58 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, return 0; } -void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { +void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) { LoopRegs.Deps.clear(); if (MachineLoop *ML = MLI.getLoopFor(BB)) - if (BB == ML->getLoopLatch()) { - MachineBasicBlock *Header = ML->getHeader(); - for (MachineBasicBlock::livein_iterator I = Header->livein_begin(), - E = Header->livein_end(); I != E; ++I) - LoopLiveInRegs.insert(*I); + if (BB == ML->getLoopLatch()) LoopRegs.VisitLoop(ML); - } } -/// AddSchedBarrierDeps - Add dependencies from instructions in the current +void ScheduleDAGInstrs::finishBlock() { + // Nothing to do. +} + +/// Initialize the map with the number of registers. +void Reg2SUnitsMap::setRegLimit(unsigned Limit) { + PhysRegSet.setUniverse(Limit); + SUnits.resize(Limit); +} + +/// Clear the map without deallocating storage. +void Reg2SUnitsMap::clear() { + for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) { + SUnits[*I].clear(); + } + PhysRegSet.clear(); +} + +/// Initialize the DAG and common scheduler state for the current scheduling +/// region. This does not actually create the DAG, only clears it. The +/// scheduling driver may call BuildSchedGraph multiple times per scheduling +/// region. 
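// Sketch, not part of the patch: how a driver is expected to use the
// region-based interface being introduced here, per the comments in this
// hunk. RegionScheduler and its methods are stand-ins; only the call order
// (startBlock, then one enterRegion/exitRegion pair per scheduling region,
// then finishBlock) is the point.
#include <cstdio>

struct RegionScheduler {
  void startBlock(int BB)        { std::printf("startBlock %d\n", BB); }
  void enterRegion(int B, int E) { std::printf("  region [%d,%d)\n", B, E); }
  void buildAndSchedule()        { std::printf("  build graph, schedule\n"); }
  void exitRegion()              { /* state is kept for the driver */ }
  void finishBlock()             { /* nothing to do */ }
};

int main() {
  RegionScheduler S;
  S.startBlock(0);                                             // one block that
  S.enterRegion(0, 4); S.buildAndSchedule(); S.exitRegion();   // is split into
  S.enterRegion(5, 9); S.buildAndSchedule(); S.exitRegion();   // two regions
  S.finishBlock();
  return 0;
}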
+void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) { + BB = bb; + RegionBegin = begin; + RegionEnd = end; + EndIndex = endcount; + MISUnitMap.clear(); + + // Check to see if the scheduler cares about latencies. + UnitLatencies = forceUnitLatencies(); + + ScheduleDAG::clearDAG(); +} + +/// Close the current scheduling region. Don't clear any state in case the +/// driver wants to refer to the previous scheduling region. +void ScheduleDAGInstrs::exitRegion() { + // Nothing to do. +} + +/// addSchedBarrierDeps - Add dependencies from instructions in the current /// list of instructions being scheduled to scheduling barrier by adding /// the exit SU to the register defs and use list. This is because we want to /// make sure instructions which define registers that are either used by @@ -153,11 +185,11 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { /// especially important when the definition latency of the return value(s) /// are too high to be hidden by the branch or when the liveout registers /// used by instructions in the fallthrough block. -void ScheduleDAGInstrs::AddSchedBarrierDeps() { - MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0; +void ScheduleDAGInstrs::addSchedBarrierDeps() { + MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0; ExitSU.setInstr(ExitMI); bool AllDepKnown = ExitMI && - (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier()); + (ExitMI->isCall() || ExitMI->isBarrier()); if (ExitMI && AllDepKnown) { // If it's a call or a barrier, add dependencies on the defs and uses of // instruction. @@ -167,29 +199,313 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() { unsigned Reg = MO.getReg(); if (Reg == 0) continue; - assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); - Uses[Reg].push_back(&ExitSU); + if (TRI->isPhysicalRegister(Reg)) + Uses[Reg].push_back(&ExitSU); + else { + assert(!IsPostRA && "Virtual register encountered after regalloc."); + addVRegUseDeps(&ExitSU, i); + } } } else { // For others, e.g. fallthrough, conditional branch, assume the exit // uses all the registers that are livein to the successor blocks. - SmallSet<unsigned, 8> Seen; + assert(Uses.empty() && "Uses in set before adding deps?"); for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; - if (Seen.insert(Reg)) + if (!Uses.contains(Reg)) Uses[Reg].push_back(&ExitSU); } } } -void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { - // We'll be allocating one SUnit for each instruction, plus one for - // the region exit node. +/// MO is an operand of SU's instruction that defines a physical register. Add +/// data dependencies from SU to any uses of the physical register. +void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, + const MachineOperand &MO) { + assert(MO.isDef() && "expect physreg def"); + + // Ask the target if address-backscheduling is desirable, and if so how much. 
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); + unsigned DataLatency = SU->Latency; + + for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) { + if (!Uses.contains(*Alias)) + continue; + std::vector<SUnit*> &UseList = Uses[*Alias]; + for (unsigned i = 0, e = UseList.size(); i != e; ++i) { + SUnit *UseSU = UseList[i]; + if (UseSU == SU) + continue; + unsigned LDataLatency = DataLatency; + // Optionally add in a special extra latency for nodes that + // feed addresses. + // TODO: Perhaps we should get rid of + // SpecialAddressLatency and just move this into + // adjustSchedDependency for the targets that care about it. + if (SpecialAddressLatency != 0 && !UnitLatencies && + UseSU != &ExitSU) { + MachineInstr *UseMI = UseSU->getInstr(); + const MCInstrDesc &UseMCID = UseMI->getDesc(); + int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias); + assert(RegUseIndex >= 0 && "UseMI doesn't use register!"); + if (RegUseIndex >= 0 && + (UseMI->mayLoad() || UseMI->mayStore()) && + (unsigned)RegUseIndex < UseMCID.getNumOperands() && + UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass()) + LDataLatency += SpecialAddressLatency; + } + // Adjust the dependence latency using operand def/use + // information (if any), and then allow the target to + // perform its own adjustments. + const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias); + if (!UnitLatencies) { + computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); + ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); + } + UseSU->addPred(dep); + } + } +} + +/// addPhysRegDeps - Add register dependencies (data, anti, and output) from +/// this SUnit to following instructions in the same scheduling region that +/// depend the physical register referenced at OperIdx. +void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { + const MachineInstr *MI = SU->getInstr(); + const MachineOperand &MO = MI->getOperand(OperIdx); + + // Optionally add output and anti dependencies. For anti + // dependencies we use a latency of 0 because for a multi-issue + // target we want to allow the defining instruction to issue + // in the same cycle as the using instruction. + // TODO: Using a latency of 1 here for output dependencies assumes + // there's no cost for reusing registers. + SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; + for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) { + if (!Defs.contains(*Alias)) + continue; + std::vector<SUnit *> &DefList = Defs[*Alias]; + for (unsigned i = 0, e = DefList.size(); i != e; ++i) { + SUnit *DefSU = DefList[i]; + if (DefSU == &ExitSU) + continue; + if (DefSU != SU && + (Kind != SDep::Output || !MO.isDead() || + !DefSU->getInstr()->registerDefIsDead(*Alias))) { + if (Kind == SDep::Anti) + DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias)); + else { + unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx, + DefSU->getInstr()); + DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias)); + } + } + } + } + + if (!MO.isDef()) { + // Either insert a new Reg2SUnits entry with an empty SUnits list, or + // retrieve the existing SUnits list for this register's uses. + // Push this SUnit on the use list. + Uses[MO.getReg()].push_back(SU); + } + else { + addPhysRegDataDeps(SU, MO); + + // Either insert a new Reg2SUnits entry with an empty SUnits list, or + // retrieve the existing SUnits list for this register's defs. 
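// Sketch, not part of the patch: the per-physreg bookkeeping that
// addPhysRegDeps/addPhysRegDataDeps perform, with invented Node, addEdge and
// PhysRegDeps names and without latencies or alias sets. The region is
// scanned bottom-up, so the recorded nodes are always later in program order
// than the one currently being visited.
#include <vector>

struct Node { int Id; };

// Succ must come after Pred in the final schedule; Kind is data/anti/output.
static void addEdge(Node *Pred, Node *Succ, const char *Kind) {
  (void)Pred; (void)Succ; (void)Kind;               // a real DAG records this
}

struct PhysRegDeps {
  std::vector<std::vector<Node*> > Uses, Defs;      // one list per register
  explicit PhysRegDeps(unsigned NumRegs) : Uses(NumRegs), Defs(NumRegs) {}

  void visit(Node *SU, unsigned Reg, bool IsDef) {
    if (!IsDef) {
      for (Node *D : Defs[Reg]) addEdge(SU, D, "anti");  // later redef waits
      Uses[Reg].push_back(SU);
      return;
    }
    for (Node *U : Uses[Reg]) addEdge(SU, U, "data");    // feeds later uses
    for (Node *D : Defs[Reg]) addEdge(SU, D, "output");  // before later defs
    Uses[Reg].clear();            // later uses now have their producer
    Defs[Reg].assign(1, SU);      // simplified: the real code keeps call defs
  }
};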
+ std::vector<SUnit *> &DefList = Defs[MO.getReg()]; + + // If a def is going to wrap back around to the top of the loop, + // backschedule it. + if (!UnitLatencies && DefList.empty()) { + LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg()); + if (I != LoopRegs.Deps.end()) { + const MachineOperand *UseMO = I->second.first; + unsigned Count = I->second.second; + const MachineInstr *UseMI = UseMO->getParent(); + unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); + const MCInstrDesc &UseMCID = UseMI->getDesc(); + const TargetSubtargetInfo &ST = + TM.getSubtarget<TargetSubtargetInfo>(); + unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); + // TODO: If we knew the total depth of the region here, we could + // handle the case where the whole loop is inside the region but + // is large enough that the isScheduleHigh trick isn't needed. + if (UseMOIdx < UseMCID.getNumOperands()) { + // Currently, we only support scheduling regions consisting of + // single basic blocks. Check to see if the instruction is in + // the same region by checking to see if it has the same parent. + if (UseMI->getParent() != MI->getParent()) { + unsigned Latency = SU->Latency; + if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) + Latency += SpecialAddressLatency; + // This is a wild guess as to the portion of the latency which + // will be overlapped by work done outside the current + // scheduling region. + Latency -= std::min(Latency, Count); + // Add the artificial edge. + ExitSU.addPred(SDep(SU, SDep::Order, Latency, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + } else if (SpecialAddressLatency > 0 && + UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { + // The entire loop body is within the current scheduling region + // and the latency of this operation is assumed to be greater + // than the latency of the loop. + // TODO: Recursively mark data-edge predecessors as + // isScheduleHigh too. + SU->isScheduleHigh = true; + } + } + LoopRegs.Deps.erase(I); + } + } + + // clear this register's use list + if (Uses.contains(MO.getReg())) + Uses[MO.getReg()].clear(); + + if (!MO.isDead()) + DefList.clear(); + + // Calls will not be reordered because of chain dependencies (see + // below). Since call operands are dead, calls may continue to be added + // to the DefList making dependence checking quadratic in the size of + // the block. Instead, we leave only one call at the back of the + // DefList. + if (SU->isCall) { + while (!DefList.empty() && DefList.back()->isCall) + DefList.pop_back(); + } + // Defs are pushed in the order they are visited and never reordered. + DefList.push_back(SU); + } +} + +/// addVRegDefDeps - Add register output and data dependencies from this SUnit +/// to instructions that occur later in the same scheduling region if they read +/// from or write to the virtual register defined at OperIdx. +/// +/// TODO: Hoist loop induction variable increments. This has to be +/// reevaluated. Generally, IV scheduling should be done before coalescing. +void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { + const MachineInstr *MI = SU->getInstr(); + unsigned Reg = MI->getOperand(OperIdx).getReg(); + + // SSA defs do not have output/anti dependencies. + // The current operand is a def, so we have at least one. + if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end()) + return; + + // Add output dependence to the next nearest def of this vreg. 
+ // + // Unless this definition is dead, the output dependence should be + // transitively redundant with antidependencies from this definition's + // uses. We're conservative for now until we have a way to guarantee the uses + // are not eliminated sometime during scheduling. The output dependence edge + // is also useful if output latency exceeds def-use latency. + VReg2SUnitMap::iterator DefI = findVRegDef(Reg); + if (DefI == VRegDefs.end()) + VRegDefs.insert(VReg2SUnit(Reg, SU)); + else { + SUnit *DefSU = DefI->SU; + if (DefSU != SU && DefSU != &ExitSU) { + unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx, + DefSU->getInstr()); + DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg)); + } + DefI->SU = SU; + } +} + +/// addVRegUseDeps - Add a register data dependency if the instruction that +/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a +/// register antidependency from this SUnit to instructions that occur later in +/// the same scheduling region if they write the virtual register. +/// +/// TODO: Handle ExitSU "uses" properly. +void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { + MachineInstr *MI = SU->getInstr(); + unsigned Reg = MI->getOperand(OperIdx).getReg(); + + // Lookup this operand's reaching definition. + assert(LIS && "vreg dependencies requires LiveIntervals"); + SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(); + LiveInterval *LI = &LIS->getInterval(Reg); + VNInfo *VNI = LI->getVNInfoBefore(UseIdx); + // VNI will be valid because MachineOperand::readsReg() is checked by caller. + MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); + // Phis and other noninstructions (after coalescing) have a NULL Def. + if (Def) { + SUnit *DefSU = getSUnit(Def); + if (DefSU) { + // The reaching Def lives within this scheduling region. + // Create a data dependence. + // + // TODO: Handle "special" address latencies cleanly. + const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg); + if (!UnitLatencies) { + // Adjust the dependence latency using operand def/use information, then + // allow the target to perform its own adjustments. + computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep)); + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); + } + SU->addPred(dep); + } + } + + // Add antidependence to the following def of the vreg it uses. + VReg2SUnitMap::iterator DefI = findVRegDef(Reg); + if (DefI != VRegDefs.end() && DefI->SU != SU) + DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg)); +} + +/// Create an SUnit for each real instruction, numbered in top-down toplological +/// order. The instruction order A < B, implies that no edge exists from B to A. +/// +/// Map each real instruction to its SUnit. +/// +/// After initSUnits, the SUnits vector cannot be resized and the scheduler may +/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs +/// instead of pointers. +/// +/// MachineScheduler relies on initSUnits numbering the nodes by their order in +/// the original instruction list. +void ScheduleDAGInstrs::initSUnits() { + // We'll be allocating one SUnit for each real instruction in the region, + // which is contained within a basic block. 
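// Sketch, not part of the patch: the virtual-register side of dependence
// construction reduced to its core map, with invented names. VRegDefs above
// plays the role of CurrentDef here: since the walk is bottom-up it always
// holds the nearest following definition of a vreg, so a new def draws an
// output edge to it and a use draws an anti edge to it, while the data edge
// comes from the reaching definition found through LiveIntervals.
#include <unordered_map>

struct Node { int Id; };

static void addEdge(Node *Pred, Node *Succ, const char *Kind) {
  (void)Pred; (void)Succ; (void)Kind;               // Succ must follow Pred
}

struct VRegDeps {
  std::unordered_map<unsigned, Node*> CurrentDef;   // vreg -> next def below

  void visitDef(Node *SU, unsigned VReg) {
    std::unordered_map<unsigned, Node*>::iterator I = CurrentDef.find(VReg);
    if (I != CurrentDef.end() && I->second != SU)
      addEdge(SU, I->second, "output");             // later redef waits for SU
    CurrentDef[VReg] = SU;
  }

  void visitUse(Node *SU, unsigned VReg, Node *ReachingDef) {
    if (ReachingDef)                                // def found in this region
      addEdge(ReachingDef, SU, "data");
    std::unordered_map<unsigned, Node*>::iterator I = CurrentDef.find(VReg);
    if (I != CurrentDef.end() && I->second != SU)
      addEdge(SU, I->second, "anti");               // later def waits for use
  }
};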
SUnits.reserve(BB->size()); + for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) { + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + + SUnit *SU = newSUnit(MI); + MISUnitMap[MI] = SU; + + SU->isCall = MI->isCall(); + SU->isCommutable = MI->isCommutable(); + + // Assign the Latency field of SU using target-provided information. + if (UnitLatencies) + SU->Latency = 1; + else + computeLatency(SU); + } +} + +void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { + // Create an SUnit for each real instruction. + initSUnits(); + // We build scheduling units by walking a block's instruction list from bottom // to top. @@ -203,29 +519,29 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs; std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; - // Check to see if the scheduler cares about latencies. - bool UnitLatencies = ForceUnitLatencies(); - - // Ask the target if address-backscheduling is desirable, and if so how much. - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); - unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); - // Remove any stale debug info; sometimes BuildSchedGraph is called again // without emitting the info from the previous call. DbgValues.clear(); FirstDbgValue = NULL; + assert(Defs.empty() && Uses.empty() && + "Only BuildGraph should update Defs/Uses"); + Defs.setRegLimit(TRI->getNumRegs()); + Uses.setRegLimit(TRI->getNumRegs()); + + assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); + // FIXME: Allow SparseSet to reserve space for the creation of virtual + // registers during scheduling. Don't artificially inflate the Universe + // because we want to assert that vregs are not created during DAG building. + VRegDefs.setUniverse(MRI.getNumVirtRegs()); + // Model data dependencies between instructions being scheduled and the // ExitSU. - AddSchedBarrierDeps(); - - for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { - assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs"); - } + addSchedBarrierDeps(); // Walk the list of instructions, from bottom moving up. MachineInstr *PrevMI = NULL; - for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; + for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; MII != MIE; --MII) { MachineInstr *MI = prior(MII); if (MI && PrevMI) { @@ -238,19 +554,11 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { continue; } - const MCInstrDesc &MCID = MI->getDesc(); - assert(!MCID.isTerminator() && !MI->isLabel() && + assert(!MI->isTerminator() && !MI->isLabel() && "Cannot schedule terminators or labels!"); - // Create the SUnit for this MI. - SUnit *SU = NewSUnit(MI); - SU->isCall = MCID.isCall(); - SU->isCommutable = MCID.isCommutable(); - // Assign the Latency field of SU using target-provided information. - if (UnitLatencies) - SU->Latency = 1; - else - ComputeLatency(SU); + SUnit *SU = MISUnitMap[MI]; + assert(SU && "No SUnit mapped to this MI"); // Add register-based dependencies (data, anti, and output). for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { @@ -259,152 +567,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { unsigned Reg = MO.getReg(); if (Reg == 0) continue; - assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); - - std::vector<SUnit *> &UseList = Uses[Reg]; - // Defs are push in the order they are visited and never reordered. 
- std::vector<SUnit *> &DefList = Defs[Reg]; - // Optionally add output and anti dependencies. For anti - // dependencies we use a latency of 0 because for a multi-issue - // target we want to allow the defining instruction to issue - // in the same cycle as the using instruction. - // TODO: Using a latency of 1 here for output dependencies assumes - // there's no cost for reusing registers. - SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; - unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1; - for (unsigned i = 0, e = DefList.size(); i != e; ++i) { - SUnit *DefSU = DefList[i]; - if (DefSU == &ExitSU) - continue; - if (DefSU != SU && - (Kind != SDep::Output || !MO.isDead() || - !DefSU->getInstr()->registerDefIsDead(Reg))) - DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg)); - } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - std::vector<SUnit *> &MemDefList = Defs[*Alias]; - for (unsigned i = 0, e = MemDefList.size(); i != e; ++i) { - SUnit *DefSU = MemDefList[i]; - if (DefSU == &ExitSU) - continue; - if (DefSU != SU && - (Kind != SDep::Output || !MO.isDead() || - !DefSU->getInstr()->registerDefIsDead(*Alias))) - DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias)); - } - } - - if (MO.isDef()) { - // Add any data dependencies. - unsigned DataLatency = SU->Latency; - for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i]; - if (UseSU == SU) - continue; - unsigned LDataLatency = DataLatency; - // Optionally add in a special extra latency for nodes that - // feed addresses. - // TODO: Do this for register aliases too. - // TODO: Perhaps we should get rid of - // SpecialAddressLatency and just move this into - // adjustSchedDependency for the targets that care about it. - if (SpecialAddressLatency != 0 && !UnitLatencies && - UseSU != &ExitSU) { - MachineInstr *UseMI = UseSU->getInstr(); - const MCInstrDesc &UseMCID = UseMI->getDesc(); - int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg); - assert(RegUseIndex >= 0 && "UseMI doesn's use register!"); - if (RegUseIndex >= 0 && - (UseMCID.mayLoad() || UseMCID.mayStore()) && - (unsigned)RegUseIndex < UseMCID.getNumOperands() && - UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass()) - LDataLatency += SpecialAddressLatency; - } - // Adjust the dependence latency using operand def/use - // information (if any), and then allow the target to - // perform its own adjustments. - const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg); - if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); - ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); - } - UseSU->addPred(dep); - } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - std::vector<SUnit *> &UseList = Uses[*Alias]; - for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i]; - if (UseSU == SU) - continue; - const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias); - if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); - ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); - } - UseSU->addPred(dep); - } - } - - // If a def is going to wrap back around to the top of the loop, - // backschedule it. 
- if (!UnitLatencies && DefList.empty()) { - LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg); - if (I != LoopRegs.Deps.end()) { - const MachineOperand *UseMO = I->second.first; - unsigned Count = I->second.second; - const MachineInstr *UseMI = UseMO->getParent(); - unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); - const MCInstrDesc &UseMCID = UseMI->getDesc(); - // TODO: If we knew the total depth of the region here, we could - // handle the case where the whole loop is inside the region but - // is large enough that the isScheduleHigh trick isn't needed. - if (UseMOIdx < UseMCID.getNumOperands()) { - // Currently, we only support scheduling regions consisting of - // single basic blocks. Check to see if the instruction is in - // the same region by checking to see if it has the same parent. - if (UseMI->getParent() != MI->getParent()) { - unsigned Latency = SU->Latency; - if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) - Latency += SpecialAddressLatency; - // This is a wild guess as to the portion of the latency which - // will be overlapped by work done outside the current - // scheduling region. - Latency -= std::min(Latency, Count); - // Add the artificial edge. - ExitSU.addPred(SDep(SU, SDep::Order, Latency, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); - } else if (SpecialAddressLatency > 0 && - UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { - // The entire loop body is within the current scheduling region - // and the latency of this operation is assumed to be greater - // than the latency of the loop. - // TODO: Recursively mark data-edge predecessors as - // isScheduleHigh too. - SU->isScheduleHigh = true; - } - } - LoopRegs.Deps.erase(I); - } - } - - UseList.clear(); - if (!MO.isDead()) - DefList.clear(); - - // Calls will not be reordered because of chain dependencies (see - // below). Since call operands are dead, calls may continue to be added - // to the DefList making dependence checking quadratic in the size of - // the block. Instead, we leave only one call at the back of the - // DefList. - if (SU->isCall) { - while (!DefList.empty() && DefList.back()->isCall) - DefList.pop_back(); - } - DefList.push_back(SU); - } else { - UseList.push_back(SU); + if (TRI->isPhysicalRegister(Reg)) + addPhysRegDeps(SU, j); + else { + assert(!IsPostRA && "Virtual register encountered!"); + if (MO.isDef()) + addVRegDefDeps(SU, j); + else if (MO.readsReg()) // ignore undef operands + addVRegUseDeps(SU, j); } } @@ -419,9 +589,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // produce more precise dependence information. #define STORE_LOAD_LATENCY 1 unsigned TrueMemOrderLatency = 0; - if (MCID.isCall() || MI->hasUnmodeledSideEffects() || + if (MI->isCall() || MI->hasUnmodeledSideEffects() || (MI->hasVolatileMemoryRef() && - (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) { + (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) { // Be conservative with these and add dependencies on all memory // references, even those that are known to not alias. 
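The replacement code above routes each register operand to one of three helpers: physical registers to addPhysRegDeps, virtual-register defs to addVRegDefDeps, and virtual-register reads to addVRegUseDeps, with undef reads skipped. A standalone sketch of that dispatch shape; the register encoding and helper bodies here are illustrative placeholders, not LLVM's:

#include <cstdint>
#include <cstdio>

// Illustrative encoding only: small numbers are "physical", numbers with the
// top bit set are "virtual".
static bool isPhysical(uint32_t Reg) { return (Reg & 0x80000000u) == 0; }

struct Operand { uint32_t Reg; bool IsDef; bool ReadsReg; };

static void addPhysRegDeps(const Operand &) { std::puts("phys-reg deps"); }
static void addVRegDefDeps(const Operand &) { std::puts("vreg def deps"); }
static void addVRegUseDeps(const Operand &) { std::puts("vreg use deps"); }

static void addDepsForOperand(const Operand &MO) {
  if (MO.Reg == 0)
    return;                          // not a register operand
  if (isPhysical(MO.Reg))
    addPhysRegDeps(MO);
  else if (MO.IsDef)
    addVRegDefDeps(MO);
  else if (MO.ReadsReg)              // ignore undef reads
    addVRegUseDeps(MO);
}

int main() {
  addDepsForOperand({5, false, true});            // physical use
  addDepsForOperand({0x80000001u, true, false});  // virtual def
}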
for (std::map<const Value *, SUnit *>::iterator I = @@ -460,7 +630,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { PendingLoads.clear(); AliasMemDefs.clear(); AliasMemUses.clear(); - } else if (MCID.mayStore()) { + } else if (MI->mayStore()) { bool MayAlias = true; TrueMemOrderLatency = STORE_LOAD_LATENCY; if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { @@ -516,7 +686,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, /*isArtificial=*/true)); - } else if (MCID.mayLoad()) { + } else if (MI->mayLoad()) { bool MayAlias = true; TrueMemOrderLatency = 0; if (MI->isInvariantLoad(AA)) { @@ -558,32 +728,27 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { if (PrevMI) FirstDbgValue = PrevMI; - for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { - Defs[i].clear(); - Uses[i].clear(); - } + Defs.clear(); + Uses.clear(); + VRegDefs.clear(); PendingLoads.clear(); } -void ScheduleDAGInstrs::FinishBlock() { - // Nothing to do. -} - -void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { +void ScheduleDAGInstrs::computeLatency(SUnit *SU) { // Compute the latency for the node. if (!InstrItins || InstrItins->isEmpty()) { SU->Latency = 1; // Simplistic target-independent heuristic: assume that loads take // extra time. - if (SU->getInstr()->getDesc().mayLoad()) + if (SU->getInstr()->mayLoad()) SU->Latency += 2; } else { SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr()); } } -void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, +void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { if (!InstrItins || InstrItins->isEmpty()) return; @@ -608,7 +773,9 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ... // What we want is to compute latency between def of %D6/%D7 and use of // %Q3 instead. - DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); + unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); + if (DefMI->getOperand(Op2).isReg()) + DefIdx = Op2; } MachineInstr *UseMI = Use->getInstr(); // For all uses of the register, calculate the maxmimum latency @@ -656,43 +823,8 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { return oss.str(); } -// EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { - // For MachineInstr-based scheduling, we're rescheduling the instructions in - // the block, so start by removing them from the block. - while (Begin != InsertPos) { - MachineBasicBlock::iterator I = Begin; - ++Begin; - BB->remove(I); - } - - // If first instruction was a DBG_VALUE then put it back. - if (FirstDbgValue) - BB->insert(InsertPos, FirstDbgValue); - - // Then re-insert them according to the given schedule. - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) - BB->insert(InsertPos, SU->getInstr()); - else - // Null SUnit* is a noop. - EmitNoop(); - } - - // Update the Begin iterator, as the first instruction in the block - // may have been scheduled later. - if (!Sequence.empty()) - Begin = Sequence[0]->getInstr(); - - // Reinsert any remaining debug_values. 
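The renamed computeLatency above keeps the old fallback behaviour: without itinerary data every node gets latency 1, with loads bumped by 2 as a crude memory-latency guess; with itineraries the target's getInstrLatency is used instead. A minimal sketch of that fallback heuristic:

#include <cassert>

struct Node { bool MayLoad; unsigned Latency; };

// Fallback when no itinerary data is available: unit latency, plus a small
// constant for loads so their consumers are not scheduled right behind them.
static void computeFallbackLatency(Node &N) {
  N.Latency = 1;
  if (N.MayLoad)
    N.Latency += 2;
}

int main() {
  Node Add{false, 0}, Load{true, 0};
  computeFallbackLatency(Add);
  computeFallbackLatency(Load);
  assert(Add.Latency == 1 && Load.Latency == 3);
}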
- for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator - DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { - std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); - MachineInstr *DbgValue = P.first; - MachineInstr *OrigPrivMI = P.second; - BB->insertAfter(OrigPrivMI, DbgValue); - } - DbgValues.clear(); - FirstDbgValue = NULL; - return BB; +/// Return the basic block label. It is not necessarilly unique because a block +/// contains multiple scheduling regions. But it is fine for visualization. +std::string ScheduleDAGInstrs::getDAGName() const { + return "dag." + BB->getFullName(); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h deleted file mode 100644 index 666bdf548c71..000000000000 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ /dev/null @@ -1,212 +0,0 @@ -//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ScheduleDAGInstrs class, which implements -// scheduling for a MachineInstr-based dependency graph. -// -//===----------------------------------------------------------------------===// - -#ifndef SCHEDULEDAGINSTRS_H -#define SCHEDULEDAGINSTRS_H - -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/SmallSet.h" -#include <map> - -namespace llvm { - class MachineLoopInfo; - class MachineDominatorTree; - - /// LoopDependencies - This class analyzes loop-oriented register - /// dependencies, which are used to guide scheduling decisions. - /// For example, loop induction variable increments should be - /// scheduled as soon as possible after the variable's last use. - /// - class LLVM_LIBRARY_VISIBILITY LoopDependencies { - const MachineLoopInfo &MLI; - const MachineDominatorTree &MDT; - - public: - typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> > - LoopDeps; - LoopDeps Deps; - - LoopDependencies(const MachineLoopInfo &mli, - const MachineDominatorTree &mdt) : - MLI(mli), MDT(mdt) {} - - /// VisitLoop - Clear out any previous state and analyze the given loop. 
- /// - void VisitLoop(const MachineLoop *Loop) { - assert(Deps.empty() && "stale loop dependencies"); - - MachineBasicBlock *Header = Loop->getHeader(); - SmallSet<unsigned, 8> LoopLiveIns; - for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(), - LE = Header->livein_end(); LI != LE; ++LI) - LoopLiveIns.insert(*LI); - - const MachineDomTreeNode *Node = MDT.getNode(Header); - const MachineBasicBlock *MBB = Node->getBlock(); - assert(Loop->contains(MBB) && - "Loop does not contain header!"); - VisitRegion(Node, MBB, Loop, LoopLiveIns); - } - - private: - void VisitRegion(const MachineDomTreeNode *Node, - const MachineBasicBlock *MBB, - const MachineLoop *Loop, - const SmallSet<unsigned, 8> &LoopLiveIns) { - unsigned Count = 0; - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - const MachineInstr *MI = I; - if (MI->isDebugValue()) - continue; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned MOReg = MO.getReg(); - if (LoopLiveIns.count(MOReg)) - Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count))); - } - ++Count; // Not every iteration due to dbg_value above. - } - - const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); - for (std::vector<MachineDomTreeNode*>::const_iterator I = - Children.begin(), E = Children.end(); I != E; ++I) { - const MachineDomTreeNode *ChildNode = *I; - MachineBasicBlock *ChildBlock = ChildNode->getBlock(); - if (Loop->contains(ChildBlock)) - VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns); - } - } - }; - - /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of - /// MachineInstrs. - class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG { - const MachineLoopInfo &MLI; - const MachineDominatorTree &MDT; - const MachineFrameInfo *MFI; - const InstrItineraryData *InstrItins; - - /// Defs, Uses - Remember where defs and uses of each physical register - /// are as we iterate upward through the instructions. This is allocated - /// here instead of inside BuildSchedGraph to avoid the need for it to be - /// initialized and destructed for each block. - std::vector<std::vector<SUnit *> > Defs; - std::vector<std::vector<SUnit *> > Uses; - - /// PendingLoads - Remember where unknown loads are after the most recent - /// unknown store, as we iterate. As with Defs and Uses, this is here - /// to minimize construction/destruction. - std::vector<SUnit *> PendingLoads; - - /// LoopRegs - Track which registers are used for loop-carried dependencies. - /// - LoopDependencies LoopRegs; - - /// LoopLiveInRegs - Track which regs are live into a loop, to help guide - /// back-edge-aware scheduling. - /// - SmallSet<unsigned, 8> LoopLiveInRegs; - - protected: - - /// DbgValues - Remember instruction that preceeds DBG_VALUE. - typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > - DbgValueVector; - DbgValueVector DbgValues; - MachineInstr *FirstDbgValue; - - public: - MachineBasicBlock::iterator Begin; // The beginning of the range to - // be scheduled. The range extends - // to InsertPos. - unsigned InsertPosIndex; // The index in BB of InsertPos. - - explicit ScheduleDAGInstrs(MachineFunction &mf, - const MachineLoopInfo &mli, - const MachineDominatorTree &mdt); - - virtual ~ScheduleDAGInstrs() {} - - /// NewSUnit - Creates a new SUnit and return a ptr to it. 
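The header being deleted above carried the LoopDependencies helper: for each register live into the loop header it records the first use found in the loop body (discovered by walking the dominator tree under the loop) together with how many instructions precede that use, and the scheduler consulted that map for back-edge-aware scheduling. A simplified standalone sketch of that bookkeeping, with stand-in types:

#include <cassert>
#include <map>
#include <set>
#include <vector>

struct Instr { std::vector<unsigned> UsedRegs; };

// Register -> (first using instruction, number of instructions before it).
using LoopDeps = std::map<unsigned, std::pair<const Instr *, unsigned>>;

static LoopDeps collectLoopDeps(const std::set<unsigned> &LiveIns,
                                const std::vector<Instr> &Body) {
  LoopDeps Deps;
  unsigned Count = 0;
  for (const Instr &MI : Body) {
    for (unsigned Reg : MI.UsedRegs)
      if (LiveIns.count(Reg))
        Deps.insert({Reg, {&MI, Count}});  // map::insert keeps the first use only
    ++Count;
  }
  return Deps;
}

int main() {
  std::set<unsigned> LiveIns = {7};
  std::vector<Instr> Body = {{{3, 7}}, {{7}}};
  LoopDeps D = collectLoopDeps(LiveIns, Body);
  assert(D.at(7).second == 0);             // first use of reg 7 is instruction 0
}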
- /// - SUnit *NewSUnit(MachineInstr *MI) { -#ifndef NDEBUG - const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0]; -#endif - SUnits.push_back(SUnit(MI, (unsigned)SUnits.size())); - assert((Addr == 0 || Addr == &SUnits[0]) && - "SUnits std::vector reallocated on the fly!"); - SUnits.back().OrigNode = &SUnits.back(); - return &SUnits.back(); - } - - /// Run - perform scheduling. - /// - void Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator begin, - MachineBasicBlock::iterator end, - unsigned endindex); - - /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are - /// input. - virtual void BuildSchedGraph(AliasAnalysis *AA); - - /// AddSchedBarrierDeps - Add dependencies from instructions in the current - /// list of instructions being scheduled to scheduling barrier. We want to - /// make sure instructions which define registers that are either used by - /// the terminator or are live-out are properly scheduled. This is - /// especially important when the definition latency of the return value(s) - /// are too high to be hidden by the branch or when the liveout registers - /// used by instructions in the fallthrough block. - void AddSchedBarrierDeps(); - - /// ComputeLatency - Compute node latency. - /// - virtual void ComputeLatency(SUnit *SU); - - /// ComputeOperandLatency - Override dependence edge latency using - /// operand use/def information - /// - virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, - SDep& dep) const; - - virtual MachineBasicBlock *EmitSchedule(); - - /// StartBlock - Prepare to perform scheduling in the given block. - /// - virtual void StartBlock(MachineBasicBlock *BB); - - /// Schedule - Order nodes according to selected style, filling - /// in the Sequence member. - /// - virtual void Schedule() = 0; - - /// FinishBlock - Clean up after scheduling in the given block. - /// - virtual void FinishBlock(); - - virtual void dumpNode(const SUnit *SU) const; - - virtual std::string getGraphNodeLabel(const SUnit *SU) const; - }; -} - -#endif diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 4b55a2284f85..38feee95a58e 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -25,7 +25,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Config/config.h" #include <fstream> using namespace llvm; @@ -42,12 +41,12 @@ namespace llvm { static bool renderGraphFromBottomUp() { return true; } - + static bool hasNodeAddressLabel(const SUnit *Node, const ScheduleDAG *Graph) { return true; } - + /// If you want to override the dot attributes printed for a particular /// edge, override this method. static std::string getEdgeAttributes(const SUnit *Node, @@ -59,7 +58,7 @@ namespace llvm { return "color=blue,style=dashed"; return ""; } - + std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph); static std::string getNodeAttributes(const SUnit *N, @@ -82,18 +81,17 @@ std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU, /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG /// rendered using 'dot'. /// -void ScheduleDAG::viewGraph() { -// This code is only for debugging! +void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) { + // This code is only for debugging! #ifndef NDEBUG - if (BB->getBasicBlock()) - ViewGraph(this, "dag." 
+ MF.getFunction()->getNameStr(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() + - ":" + BB->getBasicBlock()->getNameStr()); - else - ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getNameStr()); + ViewGraph(this, Name, false, Title); #else errs() << "ScheduleDAG::viewGraph is only available in debug builds on " << "systems with Graphviz or gv!\n"; #endif // NDEBUG } + +/// Out-of-line implementation with no arguments is handy for gdb. +void ScheduleDAG::viewGraph() { + viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); +} diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index b80c01ed58b9..3d22035974da 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -140,8 +140,6 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { unsigned freeUnits = IS->getUnits(); switch (IS->getReservationKind()) { - default: - assert(0 && "Invalid FU reservation"); case InstrStage::Required: // Required FUs conflict with both reserved and required ones freeUnits &= ~ReservedScoreboard[StageCycle]; @@ -194,8 +192,6 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { unsigned freeUnits = IS->getUnits(); switch (IS->getReservationKind()) { - default: - assert(0 && "Invalid FU reservation"); case InstrStage::Required: // Required FUs conflict with both reserved and required ones freeUnits &= ~ReservedScoreboard[cycle + i]; diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 2282f0e6eb83..a6bdc3be32e0 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -10,24 +10,16 @@ add_llvm_library(LLVMSelectionDAG LegalizeTypesGeneric.cpp LegalizeVectorOps.cpp LegalizeVectorTypes.cpp + ResourcePriorityQueue.cpp ScheduleDAGFast.cpp - ScheduleDAGList.cpp ScheduleDAGRRList.cpp ScheduleDAGSDNodes.cpp SelectionDAG.cpp SelectionDAGBuilder.cpp + SelectionDAGDumper.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp + ScheduleDAGVLIW.cpp TargetLowering.cpp TargetSelectionDAGInfo.cpp ) - -add_llvm_library_dependencies(LLVMSelectionDAG - LLVMAnalysis - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7b878688df63..d1b998f8d840 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -22,7 +22,6 @@ #include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" @@ -64,7 +63,24 @@ namespace { bool LegalTypes; // Worklist of all of the nodes that need to be simplified. - std::vector<SDNode*> WorkList; + // + // This has the semantics that when adding to the worklist, + // the item added must be next to be processed. It should + // also only appear once. The naive approach to this takes + // linear time. + // + // To reduce the insert/remove time to logarithmic, we use + // a set and a vector to maintain our worklist. + // + // The set contains the items on the worklist, but does not + // maintain the order they should be visited. 
+ // + // The vector maintains the order nodes should be visited, but may + // contain duplicate or removed nodes. When choosing a node to + // visit, we pop off the order stack until we find an item that is + // also in the contents set. All operations are O(log N). + SmallPtrSet<SDNode*, 64> WorkListContents; + SmallVector<SDNode*, 64> WorkListOrder; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; @@ -84,18 +100,17 @@ namespace { SDValue visit(SDNode *N); public: - /// AddToWorkList - Add to the work list making sure it's instance is at the - /// the back (next to be processed.) + /// AddToWorkList - Add to the work list making sure its instance is at the + /// back (next to be processed.) void AddToWorkList(SDNode *N) { - removeFromWorkList(N); - WorkList.push_back(N); + WorkListContents.insert(N); + WorkListOrder.push_back(N); } /// removeFromWorkList - remove all instances of N from the worklist. /// void removeFromWorkList(SDNode *N) { - WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N), - WorkList.end()); + WorkListContents.erase(N); } SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, @@ -159,7 +174,9 @@ namespace { SDValue visitADD(SDNode *N); SDValue visitSUB(SDNode *N); SDValue visitADDC(SDNode *N); + SDValue visitSUBC(SDNode *N); SDValue visitADDE(SDNode *N); + SDValue visitSUBE(SDNode *N); SDValue visitMUL(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitUDIV(SDNode *N); @@ -181,7 +198,9 @@ namespace { SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); SDValue visitCTLZ(SDNode *N); + SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); + SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); @@ -279,7 +298,7 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted), + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} /// Run - runs the dag combiner on all nodes in the work list @@ -362,6 +381,8 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { /// specified expression for the same cost as the expression itself, or 2 if we /// can compute the negated form more cheaply than the expression itself. static char isNegatibleForFree(SDValue Op, bool LegalOperations, + const TargetLowering &TLI, + const TargetOptions *Options, unsigned Depth = 0) { // No compile time optimizations on this type. if (Op.getValueType() == MVT::ppcf128) @@ -384,34 +405,44 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, return LegalOperations ? 0 : 1; case ISD::FADD: // FIXME: determine better conditions for this xform. - if (!UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath) return 0; + + // After operation legalization, it might not be legal to create new FSUBs. 
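The comment block and members added above describe the DAGCombiner worklist rework: membership lives in a set, visitation order in a vector that may hold stale entries, and a node is only processed when popping it from the vector also succeeds in erasing it from the set. A standalone sketch of that pattern using standard containers (the real code uses SmallPtrSet and SmallVector):

#include <cstdio>
#include <unordered_set>
#include <vector>

struct Node { int Id; };

struct WorkList {
  std::unordered_set<Node *> Contents;  // what is currently on the worklist
  std::vector<Node *> Order;            // visitation order; may hold stale entries

  void add(Node *N) {                   // no linear-time erase on re-add
    Contents.insert(N);
    Order.push_back(N);
  }
  void remove(Node *N) { Contents.erase(N); }

  Node *pop() {                         // skip entries removed since they were pushed
    while (!Order.empty()) {
      Node *N = Order.back();
      Order.pop_back();
      if (Contents.erase(N))
        return N;
    }
    return nullptr;
  }
};

int main() {
  Node A{1}, B{2};
  WorkList WL;
  WL.add(&A);
  WL.add(&B);
  WL.remove(&B);                        // B becomes a stale entry in Order
  while (Node *N = WL.pop())
    std::printf("visit %d\n", N->Id);   // visits only A
}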
+ if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) + return 0; // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) return V; // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, + Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath) return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) return 1; case ISD::FMUL: case ISD::FDIV: - if (HonorSignDependentRoundingFPMath()) return 0; + if (Options->HonorSignDependentRoundingFPMath()) return 0; // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) return V; - return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, + Depth + 1); case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FSIN: - return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, + Depth + 1); } } @@ -435,10 +466,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } case ISD::FADD: // FIXME: determine better conditions for this xform. - assert(UnsafeFPMath); + assert(DAG.getTarget().Options.UnsafeFPMath); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options, Depth+1)) return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -450,7 +483,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, Op.getOperand(0)); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - assert(UnsafeFPMath); + assert(DAG.getTarget().Options.UnsafeFPMath); // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) @@ -463,10 +496,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::FMUL: case ISD::FDIV: - assert(!HonorSignDependentRoundingFPMath()); + assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options, Depth+1)) return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -944,14 +979,13 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { void DAGCombiner::Run(CombineLevel AtLevel) { // set the instance variables, so that the various visit routines may use it. 
Level = AtLevel; - LegalOperations = Level >= NoIllegalOperations; - LegalTypes = Level >= NoIllegalTypes; + LegalOperations = Level >= AfterLegalizeVectorOps; + LegalTypes = Level >= AfterLegalizeTypes; // Add all the dag nodes to the worklist. - WorkList.reserve(DAG.allnodes_size()); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) - WorkList.push_back(I); + AddToWorkList(I); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any @@ -962,11 +996,17 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // done. Set it to null to avoid confusion. DAG.setRoot(SDValue()); - // while the worklist isn't empty, inspect the node on the end of it and + // while the worklist isn't empty, find a node and // try and combine it. - while (!WorkList.empty()) { - SDNode *N = WorkList.back(); - WorkList.pop_back(); + while (!WorkListContents.empty()) { + SDNode *N; + // The WorkListOrder holds the SDNodes in order, but it may contain duplicates. + // In order to avoid a linear scan, we use a set (O(log N)) to hold what the + // worklist *should* contain, and check the node we want to visit is should + // actually be visited. + do { + N = WorkListOrder.pop_back_val(); + } while (!WorkListContents.erase(N)); // If N has no uses, it is dead. Make sure to revisit all N's operands once // N is deleted from the DAG, since they too may now be dead or may have a @@ -1050,7 +1090,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ADD: return visitADD(N); case ISD::SUB: return visitSUB(N); case ISD::ADDC: return visitADDC(N); + case ISD::SUBC: return visitSUBC(N); case ISD::ADDE: return visitADDE(N); + case ISD::SUBE: return visitSUBE(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); @@ -1071,7 +1113,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SRA: return visitSRA(N); case ISD::SRL: return visitSRL(N); case ISD::CTLZ: return visitCTLZ(N); + case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); + case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); @@ -1408,16 +1452,14 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (VT.isInteger() && !VT.isVector()) { APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || - (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); } } @@ -1486,8 +1528,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. 
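The visitADD/visitADDC hunks above drop the explicit all-ones Mask argument from ComputeMaskedBits while keeping the same fold: when every bit that might be set in one operand is known zero in the other, no carry can occur, so the add can be rewritten as an or. A worked sketch of that test on plain 32-bit masks, where KnownZero plays the role of the APInt results:

#include <cassert>
#include <cstdint>

// ~KnownZero is the set of bits that could possibly be set in a value.
// If the possibly-set bits of the two addends do not overlap, x + y == x | y.
static bool addIsOr(uint32_t LHSKnownZero, uint32_t RHSKnownZero) {
  uint32_t LHSMaybeOne = ~LHSKnownZero;
  uint32_t RHSMaybeOne = ~RHSKnownZero;
  return (RHSKnownZero & LHSMaybeOne) == LHSMaybeOne ||
         (LHSKnownZero & RHSMaybeOne) == RHSMaybeOne;
}

int main() {
  // x can only set the low byte, y can only set the next byte: disjoint.
  assert(addIsOr(~0x000000FFu, ~0x0000FF00u));
  // Bit 1 is possibly set in both operands: carries are possible.
  assert(!addIsOr(~0x3u, ~0x6u));
}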
- if (N->hasNUsesOfValue(0, 1)) - return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0), + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), MVT::Glue)); @@ -1503,16 +1545,14 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || - (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), MVT::Glue)); @@ -1535,7 +1575,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // fold (adde x, y, false) -> (addc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1); return SDValue(); } @@ -1645,6 +1685,51 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSUBC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + + // If the flag result is dead, turn this into an SUB. + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // fold (subc x, x) -> 0 + no borrow + if (N0 == N1) + return CombineTo(N, DAG.getConstant(0, VT), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // fold (subc x, 0) -> x + no borrow + if (N1C && N1C->isNullValue()) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) + no borrow + if (N0C && N0C->isAllOnesValue()) + return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSUBE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // fold (sube x, y, false) -> (subc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1); + + return SDValue(); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1756,7 +1841,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (N0C && N1C && !N1C->isNullValue()) return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); // fold (sdiv X, 1) -> X - if (N1C && N1C->getSExtValue() == 1LL) + if (N1C && N1C->getAPIntValue() == 1LL) return N0; // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) @@ -1770,17 +1855,15 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { N0, N1); } // fold (sdiv X, pow2) -> simple ops after legalize - if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() && - (isPowerOf2_64(N1C->getSExtValue()) || - isPowerOf2_64(-N1C->getSExtValue()))) { + if (N1C && !N1C->isNullValue() && + (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. if (TLI.isPow2DivCheap()) return SDValue(); - int64_t pow2 = N1C->getSExtValue(); - int64_t abs2 = pow2 > 0 ? pow2 : -pow2; - unsigned lg2 = Log2_64(abs2); + unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, @@ -1800,7 +1883,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. - if (pow2 > 0) + if (N1C->getAPIntValue().isNonNegative()) return SRA; AddToWorkList(SRA.getNode()); @@ -1810,8 +1893,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // if integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. - if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) && - !TLI.isIntDivCheap()) { + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { SDValue Op = BuildSDIV(N); if (Op.getNode()) return Op; } @@ -2250,6 +2332,67 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { ORNode, N0.getOperand(1)); } + // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) + // Only perform this optimization after type legalization and before + // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by + // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and + // we don't want to undo this promotion. + // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper + // on scalars. + if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) + && Level == AfterLegalizeVectorOps) { + SDValue In0 = N0.getOperand(0); + SDValue In1 = N1.getOperand(0); + EVT In0Ty = In0.getValueType(); + EVT In1Ty = In1.getValueType(); + // If both incoming values are integers, and the original types are the same. 
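The visitSDIV hunk above moves the power-of-two test and log2 computation onto APInt (isPowerOf2 / countTrailingZeros) but keeps the classic expansion: splat the sign bit, shift it down so it becomes 2^k - 1 for negative inputs, add that bias to the dividend, arithmetic-shift right by k, and negate the result for a negative divisor. A worked standalone version for 32-bit integers (two's complement and arithmetic right shift assumed, as on mainstream targets; Lg2 must be at least 1, since division by 1 is folded separately):

#include <cassert>
#include <cstdint>

// Signed division by +/-2^Lg2 without a divide, rounding toward zero like '/'.
static int32_t sdivByPow2(int32_t X, unsigned Lg2, bool NegativeDivisor) {
  uint32_t Sign = (uint32_t)(X >> 31);               // 0 or 0xFFFFFFFF
  uint32_t Bias = Sign >> (32 - Lg2);                // 0 or 2^Lg2 - 1
  int32_t Q = (int32_t)((uint32_t)X + Bias) >> Lg2;  // add bias, then >> (arithmetic)
  return NegativeDivisor ? -Q : Q;
}

int main() {
  assert(sdivByPow2(-7, 2, false) == -7 / 4);  // -1, not -2: bias gives truncation
  assert(sdivByPow2(7, 2, false) == 7 / 4);    // 1
  assert(sdivByPow2(7, 2, true) == 7 / -4);    // -1
}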
+ if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { + SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1); + SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op); + AddToWorkList(Op.getNode()); + return BC; + } + } + + // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). + // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) + // If both shuffles use the same mask, and both shuffle within a single + // vector, then it is worthwhile to move the swizzle after the operation. + // The type-legalizer generates this pattern when loading illegal + // vector types from memory. In many cases this allows additional shuffle + // optimizations. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N0.getOperand(1).getOpcode() == ISD::UNDEF && + N1.getOperand(1).getOpcode() == ISD::UNDEF) { + ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); + ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); + + assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() && + "Inputs to shuffles are not the same type"); + + unsigned NumElts = VT.getVectorNumElements(); + + // Check that both shuffles use the same mask. The masks are known to be of + // the same length because the result vector type is the same. + bool SameMask = true; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx0 = SVN0->getMaskElt(i); + int Idx1 = SVN1->getMaskElt(i); + if (Idx0 != Idx1) { + SameMask = false; + break; + } + } + + if (SameMask) { + SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(Op.getNode()); + return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, + DAG.getUNDEF(VT), &SVN0->getMask()[0]); + } + } + return SDValue(); } @@ -2312,6 +2455,88 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> + // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must + // already be zero by virtue of the width of the base type of the load. + // + // the 'X' node here can either be nothing or an extract_vector_elt to catch + // more cases. + if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + N0.getOperand(0).getOpcode() == ISD::LOAD) || + N0.getOpcode() == ISD::LOAD) { + LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? + N0 : N0.getOperand(0) ); + + // Get the constant (if applicable) the zero'th operand is being ANDed with. + // This can be a pure constant or a vector splat, in which case we treat the + // vector as a scalar and use the splat value. + APInt Constant = APInt::getNullValue(1); + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + Constant = C->getAPIntValue(); + } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, + SplatBitSize, HasAnyUndefs); + if (IsSplat) { + // Undef bits can contribute to a possible optimisation if set, so + // set them. + SplatValue |= SplatUndef; + + // The splat value may be something like "0x00FFFFFF", which means 0 for + // the first vector value and FF for the rest, repeating. We need a mask + // that will apply equally to all members of the vector, so AND all the + // lanes of the constant together. 
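The long visitAND addition above decides whether an AND applied to a (possibly extending, possibly vector) load is redundant: a splat mask is collapsed to a single lane by AND-ing the lanes together, the mask is resized to the load's memory width, and the AND is dropped only if the resized mask is all ones (converting EXTLOAD to ZEXTLOAD when that is legal and profitable). A simplified scalar sketch of the core check for a zero-extending narrow load:

#include <cassert>
#include <cstdint>

// A zero-extending load of MemBits bits can only produce nonzero bits in
// positions [0, MemBits). ANDing the result with Mask is a no-op exactly when
// the low MemBits of the mask are all ones, i.e. the truncated mask is ~0.
static bool andOfZextLoadIsRedundant(uint64_t Mask, unsigned MemBits) {
  uint64_t LowBits = (MemBits >= 64) ? ~0ull : ((1ull << MemBits) - 1);
  return (Mask & LowBits) == LowBits;
}

int main() {
  assert(andOfZextLoadIsRedundant(0xFF, 8));    // i8 zext load & 0xff: drop the AND
  assert(andOfZextLoadIsRedundant(0x1FF, 8));   // extra high mask bits don't matter
  assert(!andOfZextLoadIsRedundant(0x7F, 8));   // clears bit 7 of the loaded value
}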
+ EVT VT = Vector->getValueType(0); + unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + Constant = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i) + Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + } + } + + // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is + // actually legal and isn't going to get expanded, else this is a false + // optimisation. + bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, + Load->getMemoryVT()); + + // Resize the constant to the same size as the original memory access before + // extension. If it is still the AllOnesValue then this AND is completely + // unneeded. + Constant = + Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); + + bool B; + switch (Load->getExtensionType()) { + default: B = false; break; + case ISD::EXTLOAD: B = CanZextLoadProfitably; break; + case ISD::ZEXTLOAD: + case ISD::NON_EXTLOAD: B = true; break; + } + + if (B && Constant.isAllOnesValue()) { + // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to + // preserve semantics once we get rid of the AND. + SDValue NewLoad(Load, 0); + if (Load->getExtensionType() == ISD::EXTLOAD) { + NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, + Load->getValueType(0), Load->getDebugLoc(), + Load->getChain(), Load->getBasePtr(), + Load->getOffset(), Load->getMemoryVT(), + Load->getMemOperand()); + // Replace uses of the EXTLOAD with the new ZEXTLOAD. + CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + } + + // Fold the AND away, taking care not to fold to the old load node if we + // replaced it. + CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); @@ -3323,7 +3548,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or // (and (srl x, (sub c1, c2), MASK) - if (N1C && N0.getOpcode() == ISD::SRL && + // Only fold this if the inner shift has no other uses -- if it does, folding + // this will increase the total number of instructions. + if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); if (c1 < VT.getSizeInBits()) { @@ -3603,8 +3830,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. @@ -3612,7 +3838,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // If all of the bits input the to ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. - APInt UnknownBits = ~KnownZero & Mask; + APInt UnknownBits = ~KnownZero; if (UnknownBits == 0) return DAG.getConstant(1, VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. 
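The visitSRL change above touches the existing (srl (ctlz x), log2(bitwidth)) fold, which recognizes the idiom that computes x == 0 as an integer: for a 32-bit value, ctlz is 32 only when the value is zero, so shifting the count right by 5 yields 1 for zero and 0 otherwise. The hunk merely drops the explicit mask from ComputeMaskedBits; a standalone worked example of the idiom itself (using the GCC/Clang __builtin_clz intrinsic):

#include <cassert>
#include <cstdint>

// Count leading zeros with the zero case defined as 32, unlike raw
// __builtin_clz, whose result is undefined for an all-zero input.
static unsigned ctlz32(uint32_t X) { return X ? (unsigned)__builtin_clz(X) : 32; }

// (ctlz x) >> 5 is 1 iff x == 0: only a count of 32 has bit 5 set.
static uint32_t isZeroViaCtlz(uint32_t X) { return ctlz32(X) >> 5; }

int main() {
  assert(isZeroViaCtlz(0) == 1);
  assert(isZeroViaCtlz(1) == 0);
  assert(isZeroViaCtlz(0x80000000u) == 0);
}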
@@ -3713,6 +3939,16 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctlz_zero_undef c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return SDValue(); +} + SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -3723,6 +3959,16 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (cttz_zero_undef c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return SDValue(); +} + SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4108,12 +4354,17 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // Only do this before legalize for now. if (VT.isVector() && !LegalOperations) { EVT N0VT = N0.getOperand(0).getValueType(); - // We know that the # elements of the results is the same as the - // # elements of the compare (and the # elements of the compare result - // for that matter). Check to see that they are the same size. If so, - // we know that the element size of the sext'd result matches the - // element size of the compare operands. - if (VT.getSizeInBits() == N0VT.getSizeInBits()) + // On some architectures (such as SSE/NEON/etc) the SETCC result type is + // of the same size as the compared operands. Only optimize sext(setcc()) + // if this is the case. + EVT SVT = TLI.getSetCCResultType(N0VT); + + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. + if (VT.getSizeInBits() == SVT.getSizeInBits()) return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); @@ -4127,11 +4378,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MatchingVectorType = EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + + if (SVT == MatchingVectorType) { + SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } } } @@ -4162,6 +4415,44 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return SDValue(); } +// isTruncateOf - If N is a truncate of some other value, return true, record +// the value being truncated in Op and which of Op's bits are zero in KnownZero. +// This function computes KnownZero to avoid a duplicated call to +// ComputeMaskedBits in the caller. 
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, + APInt &KnownZero) { + APInt KnownOne; + if (N->getOpcode() == ISD::TRUNCATE) { + Op = N->getOperand(0); + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + return true; + } + + if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || + cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) + return false; + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + assert(Op0.getValueType() == Op1.getValueType()); + + ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); + ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); + if (COp0 && COp0->isNullValue()) + Op = Op1; + else if (COp1 && COp1->isNullValue()) + Op = Op0; + else + return false; + + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + + if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) + return false; + + return true; +} + SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4175,6 +4466,30 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0.getOperand(0)); + // fold (zext (truncate x)) -> (zext x) or + // (zext (truncate x)) -> (truncate x) + // This is valid when the truncated bits of x are already zero. + // FIXME: We should extend this to work for vectors too. + SDValue Op; + APInt KnownZero; + if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { + APInt TruncatedBits = + (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? + APInt(Op.getValueSizeInBits(), 0) : + APInt::getBitsSet(Op.getValueSizeInBits(), + N0.getValueSizeInBits(), + std::min(Op.getValueSizeInBits(), + VT.getSizeInBits())); + if (TruncatedBits == (KnownZero & TruncatedBits)) { + if (VT.bitsGT(Op.getValueType())) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op); + if (VT.bitsLT(Op.getValueType())) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + + return Op; + } + } + // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { @@ -4567,6 +4882,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { switch (V.getOpcode()) { default: break; + case ISD::Constant: { + const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); + assert(CV != 0 && "Const value should be ConstSDNode."); + const APInt &CVal = CV->getAPIntValue(); + APInt NewVal = CVal & Mask; + if (NewVal != CVal) { + return DAG.getConstant(NewVal, V.getValueType()); + } + break; + } case ISD::OR: case ISD::XOR: // If the LHS or RHS don't contribute bits to the or, drop them. 
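The new fold in visitZERO_EXTEND above recognizes zext(truncate x): when the bits the truncate discards are already known to be zero, the zext/trunc pair collapses to x itself, or to a single extend or truncate when the widths differ. A simplified worked check for the case where the zext returns to x's own width:

#include <cassert>
#include <cstdint>

// zext_i64(trunc_iN(x)) == x holds exactly when bits [N, 64) of x are zero,
// i.e. when the truncated bit positions are covered by the known-zero mask.
static bool zextOfTruncIsNoop(uint64_t KnownZero, unsigned TruncWidth) {
  uint64_t TruncatedBits = (TruncWidth >= 64) ? 0 : (~0ull << TruncWidth);
  return (TruncatedBits & KnownZero) == TruncatedBits;
}

int main() {
  // x known to fit in 16 bits: zext(trunc_i16 x) back to 64 bits is just x.
  assert(zextOfTruncIsNoop(/*KnownZero=*/~0xFFFFull, /*TruncWidth=*/16));
  // High bits not known zero: the truncate really discards information.
  assert(!zextOfTruncIsNoop(/*KnownZero=*/0x00FF, /*TruncWidth=*/16));
}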
@@ -4705,7 +5030,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (ExtType == ISD::NON_EXTLOAD) Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), NewAlign); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), NewAlign); else Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), @@ -4844,6 +5170,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + bool isLE = TLI.isLittleEndian(); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -4871,6 +5198,44 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0.getOperand(0); } + // Fold extract-and-trunc into a narrow extract. For example: + // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) + // i32 y = TRUNCATE(i64 x) + // -- becomes -- + // v16i8 b = BITCAST (v2i64 val) + // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) + // + // Note: We only run this optimization after type legalization (which often + // creates this pattern) and before operation legalization after which + // we need to be more careful about the vector instructions that we generate. + if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + LegalTypes && !LegalOperations && N0->hasOneUse()) { + + EVT VecTy = N0.getOperand(0).getValueType(); + EVT ExTy = N0.getValueType(); + EVT TrTy = N->getValueType(0); + + unsigned NumElem = VecTy.getVectorNumElements(); + unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); + assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); + + SDValue EltNo = N0->getOperand(1); + if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + + int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); + + SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + NVT, N0.getOperand(0)); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + N->getDebugLoc(), TrTy, V, + DAG.getConstant(Index, MVT::i32)); + } + } + // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. // For example "trunc (or (shl x, 8), y)" // -> trunc y @@ -4934,7 +5299,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), LD1->getBasePtr(), LD1->getPointerInfo(), - false, false, Align); + false, false, false, Align); } return SDValue(); @@ -5004,7 +5369,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), - OrigAlign); + LN0->isInvariant(), OrigAlign); AddToWorkList(N); CombineTo(N0.getNode(), DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), @@ -5017,7 +5382,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // This often reduces constant pool loads. 
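The visitTRUNCATE addition above rewrites trunc(extract_vector_elt(v, Elt)) as an extract of a narrower lane from the bitcast vector, and the only subtle part is the lane index, which depends on endianness: the low (kept) part of a wide element is the first narrow lane on little-endian targets and the last one on big-endian targets. A small worked version of that index computation:

#include <cassert>

// With SizeRatio = wideBits / narrowBits, the narrow lane holding the low
// (truncated) part of wide element Elt is:
static unsigned narrowLaneIndex(unsigned Elt, unsigned SizeRatio,
                                bool IsLittleEndian) {
  return IsLittleEndian ? Elt * SizeRatio                    // low part comes first
                        : Elt * SizeRatio + (SizeRatio - 1); // low part comes last
}

int main() {
  // i8 y = trunc(extract_elt(v2i64 val, 1)): view v2i64 as v16i8, ratio 8.
  assert(narrowLaneIndex(1, 8, /*IsLittleEndian=*/true) == 8);
  assert(narrowLaneIndex(1, 8, /*IsLittleEndian=*/false) == 15);
}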
- if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && + if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, N0.getOperand(0)); @@ -5247,20 +5613,24 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0); // fold (fadd A, 0) -> A - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N1CFP->getValueAPF().isZero()) return N0; // fold (fadd A, (fneg B)) -> (fsub A, B) - if (isNegatibleForFree(N1, LegalOperations) == 2) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) - if (isNegatibleForFree(N0, LegalOperations) == 2) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) - if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD && - N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(1))) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); @@ -5285,20 +5655,39 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); // fold (fsub A, 0) -> A - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N1CFP->getValueAPF().isZero()) return N0; // fold (fsub 0, B) -> -B - if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) { - if (isNegatibleForFree(N1, LegalOperations)) + if (DAG.getTarget().Options.UnsafeFPMath && + N0CFP && N0CFP->getValueAPF().isZero()) { + if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations)) + if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); + // If 'unsafe math' is enabled, fold + // (fsub x, (fadd x, y)) -> (fneg y) & + // (fsub x, (fadd y, x)) -> (fneg y) + if (DAG.getTarget().Options.UnsafeFPMath) { + if (N1.getOpcode() == ISD::FADD) { + SDValue N10 = N1->getOperand(0); + SDValue N11 = N1->getOperand(1); + + if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, + &DAG.getTarget().Options)) + return GetNegatedExpression(N11, DAG, LegalOperations); + else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, + &DAG.getTarget().Options)) + return 
GetNegatedExpression(N10, DAG, LegalOperations); + } + } + return SDValue(); } @@ -5308,6 +5697,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // fold vector ops if (VT.isVector()) { @@ -5322,10 +5712,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); // fold (fmul A, 0) -> 0 - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N1CFP->getValueAPF().isZero()) return N1; // fold (fmul A, 0) -> 0, vector edition. - if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode())) + if (DAG.getTarget().Options.UnsafeFPMath && + ISD::isBuildVectorAllZeros(N1.getNode())) return N1; // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) @@ -5336,8 +5728,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, + &DAG.getTarget().Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) @@ -5348,7 +5742,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) - if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL && + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N0.getOpcode() == ISD::FMUL && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, @@ -5363,6 +5758,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // fold vector ops if (VT.isVector()) { @@ -5374,10 +5770,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. + if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) { + // Compute the reciprocal 1.0 / c2. + APFloat N1APF = N1CFP->getValueAPF(); + APFloat Recip(N1APF.getSemantics(), 1); // 1.0 + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). + if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. 
+ // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || + TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT))) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, + DAG.getConstantFP(Recip, VT)); + } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, + &DAG.getTarget().Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) @@ -5463,7 +5879,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || + (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); @@ -5488,7 +5904,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || + (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); @@ -5630,12 +6046,13 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (isNegatibleForFree(N0, LegalOperations)) + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BITCAST && + if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && !VT.isVector() && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger()) { @@ -5671,7 +6088,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && + if (!TLI.isFAbsFree(VT) && + N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); @@ -5860,6 +6278,47 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { return SDValue(); } +/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that +/// uses N as its base pointer and that N may be folded in the load / store +/// addressing mode. 
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, + SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = Use->getValueType(0); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getValue().getValueType(); + } else + return false; + + TargetLowering::AddrMode AM; + if (N->getOpcode() == ISD::ADD) { + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else if (N->getOpcode() == ISD::SUB) { + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = -Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else + return false; + + return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); +} + /// CombineToPreIndexedLoadStore - Try turning a load / store into a /// pre-indexed load / store when the base pointer is an add or subtract /// and it has other uses besides the load / store. After the @@ -5867,7 +6326,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { /// the add / subtract in and all of its other uses are redirected to the /// new load / store. bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { - if (!LegalOperations) + if (Level < AfterLegalizeDAG) return false; bool isLoad = true; @@ -5946,10 +6405,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (N->hasPredecessorHelper(Use, Visited, Worklist)) return false; - if (!((Use->getOpcode() == ISD::LOAD && - cast<LoadSDNode>(Use)->getBasePtr() == Ptr) || - (Use->getOpcode() == ISD::STORE && - cast<StoreSDNode>(Use)->getBasePtr() == Ptr))) + // If Ptr may be folded in the addressing mode of another use, then it's + // not profitable to do this transformation. + if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI)) RealUse = true; } @@ -5999,7 +6457,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { /// load / store effectively and all of its uses are redirected to the /// new load / store. bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { - if (!LegalOperations) + if (Level < AfterLegalizeDAG) return false; bool isLoad = true; @@ -6046,7 +6504,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { continue; // Try turning it into a post-indexed load / store except when - // 1) All uses are load / store ops that use it as base ptr. + // 1) All uses are load / store ops that use it as base ptr (and + // it may be folded in the addressing mode). // 2) Op must be independent of N, i.e. Op is neither a predecessor // nor a successor of N. Otherwise, if Op is folded that would // create a cycle. @@ -6069,10 +6528,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { for (SDNode::use_iterator III = Use->use_begin(), EEE = Use->use_end(); III != EEE; ++III) { SDNode *UseUse = *III; - if (!((UseUse->getOpcode() == ISD::LOAD && - cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) || - (UseUse->getOpcode() == ISD::STORE && - cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use))) + if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -6139,7 +6595,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (!LD->isVolatile()) { if (N->getValueType(1) == MVT::Other) { // Unindexed loads.
- if (N->hasNUsesOfValue(0, 0)) { + if (!N->hasAnyUseOfValue(0)) { // It's not safe to use the two value CombineTo variant here. e.g. // v1, chain2 = load chain1, loc // v2, chain3 = load chain2, loc @@ -6164,7 +6620,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } else { // Indexed loads. assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); - if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { + if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); @@ -6222,7 +6678,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), BetterChain, Ptr, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->isInvariant(), LD->getAlignment()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), LD->getValueType(0), @@ -6486,7 +6942,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), LD->isNonTemporal(), - NewAlign); + LD->isInvariant(), NewAlign); SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), @@ -6546,7 +7002,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), - false, false, LDAlign); + false, false, false, LDAlign); SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), NewLD, ST->getBasePtr(), @@ -6823,13 +7279,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); + EVT VT = InVec.getValueType(); + EVT NVT = N->getValueType(0); if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. SDValue InOp = InVec.getOperand(0); - EVT NVT = N->getValueType(0); if (InOp.getValueType() != NVT) { assert(InOp.getValueType().isInteger() && NVT.isInteger()); return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); @@ -6837,6 +7294,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return InOp; } + SDValue EltNo = N->getOperand(1); + bool ConstEltNo = isa<ConstantSDNode>(EltNo); + + // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. + // We only perform this optimization before the op legalization phase because + // we may introduce new vector instructions which are not backed by TD patterns. + // For example on AVX, extracting elements from a wide vector without using + // extract_subvector. + if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE + && ConstEltNo && !LegalOperations) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + int NumElem = VT.getVectorNumElements(); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); + // Find the new index to extract from. + int OrigElt = SVOp->getMaskElt(Elt); + + // Extracting an undef index is undef. + if (OrigElt == -1) + return DAG.getUNDEF(NVT); + + // Select the right vector half to extract from. 
+ if (OrigElt < NumElem) { + InVec = InVec->getOperand(0); + } else { + InVec = InVec->getOperand(1); + OrigElt -= NumElem; + } + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT, + InVec, DAG.getConstant(OrigElt, MVT::i32)); + } + // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. if (!LegalOperations) return SDValue(); @@ -6844,17 +7333,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) - SDValue EltNo = N->getOperand(1); - if (isa<ConstantSDNode>(EltNo)) { + if (ConstEltNo) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); bool NewLoad = false; bool BCNumEltsChanged = false; - EVT VT = InVec.getValueType(); EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + return SDValue(); + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + EVT BCVT = InVec.getOperand(0).getValueType(); if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); @@ -6872,12 +7368,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.getOperand(0).getValueType() == ExtVT && ISD::isNormalLoad(InVec.getOperand(0).getNode())) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + LN0 = cast<LoadSDNode>(InVec.getOperand(0)); } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) // => // (load $addr+1*size) + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + // If the bit convert changed the number of elements, it is unsafe // to examine the mask. if (BCNumEltsChanged) @@ -6888,14 +7392,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); - if (InVec.getOpcode() == ISD::BITCAST) + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + InVec = InVec.getOperand(0); + } if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; } } + // Make sure we found a non-volatile load and the extractelement is + // the only use. if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) return SDValue(); @@ -6929,9 +7440,45 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { DAG.getConstant(PtrOff, PtrType)); } - return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), Align); + // The replacement we need to do here is a little tricky: we need to + // replace an extractelement of a load with a load. + // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. 
+ SDValue Load; + SDValue Chain; + if (NVT.bitsGT(LVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) + ? ISD::ZEXTLOAD : ISD::EXTLOAD; + Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(), + NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), + LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align); + Chain = Load.getValue(1); + } else { + Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Align); + Chain = Load.getValue(1); + if (NVT.bitsLT(LVT)) + Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load); + else + Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load); + } + WorkListRemover DeadNodes(*this); + SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; + SDValue To[] = { Load, Chain }; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes); + // Since we're explicitly calling ReplaceAllUses, add the new node to the + // worklist explicitly as well. + AddToWorkList(Load.getNode()); + AddUsersToWorkList(Load.getNode()); // Add users too + // Make sure to revisit this node to clean it up; it will usually be dead. + AddToWorkList(N); + return SDValue(N, 0); } return SDValue(); @@ -6939,11 +7486,122 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); + // Check to see if this is a BUILD_VECTOR of a bunch of values + // which come from any_extend or zero_extend nodes. If so, we can create + // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR + // optimizations. We do not handle sign-extend because we can't fill the sign + // using shuffles. + EVT SourceType = MVT::Other; + bool AllAnyExt = true; + bool AllUndef = true; + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); + // Ignore undef inputs. + if (In.getOpcode() == ISD::UNDEF) continue; + AllUndef = false; + + bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; + bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; + + // Abort if the element is not an extension. + if (!ZeroExt && !AnyExt) { + SourceType = MVT::Other; + break; + } + + // The input is a ZeroExt or AnyExt. Check the original type. + EVT InTy = In.getOperand(0).getValueType(); + + // Check that all of the widened source types are the same. + if (SourceType == MVT::Other) + // First time. + SourceType = InTy; + else if (InTy != SourceType) { + // Multiple incoming types. Abort. + SourceType = MVT::Other; + break; + } + + // Check if all of the extends are ANY_EXTENDs. + AllAnyExt &= AnyExt; + } + + if (AllUndef) + return DAG.getUNDEF(VT); + + // In order to have valid types, all of the inputs must be extended from the + // same source type and all of the inputs must be any or zero extend. + // Scalar sizes must be a power of two. + EVT OutScalarTy = N->getValueType(0).getScalarType(); + bool ValidTypes = SourceType != MVT::Other && + isPowerOf2_32(OutScalarTy.getSizeInBits()) && + isPowerOf2_32(SourceType.getSizeInBits()); + + // We perform this optimization post type-legalization because + // the type-legalizer often scalarizes integer-promoted vectors. + // Performing this optimization before may create bit-casts which + // will be type-legalized to complex code sequences.
+ // We perform this optimization only before the operation legalizer because we + // may introduce illegal operations. + // Create a new simpler BUILD_VECTOR sequence which other optimizations can + // turn into a single shuffle instruction. + if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) && + ValidTypes) { + bool isLE = TLI.isLittleEndian(); + unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); + assert(ElemRatio > 1 && "Invalid element size ratio"); + SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): + DAG.getConstant(0, SourceType); + + unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); + SmallVector<SDValue, 8> Ops(NewBVElems, Filler); + + // Populate the new build_vector + for (unsigned i=0; i < N->getNumOperands(); ++i) { + SDValue Cast = N->getOperand(i); + assert((Cast.getOpcode() == ISD::ANY_EXTEND || + Cast.getOpcode() == ISD::ZERO_EXTEND || + Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); + SDValue In; + if (Cast.getOpcode() == ISD::UNDEF) + In = DAG.getUNDEF(SourceType); + else + In = Cast->getOperand(0); + unsigned Index = isLE ? (i * ElemRatio) : + (i * ElemRatio + (ElemRatio - 1)); + + assert(Index < Ops.size() && "Invalid index"); + Ops[Index] = In; + } + + // The type of the new BUILD_VECTOR node. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); + assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() && + "Invalid vector size"); + // Check if the new vector type is legal. + if (!isTypeLegal(VecVT)) return SDValue(); + + // Make the new BUILD_VECTOR. + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + VecVT, &Ops[0], Ops.size()); + + // The new BUILD_VECTOR node has the potential to be further optimized. + AddToWorkList(BV.getNode()); + // Bitcast to the desired type. + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV); + } // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from // at most two distinct vectors, turn this into a shuffle node. + + // May only combine to shuffle after legalize if shuffle is legal. + if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)) + return SDValue(); + SDValue VecIn1, VecIn2; for (unsigned i = 0; i != NumInScalars; ++i) { // Ignore undef inputs. @@ -6957,15 +7615,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { break; } - // If the input vector type disagrees with the result of the build_vector, - // we can't make a shuffle. + // We allow up to two distinct input vectors. SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); - if (ExtractedFromVec.getValueType() != VT) { - VecIn1 = VecIn2 = SDValue(0, 0); - break; - } - - // Otherwise, remember this. We allow up to two distinct input vectors. if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) continue; @@ -6980,7 +7631,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } } - // If everything is good, we can make a shuffle operation. + // If everything is good, we can make a shuffle operation. if (VecIn1.getNode()) { SmallVector<int, 8> Mask; for (unsigned i = 0; i != NumInScalars; ++i) { @@ -7006,14 +7657,39 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { Mask.push_back(Idx+NumInScalars); } - // Add count and size info. + // We can't generate a shuffle node with mismatched input and output types. + // Attempt to transform a single input vector to the correct type. 
+ if ((VT != VecIn1.getValueType())) { + // We don't support shuffling between TWO values of different types. + if (VecIn2.getNode() != 0) + return SDValue(); + + // We only support widening of vectors which are half the size of the + // output registers. For example XMM->YMM widening on X86 with AVX. + if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) + return SDValue(); + + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + } + + // If VecIn2 is unused then change it to undef. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + + // Check that we were able to transform all incoming values to the same type. + if (VecIn2.getValueType() != VecIn1.getValueType() || + VecIn1.getValueType() != VT) + return SDValue(); + + // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. if (!isTypeLegal(VT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. SDValue Ops[2]; Ops[0] = VecIn1; - Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + Ops[1] = VecIn2; return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); } @@ -7045,19 +7721,23 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) return SDValue(); - // Combine: - // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) - // Into: - // indicies are equal => V1 - // otherwise => (extract_subvec V1, ExtIdx) - // - SDValue InsIdx = N->getOperand(1); - SDValue ExtIdx = V->getOperand(2); + // Only handle cases where both indexes are constants with the same type. + ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); + ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); - if (InsIdx == ExtIdx) - return V->getOperand(1); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, - V->getOperand(0), N->getOperand(1)); + if (InsIdx && ExtIdx && + InsIdx->getValueType(0).getSizeInBits() <= 64 && + ExtIdx->getValueType(0).getSizeInBits() <= 64) { + // Combine: + // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) + // Into: + // indices are equal => V1 + // otherwise => (extract_subvec V1, ExtIdx) + if (InsIdx->getZExtValue() == ExtIdx->getZExtValue()) + return V->getOperand(1); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, + V->getOperand(0), N->getOperand(1)); + } } return SDValue(); @@ -7068,15 +7748,63 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); + + // Canonicalize shuffle undef, undef -> undef + if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); - assert(N0.getValueType().getVectorNumElements() == NumElts && - "Vector shuffle must be normalized in DAG"); + // Canonicalize shuffle v, v -> v, undef + if (N0 == N1) { + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= (int)NumElts) Idx -= NumElts; + NewMask.push_back(Idx); + } + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT), + &NewMask[0]); + } + + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N0.getOpcode() == ISD::UNDEF) { + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= 0) { + if (Idx < (int)NumElts) + Idx += NumElts; + else + Idx -= NumElts; + } + NewMask.push_back(Idx); + } + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT), + &NewMask[0]); + } - // FIXME: implement canonicalizations from DAG.getVectorShuffle() + // Remove references to rhs if it is undef + if (N1.getOpcode() == ISD::UNDEF) { + bool Changed = false; + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= (int)NumElts) { + Idx = -1; + Changed = true; + } + NewMask.push_back(Idx); + } + if (Changed) + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]); + } // If it is a splat, check if the argument vector is another splat or a // build_vector with all scalar elements the same. - ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { SDNode *V = N0.getNode(); @@ -7115,6 +7843,40 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return N0; } } + + // If this shuffle node is simply a swizzle of another shuffle node, + // and it reverses the swizzle of the previous shuffle then we can + // optimize shuffle(shuffle(x, undef), undef) -> x. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N1.getOpcode() == ISD::UNDEF) { + + ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + + // Shuffle nodes can only reverse shuffles with a single non-undef value. + if (N0.getOperand(1).getOpcode() != ISD::UNDEF) + return SDValue(); + + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + assert(Idx < (int)NumElts && "Index references undef operand"); + // Next, this index comes from the first value, which is the incoming + // shuffle. Adopt the incoming index. + if (Idx >= 0) + Idx = OtherSV->getMaskElt(Idx); + + // The combined shuffle must map each index to itself. + if (Idx >= 0 && (unsigned)Idx != i) + return SDValue(); + } + + return OtherSV->getOperand(0); + } + return SDValue(); } @@ -7190,7 +7952,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue Elt = RHS.getOperand(i); if (!isa<ConstantSDNode>(Elt)) return SDValue(); - else if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) + + if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) Indices.push_back(i); else if (cast<ConstantSDNode>(Elt)->isNullValue()) Indices.push_back(NumElts); @@ -7261,8 +8024,19 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } EVT VT = LHSOp.getValueType(); - assert(RHSOp.getValueType() == VT && - "SimplifyVBinOp with different BUILD_VECTOR element types"); + EVT RVT = RHSOp.getValueType(); + if (RVT != VT) { + // Integer BUILD_VECTOR operands may have types larger than the element + // size (e.g., when the element type is not legal). Prior to type + // legalization, the types may not match between the two BUILD_VECTORS. + // Truncate one of the operands to make them match. 
+ if (RVT.getSizeInBits() > VT.getSizeInBits()) { + RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp); + } else { + LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp); + VT = RVT; + } + } SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT, LHSOp, RHSOp); if (FoldOp.getOpcode() != ISD::UNDEF && @@ -7374,8 +8148,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if ((LLD->hasAnyUseOfValue(1) && (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || - (LLD->hasAnyUseOfValue(1) && - (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS)))) + (RLD->hasAnyUseOfValue(1) && + (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) return false; Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), @@ -7393,7 +8167,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // FIXME: Discards pointer info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->isVolatile(), LLD->isNonTemporal(), - LLD->getAlignment()); + LLD->isInvariant(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() : LLD->getExtensionType(), @@ -7509,7 +8283,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, AddToWorkList(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, - false, Alignment); + false, false, Alignment); } } @@ -7517,8 +8291,6 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // Check to see if we can perform the "gzip trick", transforming // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && - N0.getValueType().isInteger() && - N2.getValueType().isInteger() && (N1C->isNullValue() || // (a < 0) ? b : 0 (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 EVT XType = N0.getValueType(); @@ -7720,7 +8492,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> SDValue DAGCombiner::BuildSDIV(SDNode *N) { std::vector<SDNode*> Built; - SDValue S = TLI.BuildSDIV(N, DAG, &Built); + SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built); for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); ii != ee; ++ii) @@ -7734,7 +8506,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> SDValue DAGCombiner::BuildUDIV(SDNode *N) { std::vector<SDNode*> Built; - SDValue S = TLI.BuildUDIV(N, DAG, &Built); + SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built); for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); ii != ee; ++ii) @@ -7856,30 +8628,20 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. 
bool DAGCombiner::FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, - int &SrcValueOffset, - unsigned &SrcValueAlign, - const MDNode *&TBAAInfo) const { - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - Ptr = LD->getBasePtr(); - Size = LD->getMemoryVT().getSizeInBits() >> 3; - SrcValue = LD->getSrcValue(); - SrcValueOffset = LD->getSrcValueOffset(); - SrcValueAlign = LD->getOriginalAlignment(); - TBAAInfo = LD->getTBAAInfo(); - return true; - } - if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - Ptr = ST->getBasePtr(); - Size = ST->getMemoryVT().getSizeInBits() >> 3; - SrcValue = ST->getSrcValue(); - SrcValueOffset = ST->getSrcValueOffset(); - SrcValueAlign = ST->getOriginalAlignment(); - TBAAInfo = ST->getTBAAInfo(); - return false; - } - llvm_unreachable("FindAliasInfo expected a memory operand"); + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, + int &SrcValueOffset, + unsigned &SrcValueAlign, + const MDNode *&TBAAInfo) const { + LSBaseSDNode *LS = cast<LSBaseSDNode>(N); + + Ptr = LS->getBasePtr(); + Size = LS->getMemoryVT().getSizeInBits() >> 3; + SrcValue = LS->getSrcValue(); + SrcValueOffset = LS->getSrcValueOffset(); + SrcValueAlign = LS->getOriginalAlignment(); + TBAAInfo = LS->getTBAAInfo(); + return isa<LoadSDNode>(LS); } /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index e8f8c73d6883..0c1ac6982d2a 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -39,6 +39,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "isel" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" @@ -58,8 +59,15 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; +STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " + "target-independent selector"); +STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " + "target-specific selector"); +STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); + /// startNewBlock - Set the current block to which generated machine /// instructions will be appended, and clear the local CSE map. /// @@ -96,6 +104,11 @@ bool FastISel::hasTrivialKill(const Value *V) const { !hasTrivialKill(Cast->getOperand(0))) return false; + // GEPs with all zero indices are trivially coalesced by fast-isel. + if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) + if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0))) + return false; + // Only instructions with a single use in the same basic block are considered // to have trivial kills. return I->hasOneUse() && @@ -123,15 +136,8 @@ unsigned FastISel::getRegForValue(const Value *V) { return 0; } - // Look up the value to see if we already have a register for it. We - // cache values defined by Instructions across blocks, and other values - // only locally. This is because Instructions already have the SSA - // def-dominates-use requirement enforced. - DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); - if (I != FuncInfo.ValueMap.end()) - return I->second; - - unsigned Reg = LocalValueMap[V]; + // Look up the value to see if we already have a register for it. 
+ unsigned Reg = lookUpRegForValue(V); if (Reg != 0) return Reg; @@ -186,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { uint32_t IntBitWidth = IntVT.getSizeInBits(); bool isExact; (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, - APFloat::rmTowardZero, &isExact); + APFloat::rmTowardZero, &isExact); if (isExact) { APInt IntVal(IntBitWidth, x); @@ -297,6 +303,18 @@ void FastISel::recomputeInsertPt() { ++FuncInfo.InsertPt; } +void FastISel::removeDeadCode(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E) { + assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!"); + while (I != E) { + MachineInstr *Dead = &*I; + ++I; + Dead->eraseFromParent(); + ++NumFastIselDead; + } + recomputeInsertPt(); +} + FastISel::SavePoint FastISel::enterLocalValueArea() { MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; DebugLoc OldDL = DL; @@ -377,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { ISDOpcode = ISD::SRA; } + // Transform "urem x, pow2" -> "and x, pow2-1". + if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) && + isPowerOf2_64(Imm)) { + --Imm; + ISDOpcode = ISD::AND; + } + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, Imm, VT.getSimpleVT()); if (ResultReg == 0) return false; @@ -427,6 +452,11 @@ bool FastISel::SelectGetElementPtr(const User *I) { bool NIsKill = hasTrivialKill(I->getOperand(0)); + // Keep a running tab of the total offset to coalesce multiple N = N + Offset + // into a single N = N + TotalOffset. + uint64_t TotalOffs = 0; + // FIXME: What's a good SWAG number for MaxOffs? + uint64_t MaxOffs = 2048; Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, @@ -436,14 +466,15 @@ bool FastISel::SelectGetElementPtr(const User *I) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset - uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field); - // FIXME: This can be optimized by combining the add with a - // subsequent one. - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; - NIsKill = true; + TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field); + if (TotalOffs >= MaxOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } } Ty = StTy->getElementType(Field); } else { @@ -452,14 +483,26 @@ bool FastISel::SelectGetElementPtr(const User *I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; - uint64_t Offs = + // N = N + Offset + TotalOffs += TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); + if (TotalOffs >= MaxOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } + continue; + } + if (TotalOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. 
return false; NIsKill = true; - continue; + TotalOffs = 0; } // N = N + Idx * ElementSize; @@ -484,6 +527,12 @@ bool FastISel::SelectGetElementPtr(const User *I) { return false; } } + if (TotalOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, N); @@ -512,21 +561,32 @@ bool FastISel::SelectCall(const User *I) { return true; } + MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); + ComputeUsesVAFloatArgument(*Call, &MMI); + const Function *F = Call->getCalledFunction(); if (!F) return false; // Handle selected intrinsic function calls. switch (F->getIntrinsicID()) { default: break; + // At -O0 we don't care about the lifetime intrinsics. + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); if (!DIVariable(DI->getVariable()).Verify() || - !FuncInfo.MF->getMMI().hasDebugInfo()) + !FuncInfo.MF->getMMI().hasDebugInfo()) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; + } const Value *Address = DI->getAddress(); - if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address)) + if (!Address || isa<UndefValue>(Address)) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; + } unsigned Reg = 0; unsigned Offset = 0; @@ -534,16 +594,36 @@ bool FastISel::SelectCall(const User *I) { // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Reg = TRI.getFrameRegister(*FuncInfo.MF); + Reg = TRI.getFrameRegister(*FuncInfo.MF); } if (!Reg) - Reg = getRegForValue(Address); + Reg = lookUpRegForValue(Address); + + // If we have a VLA that has a "use" in a metadata node that's then used + // here but it has no other uses, then we have a problem. E.g., + // + // int foo (const int *x) { + // char a[*x]; + // return 0; + // } + // + // If we assign 'a' a vreg and fast isel later on has to use the selection + // DAG isel, it will want to copy the value to the vreg. However, there are + // no uses, which goes counter to what selection DAG isel expects. + if (!Reg && !Address->use_empty() && isa<Instruction>(Address) && + (!isa<AllocaInst>(Address) || + !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) + Reg = FuncInfo.InitializeRegForValue(Address); if (Reg) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::DBG_VALUE)) .addReg(Reg, RegState::Debug).addImm(Offset) .addMetadata(DI->getVariable()); + else + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. 
+ DEBUG(dbgs() << "Dropping debug info for " << DI); return true; } case Intrinsic::dbg_value: { @@ -581,60 +661,6 @@ bool FastISel::SelectCall(const User *I) { } return true; } - case Intrinsic::eh_exception: { - EVT VT = TLI.getValueType(Call->getType()); - if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand) - break; - - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - unsigned Reg = TLI.getExceptionAddressRegister(); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - UpdateValueMap(Call, ResultReg); - return true; - } - case Intrinsic::eh_selector: { - EVT VT = TLI.getValueType(Call->getType()); - if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand) - break; - if (FuncInfo.MBB->isLandingPad()) - AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB); - else { -#ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(Call); -#endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - - unsigned Reg = TLI.getExceptionSelectorRegister(); - EVT SrcVT = TLI.getPointerTy(); - const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - - bool ResultRegIsKill = hasTrivialKill(Call); - - // Cast the register to the type of the selector. - if (SrcVT.bitsGT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg, ResultRegIsKill); - else if (SrcVT.bitsLT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); - if (ResultReg == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; - - UpdateValueMap(Call, ResultReg); - - return true; - } case Intrinsic::objectsize: { ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1)); unsigned long long Res = CI->isZero() ? -1ULL : 0; @@ -726,8 +752,8 @@ bool FastISel::SelectBitCast(const User *I) { // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { - TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); - TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); + const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); + const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); // Don't attempt a cross-class copy. It will likely fail. if (SrcClass == DstClass) { ResultReg = createResultReg(DstClass); @@ -758,17 +784,33 @@ FastISel::SelectInstruction(const Instruction *I) { DL = I->getDebugLoc(); + MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { + ++NumFastIselSuccessIndependent; DL = DebugLoc(); return true; } + // Remove dead code. However, ignore call instructions since we've flushed + // the local value map and recomputed the insert point. + if (!isa<CallInst>(I)) { + recomputeInsertPt(); + if (SavedInsertPt != FuncInfo.InsertPt) + removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); + } // Next, try calling the target to attempt to handle the instruction. 
+ SavedInsertPt = FuncInfo.InsertPt; if (TargetSelectInstruction(I)) { + ++NumFastIselSuccessTarget; DL = DebugLoc(); return true; } + // Check for dead code and remove as necessary. + recomputeInsertPt(); + if (SavedInsertPt != FuncInfo.InsertPt) + removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); DL = DebugLoc(); return false; @@ -779,8 +821,11 @@ FastISel::SelectInstruction(const Instruction *I) { /// the CFG. void FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { - if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) { - // The unconditional fall-through case, which needs no instructions. + + if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { + // For more accurate line information if this is the only instruction + // in the block then emit it, otherwise we have the unconditional + // fall-through case, which needs no instructions. } else { // The unconditional branch case. TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, @@ -1354,8 +1399,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { - // Promote MVT::i1. - if (VT == MVT::i1) + // Handle integer promotions, though, because they're common and easy. + if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); else { FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b052740a1abe..8dde919079d9 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "function-lowering-info" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -68,7 +69,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { GetReturnInfo(Fn->getReturnType(), Fn->getAttributes().getRetAttributes(), Outs, TLI); CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF, - Fn->isVarArg(), + Fn->isVarArg(), Outs, Fn->getContext()); // Initialize the mapping of values to registers. This is only set up for @@ -92,14 +93,16 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { // candidate. I.e., it would trigger the creation of a stack protector. bool MayNeedSP = (AI->isArrayAllocation() || - (TySize > 8 && isa<ArrayType>(Ty) && + (TySize >= 8 && isa<ArrayType>(Ty) && cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, + MayNeedSP); } for (; BB != EB; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { // Mark values used outside their block as exported, by allocating // a virtual register for them. if (isUsedOutsideOfDefiningBlock(I)) @@ -355,7 +358,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { /// argument. This overrides previous frame index entry for this argument, /// if any. 
void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A, - int FI) { + int FI) { ByValArgFrameIndexMap[A] = FI; } @@ -367,10 +370,34 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { ByValArgFrameIndexMap.find(A); if (I != ByValArgFrameIndexMap.end()) return I->second; - DEBUG(dbgs() << "Argument does not have assigned frame index!"); + DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); return 0; } +/// ComputeUsesVAFloatArgument - Determine if any floating-point values are +/// being passed to this variadic function, and set the MachineModuleInfo's +/// usesVAFloatArgument flag if so. This flag is used to emit an undefined +/// reference to _fltused on Windows, which will link in MSVCRT's +/// floating-point support. +void llvm::ComputeUsesVAFloatArgument(const CallInst &I, + MachineModuleInfo *MMI) +{ + FunctionType *FT = cast<FunctionType>( + I.getCalledValue()->getType()->getContainedType(0)); + if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + Type* T = I.getArgOperand(i)->getType(); + for (po_iterator<Type*> i = po_begin(T), e = po_end(T); + i != e; ++i) { + if (i->isFloatingPointTy()) { + MMI->setUsesVAFloatArgument(true); + return; + } + } + } + } +} + /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, @@ -425,34 +452,6 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, } } -void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad, - MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { - SmallPtrSet<const BasicBlock*, 4> Visited; - - // The 'eh.selector' call may not be in the direct successor of a basic block, - // but could be several successors deeper. If we don't find it, try going one - // level further. <rdar://problem/8824861> - while (Visited.insert(SuccBB)) { - for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end(); - I != E; ++I) - if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { - // Apply the catch info to LPad. - AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]); -#ifndef NDEBUG - if (!FLI.MBBMap[SuccBB]->isLandingPad()) - FLI.CatchInfoFound.insert(EHSel); -#endif - return; - } - - const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator()); - if (Br && Br->isUnconditional()) - SuccBB = Br->getSuccessor(0); - else - break; - } -} - /// AddLandingPadInfo - Extract the exception handling information from the /// landingpad instruction and add them to the specified machine module info. 
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 2ff66f8f8715..1467d887789c 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -294,7 +294,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) DstRC = TII->getRegClass(*II, IIOpNum, TRI); - assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) && + assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) && "Don't have operand info for this instruction!"); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); @@ -351,6 +351,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, MI->addOperand(MachineOperand::CreateFPImm(CFP)); } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask())); } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(), TGA->getTargetFlags())); @@ -574,14 +576,19 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); if ((i & 1) == 0) { - unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); - unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); - const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); - const TargetRegisterClass *SRC = + RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1)); + // Skip physical registers as they don't have a vreg to get and we'll + // insert copies for them in TwoAddressInstructionPass anyway. + if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); + unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); - if (SRC && SRC != RC) { - MRI->setRegClass(NewVReg, SRC); - RC = SRC; + if (SRC && SRC != RC) { + MRI->setRegClass(NewVReg, SRC); + RC = SRC; + } } } AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, @@ -700,33 +707,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Create the new machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); - // The MachineInstr constructor adds implicit-def operands. Scan through - // these to determine which are dead. - if (MI->getNumOperands() != 0 && - Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { - // First, collect all used registers. - SmallVector<unsigned, 8> UsedRegs; - for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) - if (F->getOpcode() == ISD::CopyFromReg) - UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); - else { - // Collect declared implicit uses. - const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); - UsedRegs.append(MCID.getImplicitUses(), - MCID.getImplicitUses() + MCID.getNumImplicitUses()); - // In addition to declared implicit uses, we must also check for - // direct RegisterSDNode operands. 
- for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) - if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { - unsigned Reg = R->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - UsedRegs.push_back(Reg); - } - } - // Then mark unused registers as dead. - MI->setPhysRegsDeadExcept(UsedRegs, *TRI); - } - // Add result register values for things that are defined by this // instruction. if (NumResults) @@ -751,30 +731,63 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // hook knows where in the block to insert the replacement code. MBB->insert(InsertPos, MI); + // The MachineInstr may also define physregs instead of virtregs. These + // physreg values can reach other instructions in different ways: + // + // 1. When there is a use of a Node value beyond the explicitly defined + // virtual registers, we emit a CopyFromReg for one of the implicitly + // defined physregs. This only happens when HasPhysRegOuts is true. + // + // 2. A CopyFromReg reading a physreg may be glued to this instruction. + // + // 3. A glued instruction may implicitly use a physreg. + // + // 4. A glued instruction may use a RegisterSDNode operand. + // + // Collect all the used physreg defs, and make sure that any unused physreg + // defs are marked as dead. + SmallVector<unsigned, 8> UsedRegs; + // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; - if (Node->hasAnyUseOfValue(i)) - EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); - // If there are no uses, mark the register as dead now, so that - // MachineLICM/Sink can see that it's dead. Don't do this if the - // node has a Glue value, for the benefit of targets still using - // Glue for values in physregs. - else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) - MI->addRegisterDead(Reg, TRI); + if (!Node->hasAnyUseOfValue(i)) + continue; + // This implicitly defined physreg has a use. + UsedRegs.push_back(Reg); + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); } } - // If the instruction has implicit defs and the node doesn't, mark the - // implicit def as dead. If the node has any glue outputs, we don't do this - // because we don't know what implicit defs are being used by glued nodes. - if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) - if (const unsigned *IDList = II.getImplicitDefs()) { - for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs(); - i != e; ++i) - MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI); + // Scan the glue chain for any used physregs. + if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { + for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) { + if (F->getOpcode() == ISD::CopyFromReg) { + UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); + continue; + } else if (F->getOpcode() == ISD::CopyToReg) { + // Skip CopyToReg nodes that are internal to the glue chain. + continue; + } + // Collect declared implicit uses. + const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); + UsedRegs.append(MCID.getImplicitUses(), + MCID.getImplicitUses() + MCID.getNumImplicitUses()); + // In addition to declared implicit uses, we must also check for + // direct RegisterSDNode operands. 
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { + unsigned Reg = R->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + UsedRegs.push_back(Reg); + } } + } + + // Finally mark unused registers as dead. + if (!UsedRegs.empty() || II.getImplicitDefs()) + MI->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. #ifdef NDEBUG @@ -794,10 +807,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, Node->dump(); #endif llvm_unreachable("This target-independent node should have been selected!"); - break; case ISD::EntryToken: llvm_unreachable("EntryToken should have been excluded from the schedule!"); - break; case ISD::MERGE_VALUES: case ISD::TokenFactor: // fall thru break; diff --git a/lib/CodeGen/SelectionDAG/LLVMBuild.txt b/lib/CodeGen/SelectionDAG/LLVMBuild.txt new file mode 100644 index 000000000000..81d2e000a2e8 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/CodeGen/SelectionDAG/LLVMBuild.txt -----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = SelectionDAG +parent = CodeGen +required_libraries = Analysis CodeGen Core MC Support Target TransformUtils diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 63255ae2ebd9..a96a99781f4e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -46,37 +46,18 @@ using namespace llvm; /// will attempt merge setcc and brc instructions into brcc's. /// namespace { -class SelectionDAGLegalize { +class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - // Libcall insertion helpers. - - /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been - /// legalized. We use this to ensure that calls are properly serialized - /// against each other, including inserted libcalls. - SDValue LastCALLSEQ_END; - - /// IsLegalizingCall - This member is used *only* for purposes of providing - /// helpful assertions that a libcall isn't created while another call is - /// being legalized (which could lead to non-serialized call sequences). - bool IsLegalizingCall; - - /// LegalizedNodes - For nodes that are of legal width, and that have more - /// than one use, this map indicates what regularized operand to use. This - /// allows us to avoid legalizing the same thing more than once. - DenseMap<SDValue, SDValue> LegalizedNodes; + /// LegalizePosition - The iterator for walking through the node list. + SelectionDAG::allnodes_iterator LegalizePosition; - void AddLegalizedOperand(SDValue From, SDValue To) { - LegalizedNodes.insert(std::make_pair(From, To)); - // If someone requests legalization of the new node, return itself. 
- if (From != To) - LegalizedNodes.insert(std::make_pair(To, To)); + /// LegalizedNodes - The set of nodes which have already been legalized. + SmallPtrSet<SDNode *, 16> LegalizedNodes; - // Transfer SDDbgValues. - DAG.TransferDbgValues(From, To); - } + // Libcall insertion helpers. public: explicit SelectionDAGLegalize(SelectionDAG &DAG); @@ -84,9 +65,8 @@ public: void LegalizeDAG(); private: - /// LegalizeOp - Return a legal replacement for the given operation, with - /// all legal operands. - SDValue LegalizeOp(SDValue O); + /// LegalizeOp - Legalizes the given operation. + void LegalizeOp(SDNode *Node); SDValue OptimizeFloatStore(StoreSDNode *ST); @@ -105,10 +85,7 @@ private: /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, - SmallVectorImpl<int> &Mask) const; - - bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, - SmallPtrSet<SDNode*, 32> &NodesLeadingTo); + ArrayRef<int> Mask) const; void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl); @@ -150,10 +127,46 @@ private: SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); + SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); - void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); - void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandNode(SDNode *Node); + void PromoteNode(SDNode *Node); + + void ForgetNode(SDNode *N) { + LegalizedNodes.erase(N); + if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) + ++LegalizePosition; + } + +public: + // DAGUpdateListener implementation. + virtual void NodeDeleted(SDNode *N, SDNode *E) { + ForgetNode(N); + } + virtual void NodeUpdated(SDNode *N) {} + + // Node replacement helpers + void ReplacedNode(SDNode *N) { + if (N->use_empty()) { + DAG.RemoveDeadNode(N, this); + } else { + ForgetNode(N); + } + } + void ReplaceNode(SDNode *Old, SDNode *New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old); + } + void ReplaceNode(SDValue Old, SDValue New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old.getNode()); + } + void ReplaceNode(SDNode *Old, const SDValue *New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old); + } }; } @@ -164,7 +177,7 @@ private: SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, - SmallVectorImpl<int> &Mask) const { + ArrayRef<int> Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); unsigned NumDestElts = NVT.getVectorNumElements(); unsigned NumEltsGrowth = NumDestElts / NumMaskElts; @@ -195,145 +208,37 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) } void SelectionDAGLegalize::LegalizeDAG() { - LastCALLSEQ_END = DAG.getEntryNode(); - IsLegalizingCall = false; - - // The legalize process is inherently a bottom-up recursive process (users - // legalize their uses before themselves). Given infinite stack space, we - // could just start legalizing on the root and traverse the whole graph. In - // practice however, this causes us to run out of stack space on large basic - // blocks. To avoid this problem, compute an ordering of the nodes where each - // node is only legalized after all of its operands are legalized. 
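// [Editor's sketch — not part of this patch.] The replacement driver that
// follows reduces to a generic fixpoint: sweep the node list in reverse,
// remember what has been visited, and keep sweeping, because processing a
// node may append brand-new nodes that still need a visit. All names below
// are illustrative stand-ins, not LLVM API.
#include <set>
#include <vector>

template <typename T, typename Fn>
void fixpointSweep(std::vector<T> &Items, Fn Process) {
  std::set<T> Visited;
  for (;;) {
    bool AnyNew = false;
    for (size_t i = Items.size(); i-- != 0;) {
      if (Visited.insert(Items[i]).second) {
        AnyNew = true;
        Process(Items[i]); // may push_back further items onto Items
      }
    }
    if (!AnyNew)
      break; // a full sweep visited nothing new: done
  }
}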
DAG.AssignTopologicalOrder(); - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) - LegalizeOp(SDValue(I, 0)); - // Finally, it's possible the root changed. Get the new root. - SDValue OldRoot = DAG.getRoot(); - assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); - DAG.setRoot(LegalizedNodes[OldRoot]); - - LegalizedNodes.clear(); - - // Remove dead nodes now. - DAG.RemoveDeadNodes(); -} - - -/// FindCallEndFromCallStart - Given a chained node that is part of a call -/// sequence, find the CALLSEQ_END node that terminates the call sequence. -static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { - // Nested CALLSEQ_START/END constructs aren't yet legal, - // but we can DTRT and handle them correctly here. - if (Node->getOpcode() == ISD::CALLSEQ_START) - depth++; - else if (Node->getOpcode() == ISD::CALLSEQ_END) { - depth--; - if (depth == 0) - return Node; - } - if (Node->use_empty()) - return 0; // No CallSeqEnd - - // The chain is usually at the end. - SDValue TheChain(Node, Node->getNumValues()-1); - if (TheChain.getValueType() != MVT::Other) { - // Sometimes it's at the beginning. - TheChain = SDValue(Node, 0); - if (TheChain.getValueType() != MVT::Other) { - // Otherwise, hunt for it. - for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i) - if (Node->getValueType(i) == MVT::Other) { - TheChain = SDValue(Node, i); - break; - } - - // Otherwise, we walked into a node without a chain. - if (TheChain.getValueType() != MVT::Other) - return 0; + // Visit all the nodes. We start in topological order, so that we see + // nodes with their original operands intact. Legalization can produce + // new nodes which may themselves need to be legalized. Iterate until all + // nodes have been legalized. + for (;;) { + bool AnyLegalized = false; + for (LegalizePosition = DAG.allnodes_end(); + LegalizePosition != DAG.allnodes_begin(); ) { + --LegalizePosition; + + SDNode *N = LegalizePosition; + if (LegalizedNodes.insert(N)) { + AnyLegalized = true; + LegalizeOp(N); + } } - } - - for (SDNode::use_iterator UI = Node->use_begin(), - E = Node->use_end(); UI != E; ++UI) { - - // Make sure to only follow users of our token chain. - SDNode *User = *UI; - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) - if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User, depth)) - return Result; - } - return 0; -} - -/// FindCallStartFromCallEnd - Given a chained node that is part of a call -/// sequence, find the CALLSEQ_START node that initiates the call sequence. -static SDNode *FindCallStartFromCallEnd(SDNode *Node) { - int nested = 0; - assert(Node && "Didn't find callseq_start for a call??"); - while (Node->getOpcode() != ISD::CALLSEQ_START || nested) { - Node = Node->getOperand(0).getNode(); - assert(Node->getOperand(0).getValueType() == MVT::Other && - "Node doesn't have a token chain argument!"); - switch (Node->getOpcode()) { - default: + if (!AnyLegalized) break; - case ISD::CALLSEQ_START: - if (!nested) - return Node; - nested--; - break; - case ISD::CALLSEQ_END: - nested++; - break; - } - } - return 0; -} - -/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to -/// see if any uses can reach Dest. If no dest operands can get to dest, -/// legalize them, legalize ourself, and return false, otherwise, return true. -/// -/// Keep track of the nodes we fine that actually do lead to Dest in -/// NodesLeadingTo. 
This avoids retraversing them exponential number of times. -/// -bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, - SmallPtrSet<SDNode*, 32> &NodesLeadingTo) { - if (N == Dest) return true; // N certainly leads to Dest :) - - // If we've already processed this node and it does lead to Dest, there is no - // need to reprocess it. - if (NodesLeadingTo.count(N)) return true; - - // If the first result of this node has been already legalized, then it cannot - // reach N. - if (LegalizedNodes.count(SDValue(N, 0))) return false; - - // Okay, this node has not already been legalized. Check and legalize all - // operands. If none lead to Dest, then we can legalize this node. - bool OperandsLeadToDest = false; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - OperandsLeadToDest |= // If an operand leads to Dest, so do we. - LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, - NodesLeadingTo); - if (OperandsLeadToDest) { - NodesLeadingTo.insert(N); - return true; } - // Okay, this node looks safe, legalize it and return false. - LegalizeOp(SDValue(N, 0)); - return false; + // Remove dead nodes now. + DAG.RemoveDeadNodes(); } /// ExpandConstantFP - Expands the ConstantFP node to an integer constant or /// a load from the constant pool. -static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, - SelectionDAG &DAG, const TargetLowering &TLI) { +SDValue +SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { bool Extend = false; DebugLoc dl = CFP->getDebugLoc(); @@ -369,20 +274,27 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); - if (Extend) - return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, - DAG.getEntryNode(), - CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, Alignment); - return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, false, - Alignment); + if (Extend) { + SDValue Result = + DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, + DAG.getEntryNode(), + CPIdx, MachinePointerInfo::getConstantPool(), + VT, false, false, Alignment); + return Result; + } + SDValue Result = + DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), false, false, false, + Alignment); + return Result; } /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. -static -SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, - const TargetLowering &TLI) { +static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI, + SelectionDAGLegalize *DAGLegalize) { + assert(ST->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed stores not implemented!"); SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); @@ -397,8 +309,10 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // same size, then a (misaligned) int store. // FIXME: Does not handle truncating floating point stores! 
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); - return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), Alignment); + Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); + return; } // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -427,7 +341,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Load one integer register's worth from the stack slot. SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo().getWithOffset(Offset), @@ -458,8 +372,11 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // The order of the stores doesn't matter - say it with a TokenFactor. - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); + return; } assert(ST->getMemoryVT().isInteger() && !ST->getMemoryVT().isVector() && @@ -488,13 +405,18 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); } /// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. -static -SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, - const TargetLowering &TLI) { +static void +ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI, + SDValue &ValResult, SDValue &ChainResult) { + assert(LD->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed loads not implemented!"); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); @@ -507,13 +429,15 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // then bitconvert to floating point or vector. SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); - SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Result; + ChainResult = Chain; + return; } // Copy the value to a (aligned) stack slot using (unaligned) integer @@ -537,6 +461,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. 
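// [Editor's sketch — not part of this patch.] The two-half-store path of
// ExpandUnalignedStore earlier in this hunk, written out as plain scalar code
// for a 32-bit value split into two 16-bit pieces. The legalizer builds the
// same thing from a shift plus two truncating stores; this version assumes
// the little-endian arrangement (the non-big-endian branch).
#include <cstdint>
#include <cstring>

static void storeUnaligned32LE(uint8_t *Ptr, uint32_t Val) {
  uint16_t Lo = uint16_t(Val);       // Val truncated to the low half
  uint16_t Hi = uint16_t(Val >> 16); // Val shifted down to the high half
  std::memcpy(Ptr, &Lo, sizeof(Lo));              // store at Ptr
  std::memcpy(Ptr + sizeof(Lo), &Hi, sizeof(Hi)); // store at the offset pointer
}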
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, @@ -572,8 +497,9 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, MachinePointerInfo(), LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. - SDValue Ops[] = { Load, TF }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Load; + ChainResult = TF; + return; } assert(LoadedVT.isInteger() && !LoadedVT.isVector() && "Unaligned load of unsupported type."); @@ -626,8 +552,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - SDValue Ops[] = { Result, TF }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Result; + ChainResult = TF; } /// PerformInsertVectorEltInMemory - Some target cannot handle a variable @@ -672,7 +598,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, false, false, 0); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), false, false, 0); + MachinePointerInfo::getFixedStack(SPFI), false, false, + false, 0); } @@ -763,11 +690,10 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { /// LegalizeOp - Return a legal replacement for the given operation, with /// all legal operands. -SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { - if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. - return Op; +void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { + if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. + return; - SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) @@ -782,13 +708,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); - if (I != LegalizedNodes.end()) return I->second; - SDValue Tmp1, Tmp2, Tmp3, Tmp4; - SDValue Result = Op; bool isCustom = false; // Figure out the correct action; the way to query this varies by opcode @@ -798,10 +718,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: - case ISD::VAARG: case ISD::STACKSAVE: Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; + case ISD::VAARG: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + if (Action != TargetLowering::Promote) + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::EXTRACT_VECTOR_ELT: @@ -865,7 +790,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::FRAME_TO_ARGS_OFFSET: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: - case ISD::EH_SJLJ_DISPATCHSETUP: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -882,17 +806,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; - case ISD::BUILD_VECTOR: - // A weird case: legalization for BUILD_VECTOR never legalizes the - // operands! - // FIXME: This really sucks... 
changing it isn't semantically incorrect, - // but it massively pessimizes the code for floating-point BUILD_VECTORs - // because ConstantFP operands get legalized into constant pool loads - // before the BUILD_VECTOR code can see them. It doesn't usually bite, - // though, because BUILD_VECTORS usually get lowered into other nodes - // which get legalized properly. - SimpleFinishLegalizing = false; - break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -903,22 +816,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } if (SimpleFinishLegalizing) { - SmallVector<SDValue, 8> Ops, ResultVals; + SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - Ops.push_back(LegalizeOp(Node->getOperand(i))); + Ops.push_back(Node->getOperand(i)); switch (Node->getOpcode()) { default: break; - case ISD::BR: - case ISD::BRIND: - case ISD::BR_JT: - case ISD::BR_CC: - case ISD::BRCOND: - // Branches tweak the chain to include LastCALLSEQ_END - Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], - LastCALLSEQ_END); - Ops[0] = LegalizeOp(Ops[0]); - LastCALLSEQ_END = DAG.getEntryNode(); - break; case ISD::SHL: case ISD::SRL: case ISD::SRA: @@ -926,57 +828,66 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::ROTR: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[1].getValueType().isVector()) - Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), - Ops[1])); + if (!Ops[1].getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + Ops[1] = Handle.getValue(); + } break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: case ISD::SHL_PARTS: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[2].getValueType().isVector()) - Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), - Ops[2])); + if (!Ops[2].getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + Ops[2] = Handle.getValue(); + } break; } - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(), - Ops.size()), 0); + SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + if (NewNode != Node) { + DAG.ReplaceAllUsesWith(Node, NewNode, this); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); + ReplacedNode(Node); + Node = NewNode; + } switch (Action) { case TargetLowering::Legal: - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - ResultVals.push_back(Result.getValue(i)); - break; + return; case TargetLowering::Custom: // FIXME: The handling for custom lowering with multiple results is // a complete mess. 
- Tmp1 = TLI.LowerOperation(Result, DAG); + Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Tmp1.getNode()) { + SmallVector<SDValue, 8> ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { if (e == 1) ResultVals.push_back(Tmp1); else ResultVals.push_back(Tmp1.getValue(i)); } - break; + if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) { + DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); + ReplacedNode(Node); + } + return; } // FALL THROUGH case TargetLowering::Expand: - ExpandNode(Result.getNode(), ResultVals); - break; + ExpandNode(Node); + return; case TargetLowering::Promote: - PromoteNode(Result.getNode(), ResultVals); - break; - } - if (!ResultVals.empty()) { - for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) { - if (ResultVals[i] != SDValue(Node, i)) - ResultVals[i] = LegalizeOp(ResultVals[i]); - AddLegalizedOperand(SDValue(Node, i), ResultVals[i]); - } - return ResultVals[Op.getResNo()]; + PromoteNode(Node); + return; } } @@ -987,160 +898,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Node->dump( &DAG); dbgs() << "\n"; #endif - assert(0 && "Do not know how to legalize this operator!"); + llvm_unreachable("Do not know how to legalize this operator!"); - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: { - // Scalarize vector SRA/SRL/SHL. - EVT VT = Node->getValueType(0); - assert(VT.isVector() && "Unable to legalize non-vector shift"); - assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); - unsigned NumElem = VT.getVectorNumElements(); - - SmallVector<SDValue, 8> Scalars; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(0), DAG.getIntPtrConstant(Idx)); - SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(1), DAG.getIntPtrConstant(Idx)); - Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, - VT.getScalarType(), Ex, Sh)); - } - Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Scalars[0], Scalars.size()); - break; - } - - case ISD::BUILD_VECTOR: - switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { - default: assert(0 && "This action is not supported yet!"); - case TargetLowering::Custom: - Tmp3 = TLI.LowerOperation(Result, DAG); - if (Tmp3.getNode()) { - Result = Tmp3; - break; - } - // FALLTHROUGH - case TargetLowering::Expand: - Result = ExpandBUILD_VECTOR(Result.getNode()); - break; - } - break; - case ISD::CALLSEQ_START: { - SDNode *CallEnd = FindCallEndFromCallStart(Node); - - // Recursively Legalize all of the inputs of the call end that do not lead - // to this call start. This ensures that any libcalls that need be inserted - // are inserted *before* the CALLSEQ_START. - {SmallPtrSet<SDNode*, 32> NodesLeadingTo; - for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i) - LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node, - NodesLeadingTo); - } - - // Now that we have legalized all of the inputs (which may have inserted - // libcalls), create the new CALLSEQ_START node. - Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - - // Merge in the last call to ensure that this call starts after the last - // call ended. 
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { - Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Tmp1, LastCALLSEQ_END); - Tmp1 = LegalizeOp(Tmp1); - } - - // Do not try to legalize the target-specific arguments (#1+). - if (Tmp1 != Node->getOperand(0)) { - SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0], - Ops.size()), Result.getResNo()); - } - - // Remember that the CALLSEQ_START is legalized. - AddLegalizedOperand(Op.getValue(0), Result); - if (Node->getNumValues() == 2) // If this has a flag result, remember it. - AddLegalizedOperand(Op.getValue(1), Result.getValue(1)); - - // Now that the callseq_start and all of the non-call nodes above this call - // sequence have been legalized, legalize the call itself. During this - // process, no libcalls can/will be inserted, guaranteeing that no calls - // can overlap. - assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); - // Note that we are selecting this call! - LastCALLSEQ_END = SDValue(CallEnd, 0); - IsLegalizingCall = true; - - // Legalize the call, starting from the CALLSEQ_END. - LegalizeOp(LastCALLSEQ_END); - assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); - return Result; - } + case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: - // If the CALLSEQ_START node hasn't been legalized first, legalize it. This - // will cause this node to be legalized as well as handling libcalls right. - if (LastCALLSEQ_END.getNode() != Node) { - LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); - DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); - assert(I != LegalizedNodes.end() && - "Legalizing the call start should have legalized this node!"); - return I->second; - } - - // Otherwise, the call start has been legalized and everything is going - // according to plan. Just legalize ourselves normally here. - Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - // Do not try to legalize the target-specific arguments (#1+), except for - // an optional flag input. - if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){ - if (Tmp1 != Node->getOperand(0)) { - SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - &Ops[0], Ops.size()), - Result.getResNo()); - } - } else { - Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); - if (Tmp1 != Node->getOperand(0) || - Tmp2 != Node->getOperand(Node->getNumOperands()-1)) { - SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Ops.back() = Tmp2; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - &Ops[0], Ops.size()), - Result.getResNo()); - } - } - assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); - // This finishes up call legalization. - IsLegalizingCall = false; - - // If the CALLSEQ_END node has a flag, remember that we legalized it. - AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); - if (Node->getNumValues() == 2) - AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1)); - return Result.getValue(Op.getResNo()); + break; case ISD::LOAD: { LoadSDNode *LD = cast<LoadSDNode>(Node); - Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain. - Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer. + Tmp1 = LD->getChain(); // Legalize the chain. + Tmp2 = LD->getBasePtr(); // Legalize the base pointer. 
ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp3 = Result.getValue(0); - Tmp4 = Result.getValue(1); + Tmp3 = SDValue(Node, 0); + Tmp4 = SDValue(Node, 1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned load and the target doesn't support it, // expand it. @@ -1148,20 +923,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), - DAG, TLI); - Tmp3 = Result.getOperand(0); - Tmp4 = Result.getOperand(1); - Tmp3 = LegalizeOp(Tmp3); - Tmp4 = LegalizeOp(Tmp4); + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, Tmp3, Tmp4); } } break; case TargetLowering::Custom: Tmp1 = TLI.LowerOperation(Tmp3, DAG); if (Tmp1.getNode()) { - Tmp3 = LegalizeOp(Tmp1); - Tmp4 = LegalizeOp(Tmp1.getValue(1)); + Tmp3 = Tmp1; + Tmp4 = Tmp1.getValue(1); } break; case TargetLowering::Promote: { @@ -1172,17 +943,19 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1)); - Tmp4 = LegalizeOp(Tmp1.getValue(1)); + LD->isInvariant(), LD->getAlignment()); + Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1); + Tmp4 = Tmp1.getValue(1); break; } } - // Since loads produce two values, make sure to remember that we - // legalized both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp3); - AddLegalizedOperand(SDValue(Node, 1), Tmp4); - return Op.getResNo() ? Tmp4 : Tmp3; + if (Tmp4.getNode() != Node) { + assert(Tmp3.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); + ReplacedNode(Node); + } + return; } EVT SrcVT = LD->getMemoryVT(); @@ -1213,9 +986,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); Ch = Result.getValue(1); // The chain. @@ -1230,8 +1004,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); + Tmp1 = Result; + Tmp2 = Ch; } else if (SrcWidth & (SrcWidth - 1)) { // If not loading a power-of-2 number of bits, expand as two loads. assert(!SrcVT.isVector() && "Unsupported extload!"); @@ -1274,7 +1048,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. 
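// [Editor's sketch — not part of this patch.] The arithmetic behind the
// non-power-of-2 extending-load split above: read the value as one
// power-of-2-sized piece plus a small leftover piece, then recombine them
// with a shift and an OR (the "join the hi and lo parts" step). Shown here
// for a 24-bit load on a little-endian host; the legalizer's version also
// handles the big-endian arrangement and the sign/zero-extend flavours.
#include <cstdint>
#include <cstring>

static uint32_t loadExt24LE(const uint8_t *Ptr) {
  uint16_t Lo;
  uint8_t Hi;
  std::memcpy(&Lo, Ptr, sizeof(Lo));     // 16-bit piece at offset 0
  std::memcpy(&Hi, Ptr + 2, sizeof(Hi)); // 8-bit piece at offset 2
  return uint32_t(Lo) | (uint32_t(Hi) << 16); // hi shifted up, then OR'd in
}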
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } else { // Big endian - avoid unaligned loads. // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 @@ -1304,29 +1078,25 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); + Tmp2 = Ch; } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp1 = Result.getValue(0); - Tmp2 = Result.getValue(1); + Tmp1 = SDValue(Node, 0); + Tmp2 = SDValue(Node, 1); if (isCustom) { - Tmp3 = TLI.LowerOperation(Result, DAG); + Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Tmp3.getNode()) { - Tmp1 = LegalizeOp(Tmp3); - Tmp2 = LegalizeOp(Tmp3.getValue(1)); + Tmp1 = Tmp3; + Tmp2 = Tmp3.getValue(1); } } else { // If this is an unaligned load and the target doesn't support it, @@ -1337,12 +1107,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), - DAG, TLI); - Tmp1 = Result.getOperand(0); - Tmp2 = Result.getOperand(1); - Tmp1 = LegalizeOp(Tmp1); - Tmp2 = LegalizeOp(Tmp2); + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, Tmp1, Tmp2); } } } @@ -1352,7 +1118,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->isInvariant(), LD->getAlignment()); unsigned ExtendOp; switch (ExtType) { case ISD::EXTLOAD: @@ -1363,95 +1129,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; default: llvm_unreachable("Unexpected extend load type!"); } - Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp1 = LegalizeOp(Result); // Relegalize new nodes. - Tmp2 = LegalizeOp(Load.getValue(1)); + Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Tmp2 = Load.getValue(1); break; } - // If this is a promoted vector load, and the vector element types are - // legal, then scalarize it. 
- if (ExtType == ISD::EXTLOAD && SrcVT.isVector() && - TLI.isTypeLegal(Node->getValueType(0).getScalarType())) { - SmallVector<SDValue, 8> LoadVals; - SmallVector<SDValue, 8> LoadChains; - unsigned NumElem = SrcVT.getVectorNumElements(); - unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; - - for (unsigned Idx=0; Idx<NumElem; Idx++) { - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, - Node->getValueType(0).getScalarType(), - Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - - LoadVals.push_back(ScalarLoad.getValue(0)); - LoadChains.push_back(ScalarLoad.getValue(1)); - } - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &LoadChains[0], LoadChains.size()); - SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, - Node->getValueType(0), &LoadVals[0], LoadVals.size()); - - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. - break; - } - - // If this is a promoted vector load, and the vector element types are - // illegal, create the promoted vector from bitcasted segments. - if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) { - EVT MemElemTy = Node->getValueType(0).getScalarType(); - EVT SrcSclrTy = SrcVT.getScalarType(); - unsigned SizeRatio = - (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits()); - - SmallVector<SDValue, 8> LoadVals; - SmallVector<SDValue, 8> LoadChains; - unsigned NumElem = SrcVT.getVectorNumElements(); - unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; - - for (unsigned Idx=0; Idx<NumElem; Idx++) { - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, - SrcVT.getScalarType(), - Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - if (TLI.isBigEndian()) { - // MSB (which is garbage, comes first) - LoadVals.push_back(ScalarLoad.getValue(0)); - for (unsigned i = 0; i<SizeRatio-1; ++i) - LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); - } else { - // LSB (which is data, comes first) - for (unsigned i = 0; i<SizeRatio-1; ++i) - LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); - LoadVals.push_back(ScalarLoad.getValue(0)); - } - LoadChains.push_back(ScalarLoad.getValue(1)); - } - - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &LoadChains[0], LoadChains.size()); - EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(), - SrcVT.getScalarType(), NumElem*SizeRatio); - SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, - TempWideVector, &LoadVals[0], LoadVals.size()); - - // Cast to the correct type - ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes); - - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. - break; - - } + assert(!SrcVT.isVector() && + "Vector Loads are handled in LegalizeVectorOps"); // FIXME: This does not work for vectors on most targets. Sign- and // zero-extend operations are currently folded into extending loads, @@ -1461,10 +1145,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. 
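// [Editor's sketch — not part of this patch.] What the explicit "extend
// inreg" mentioned above computes once the any-extending load has put the
// narrow value in the low bits of a wider register, shown for an 8-bit value
// inside a 32-bit register.
#include <cstdint>

static int32_t signExtendInReg8(uint32_t X) {
  // Shift the i8 sign bit up to the register's sign bit, then arithmetic-shift
  // it back down so it smears across the upper bits.
  return int32_t(X << 24) >> 24;
}

static uint32_t zeroExtendInReg8(uint32_t X) {
  return X & 0xFFu; // mask away everything above the loaded width
}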
- Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1472,42 +1156,41 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result, DAG.getValueType(SrcVT)); else ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + Tmp1 = ValRes; + Tmp2 = Result.getValue(1); break; } } // Since loads produce two values, make sure to remember that we legalized // both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp1); - AddLegalizedOperand(SDValue(Node, 1), Tmp2); - return Op.getResNo() ? Tmp2 : Tmp1; + if (Tmp2.getNode() != Node) { + assert(Tmp1.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); + ReplacedNode(Node); + } + break; } case ISD::STORE: { StoreSDNode *ST = cast<StoreSDNode>(Node); - Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. - Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. + Tmp1 = ST->getChain(); + Tmp2 = ST->getBasePtr(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - Result = SDValue(OptStore, 0); + ReplaceNode(ST, OptStore); break; } { - Tmp3 = LegalizeOp(ST->getValue()); - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp3, Tmp2, - ST->getOffset()), - Result.getResNo()); - + Tmp3 = ST->getValue(); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. 
@@ -1515,27 +1198,31 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), - DAG, TLI); + ExpandUnalignedStore(cast<StoreSDNode>(Node), + DAG, TLI, this); } break; case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(Result, DAG); - if (Tmp1.getNode()) Result = Tmp1; + Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Tmp1.getNode()) + ReplaceNode(SDValue(Node, 0), Tmp1); break; - case TargetLowering::Promote: + case TargetLowering::Promote: { assert(VT.isVector() && "Unknown legal promote case!"); Tmp3 = DAG.getNode(ISD::BITCAST, dl, TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + SDValue Result = + DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); break; } + } break; } } else { - Tmp3 = LegalizeOp(ST->getValue()); + Tmp3 = ST->getValue(); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); @@ -1547,8 +1234,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1602,17 +1291,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } // The order of the stores doesn't matter. - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + ReplaceNode(SDValue(Node, 0), Result); } else { - if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || - Tmp2 != ST->getBasePtr()) - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp3, Tmp2, - ST->getOffset()), - Result.getResNo()); - switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. 
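// [Editor's sketch — not part of this patch.] The width arithmetic behind the
// two-store truncating-store split above: "StWidth & (StWidth - 1)" clears
// the lowest set bit, so it is non-zero exactly when the stored width is not
// a power of two. The store is then split into a power-of-2 piece and a
// leftover piece (e.g. 24 bits -> 16 + 8), mirroring the unaligned-store
// expansion earlier in this file.
static bool isNonPow2Width(unsigned W) {
  return W != 0 && (W & (W - 1)) != 0;
}

static unsigned roundDownToPow2(unsigned W) {
  unsigned R = 1;
  while ((R << 1) <= W)
    R <<= 1;
  return R; // e.g. 24 -> 16; the leftover piece is W - R, e.g. 8
}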
@@ -1620,120 +1303,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), - DAG, TLI); + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } break; case TargetLowering::Custom: - Result = TLI.LowerOperation(Result, DAG); + ReplaceNode(SDValue(Node, 0), + TLI.LowerOperation(SDValue(Node, 0), DAG)); break; case TargetLowering::Expand: - - EVT WideScalarVT = Tmp3.getValueType().getScalarType(); - EVT NarrowScalarVT = StVT.getScalarType(); - - if (StVT.isVector()) { - unsigned NumElem = StVT.getVectorNumElements(); - // The type of the data we want to save - EVT RegVT = Tmp3.getValueType(); - EVT RegSclVT = RegVT.getScalarType(); - // The type of data as saved in memory. - EVT MemSclVT = StVT.getScalarType(); - - bool RegScalarLegal = TLI.isTypeLegal(RegSclVT); - bool MemScalarLegal = TLI.isTypeLegal(MemSclVT); - - // We need to expand this store. If the register element type - // is legal then we can scalarize the vector and use - // truncating stores. - if (RegScalarLegal) { - // Cast floats into integers - unsigned ScalarSize = MemSclVT.getSizeInBits(); - EVT EltVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize); - - // Round odd types to the next pow of two. - if (!isPowerOf2_32(ScalarSize)) - ScalarSize = NextPowerOf2(ScalarSize); - - // Store Stride in bytes - unsigned Stride = ScalarSize/8; - // Extract each of the elements from the original vector - // and save them into memory individually. - SmallVector<SDValue, 8> Stores; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - RegSclVT, Tmp3, DAG.getIntPtrConstant(Idx)); - - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - - // This scalar TruncStore may be illegal, but we lehalize it - // later. - SDValue Store = DAG.getTruncStore(Tmp1, dl, Ex, Tmp2, - ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment); - - Stores.push_back(Store); - } - - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); - break; - } - - // The scalar register type is illegal. - // For example saving <2 x i64> -> <2 x i32> on a x86. - // In here we bitcast the value into a vector of smaller parts and - // save it using smaller scalars. - if (!RegScalarLegal && MemScalarLegal) { - // Store Stride in bytes - unsigned Stride = MemSclVT.getSizeInBits()/8; - - unsigned SizeRatio = - (RegSclVT.getSizeInBits() / MemSclVT.getSizeInBits()); - - EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), - MemSclVT, - SizeRatio * NumElem); - - // Cast the wide elem vector to wider vec with smaller elem type. - // Example <2 x i64> -> <4 x i32> - Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3); - - SmallVector<SDValue, 8> Stores; - for (unsigned Idx=0; Idx < NumElem * SizeRatio; Idx++) { - // Extract the Ith element. - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); - // Bump pointer. 
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - - // Store if, this element is: - // - First element on big endian, or - // - Last element on little endian - if (( TLI.isBigEndian() && (Idx % SizeRatio == 0)) || - ((!TLI.isBigEndian() && (Idx % SizeRatio == SizeRatio-1)))) { - SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, - ST->getPointerInfo().getWithOffset(Idx*Stride), - isVolatile, isNonTemporal, Alignment); - Stores.push_back(Store); - } - } - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); - break; - } - - assert(false && "Unable to legalize the vector trunc store!"); - }// is vector - + assert(!StVT.isVector() && + "Vector Stores are handled in LegalizeVectorOps"); // TRUNCSTORE:i16 i32 -> STORE i16 assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); break; } } @@ -1741,17 +1328,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; } } - assert(Result.getValueType() == Op.getValueType() && - "Bad legalization!"); - - // Make sure that the generated code is itself legal. - if (Result != Op) - Result = LegalizeOp(Result); - - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - AddLegalizedOperand(Op, Result); - return Result; } SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { @@ -1778,7 +1354,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { if (Op.getValueType().isVector()) return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(), - false, false, 0); + false, false, false, 0); return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), Vec.getValueType().getVectorElementType(), @@ -1826,7 +1402,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { // Finally, load the updated vector. return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo, - false, false, 0); + false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1876,7 +1452,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0); + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, + false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { @@ -1905,7 +1482,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { assert(FloatVT.isByteSized() && "Unsupported floating point type!"); // Load out a legal integer with the same sign bit as the float. SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } else { // Little endian SDValue LoadPtr = StackPtr; // The float may be wider than the integer we are going to load. Advance @@ -1916,7 +1493,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { LoadPtr, DAG.getIntPtrConstant(ByteOffset)); // Load a legal integer containing the sign bit. 
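// [Editor's sketch — not part of this patch.] The bit-level idea behind
// ExpandFCOPYSIGN above: reinterpret the sign-source value as an integer,
// isolate its top (sign) bit, and combine it with the magnitude bits of the
// other operand. The legalizer gets at that bit through a stack temporary and
// the integer loads shown in this hunk; this is the plain scalar equivalent
// for a double.
#include <cstdint>
#include <cstring>

static double copySign64(double Mag, double Sgn) {
  uint64_t MagBits, SgnBits;
  std::memcpy(&MagBits, &Mag, sizeof(Mag));
  std::memcpy(&SgnBits, &Sgn, sizeof(Sgn));
  const uint64_t SignMask = 1ULL << 63;
  uint64_t Bits = (MagBits & ~SignMask) | (SgnBits & SignMask);
  double Result;
  std::memcpy(&Result, &Bits, sizeof(Result));
  return Result;
}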
SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Move the sign bit to the top bit of the loaded integer. unsigned BitShift = LoadTy.getSizeInBits() - (FloatVT.getSizeInBits() - 8 * ByteOffset); @@ -1984,7 +1561,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, EVT OpVT = LHS.getValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); switch (TLI.getCondCodeAction(CCCode, OpVT)) { - default: assert(0 && "Unknown condition code action!"); + default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; @@ -1992,7 +1569,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { - default: assert(0 && "Don't know how to expand this condition!"); + default: llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; @@ -2058,7 +1635,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, // Result is a load from the stack slot. if (SlotSize == DestSize) return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, - false, false, DestAlign); + false, false, false, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, @@ -2081,7 +1658,7 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { false, false, 0); return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(SPFI), - false, false, 0); + false, false, false, 0); } @@ -2127,7 +1704,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // If all elements are constants, create a load from the constant pool. if (isConstant) { - std::vector<Constant*> CV; + SmallVector<Constant*, 16> CV; for (unsigned i = 0, e = NumElems; i != e; ++i) { if (ConstantFPSDNode *V = dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) { @@ -2155,7 +1732,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, Alignment); + false, false, false, Alignment); } if (!MoreThanTwoValues) { @@ -2190,12 +1767,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); - // The input chain to this libcall is the entry node of the function. - // Legalizing the call will automatically add the previous call to the - // dependence. - SDValue InChain = DAG.getEntryNode(); - TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { @@ -2209,26 +1780,31 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. 
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + // By default, the input chain to this libcall is the entry node of the + // function. If the libcall is going to be emitted as a tail call then + // TLI.isUsedByReturnOnly will change it to the right chain if the return + // node which is being folded has a non-entry input chain. + SDValue InChain = DAG.getEntryNode(); + // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position. - bool isTailCall = isInTailCallPosition(DAG, Node, TLI); + SDValue TCChain = InChain; + bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI); + if (isTailCall) + InChain = TCChain; + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), isTailCall, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); if (!CallInfo.second.getNode()) // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); return CallInfo.first; } @@ -2254,15 +1830,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), false, - /*isReturnValueUsed=*/true, + false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); - return CallInfo.first; } @@ -2272,7 +1843,6 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -2289,18 +1859,13 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). 
- LegalizeOp(CallInfo.second); return CallInfo; } @@ -2311,7 +1876,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; @@ -2328,7 +1893,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; @@ -2343,7 +1908,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -2388,7 +1953,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -2426,21 +1991,16 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. DebugLoc dl = Node->getDebugLoc(); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); - - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); // Remainder is loaded back from the stack frame. - SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr, - MachinePointerInfo(), false, false, 0); + SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, + MachinePointerInfo(), false, false, false, 0); Results.push_back(CallInfo.first); Results.push_back(Rem); } @@ -2489,7 +2049,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, false, false, 0); // load the constructed double SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? 
BitsToDouble(0x4330000080000000ULL) : @@ -2611,7 +2171,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // offset depending on the data type. uint64_t FF; switch (Op0.getValueType().getSimpleVT().SimpleTy) { - default: assert(0 && "Unsupported integer type!"); + default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) @@ -2629,13 +2189,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, Alignment); + false, false, false, Alignment); else { - FudgeInReg = - LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, - DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, Alignment)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, Alignment); + HandleSDNode Handle(Load); + LegalizeOp(Load.getNode()); + FudgeInReg = Handle.getValue(); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); @@ -2731,7 +2293,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { EVT SHVT = TLI.getShiftAmountTy(VT); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().SimpleTy) { - default: assert(0 && "Unhandled Expand type in BSWAP!"); + default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); @@ -2788,7 +2350,7 @@ static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl) { switch (Opc) { - default: assert(0 && "Cannot expand this yet!"); + default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(VT); @@ -2831,6 +2393,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, return Op; } + case ISD::CTLZ_ZERO_UNDEF: + // This trivially expands to CTLZ. + return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op); case ISD::CTLZ: { // for now, we do this: // x = x | (x >> 1); @@ -2852,6 +2417,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, Op = DAG.getNOT(dl, Op, VT); return DAG.getNode(ISD::CTPOP, dl, VT, Op); } + case ISD::CTTZ_ZERO_UNDEF: + // This trivially expands to CTTZ. 
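+ // CTTZ_ZERO_UNDEF only relaxes the zero-input case (its result is undefined + // when the operand is zero), so the plain CTTZ expansion below is always a + // valid, if slightly conservative, replacement.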
+ return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op); case ISD::CTTZ: { // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: @@ -2881,7 +2449,6 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { switch (Opc) { default: llvm_unreachable("Unhandled atomic intrinsic Expand!"); - break; case ISD::ATOMIC_SWAP: switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type for atomic!"); @@ -2959,14 +2526,16 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { return ExpandChainLibCall(LC, Node, false); } -void SelectionDAGLegalize::ExpandNode(SDNode *Node, - SmallVectorImpl<SDValue> &Results) { +void SelectionDAGLegalize::ExpandNode(SDNode *Node) { + SmallVector<SDValue, 8> Results; DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1, Tmp2, Tmp3, Tmp4; switch (Node->getOpcode()) { case ISD::CTPOP: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); Results.push_back(Tmp1); break; @@ -2986,7 +2555,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::PREFETCH: case ISD::VAEND: case ISD::EH_SJLJ_LONGJMP: - case ISD::EH_SJLJ_DISPATCHSETUP: // If the target didn't expand these, there's nothing to do, so just // preserve the chain and be done. Results.push_back(Node->getOperand(0)); @@ -3006,7 +2574,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()), Args, DAG, dl); @@ -3083,7 +2651,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); Results.push_back(CallResult.second); @@ -3166,7 +2734,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned Align = Node->getConstantOperandVal(3); SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, - MachinePointerInfo(V), false, false, 0); + MachinePointerInfo(V), + false, false, false, 0); SDValue VAList = VAListLoad; if (Align > TLI.getMinStackArgumentAlignment()) { @@ -3191,7 +2760,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, MachinePointerInfo(V), false, false, 0); // Load the actual argument out of the pointer VAList Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), - false, false, 0)); + false, false, false, 0)); Results.push_back(Results[0].getValue(1)); break; } @@ -3202,7 +2771,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), Node->getOperand(2), MachinePointerInfo(VS), - false, false, 0); + false, false, false, 0); Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), MachinePointerInfo(VD), false, false, 0); Results.push_back(Tmp1); @@ -3236,15 +2805,57 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Node->getOperand(2), dl)); break; case ISD::VECTOR_SHUFFLE: { - SmallVector<int, 8> 
Mask; - cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + SmallVector<int, 32> NewMask; + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask(); EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); - if (!TLI.isTypeLegal(EltVT)) - EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + if (!TLI.isTypeLegal(EltVT)) { + + EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + + // BUILD_VECTOR operands are allowed to be wider than the element type. + // But if NewEltVT is smaller than EltVT, the BUILD_VECTOR does not accept it. + if (NewEltVT.bitsLT(EltVT)) { + + // Convert shuffle node. + // If original node was v4i64 and the new EltVT is i32, + // cast operands to v8i32 and re-build the mask. + + // Calculate new VT, the size of the new VT should be equal to original. + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, + VT.getSizeInBits()/NewEltVT.getSizeInBits()); + assert(NewVT.bitsEq(VT)); + + // cast operands to new VT + Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0); + Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1); + + // Convert the shuffle mask + unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements(); + + // EltVT gets smaller + assert(factor > 0); + + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { + if (Mask[i] < 0) { + for (unsigned fi = 0; fi < factor; ++fi) + NewMask.push_back(Mask[i]); + } + else { + for (unsigned fi = 0; fi < factor; ++fi) + NewMask.push_back(Mask[i]*factor+fi); + } + } + Mask = NewMask; + VT = NewVT; + } + EltVT = NewEltVT; + } unsigned NumElems = VT.getVectorNumElements(); - SmallVector<SDValue, 8> Ops; + SmallVector<SDValue, 16> Ops; for (unsigned i = 0; i != NumElems; ++i) { if (Mask[i] < 0) { Ops.push_back(DAG.getUNDEF(EltVT)); @@ -3253,14 +2864,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned Idx = Mask[i]; if (Idx < NumElems) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Node->getOperand(0), + Op0, DAG.getIntPtrConstant(Idx))); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Node->getOperand(1), + Op1, DAG.getIntPtrConstant(Idx - NumElems))); } + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + // We may have changed the BUILD_VECTOR type. Cast it back to the Node type. + Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); Results.push_back(Tmp1); break; } @@ -3408,10 +3022,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node); // Check to see if this FP immediate is already legal. // If this is a legal constant, turn it into a TargetConstantFP node.
- if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) - Results.push_back(SDValue(Node, 0)); - else - Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI)); + if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) + Results.push_back(ExpandConstantFP(CFP, true)); break; } case ISD::EHSELECTION: { @@ -3423,13 +3035,23 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::EXCEPTIONADDR: { - unsigned Reg = TLI.getExceptionAddressRegister(); + unsigned Reg = TLI.getExceptionPointerRegister(); assert(Reg && "Can't expand to unknown register!"); Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg, Node->getValueType(0))); Results.push_back(Results[0].getValue(1)); break; } + case ISD::FSUB: { + EVT VT = Node->getValueType(0); + assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && + "Don't know how to expand this FP subtraction!"); + Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Results.push_back(Tmp1); + break; + } case ISD::SUB: { EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && @@ -3657,6 +3279,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(1)); + // Ret is a node with an illegal type. Because such things are not + // generally permitted during this phase of legalization, delete the + // node. The above EXTRACT_ELEMENT nodes should have been folded. + DAG.DeleteNode(Ret.getNode()); } if (isSigned) { @@ -3797,7 +3423,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); - LastCALLSEQ_END = DAG.getEntryNode(); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); @@ -3807,6 +3432,35 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Tmp1); break; } + case ISD::BUILD_VECTOR: + Results.push_back(ExpandBUILD_VECTOR(Node)); + break; + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: { + // Scalarize vector SRA/SRL/SHL. + EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Unable to legalize non-vector shift"); + assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); + unsigned NumElem = VT.getVectorNumElements(); + + SmallVector<SDValue, 8> Scalars; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, + VT.getScalarType(), Ex, Sh)); + } + SDValue Result = + DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Scalars[0], Scalars.size()); + ReplaceNode(SDValue(Node, 0), Result); + break; + } case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: @@ -3817,13 +3471,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: // FIXME: Custom lowering for these operations shouldn't return null! - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - Results.push_back(SDValue(Node, i)); break; } + + // Replace the original node with the legalized result. 
+ if (!Results.empty()) + ReplaceNode(Node, Results.data()); } -void SelectionDAGLegalize::PromoteNode(SDNode *Node, - SmallVectorImpl<SDValue> &Results) { + +void SelectionDAGLegalize::PromoteNode(SDNode *Node) { + SmallVector<SDValue, 8> Results; EVT OVT = Node->getValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || @@ -3835,20 +3492,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, SDValue Tmp1, Tmp2, Tmp3; switch (Node->getOpcode()) { case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: // Zero extend the argument. Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); - // Perform the larger operation. + // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is + // already the correct result. Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); if (Node->getOpcode() == ISD::CTTZ) { - //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT) + // FIXME: This should set a bit in the zero extended value instead. Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), ISD::SETEQ); Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); - } else if (Node->getOpcode() == ISD::CTLZ) { + } else if (Node->getOpcode() == ISD::CTLZ || + Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, DAG.getConstant(NVT.getSizeInBits() - @@ -3877,6 +3538,33 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, Node->getOpcode() == ISD::SINT_TO_FP, dl); Results.push_back(Tmp1); break; + case ISD::VAARG: { + SDValue Chain = Node->getOperand(0); // Get the chain. + SDValue Ptr = Node->getOperand(1); // Get the pointer. + + unsigned TruncOp; + if (OVT.isVector()) { + TruncOp = ISD::BITCAST; + } else { + assert(OVT.isInteger() + && "VAARG promotion is supported only for vectors or integer types"); + TruncOp = ISD::TRUNCATE; + } + + // Perform the larger operation, then convert back + Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2), + Node->getConstantOperandVal(3)); + Chain = Tmp1.getValue(1); + + Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1); + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + ReplacedNode(Node); + break; + } case ISD::AND: case ISD::OR: case ISD::XOR: { @@ -3924,8 +3612,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, break; } case ISD::VECTOR_SHUFFLE: { - SmallVector<int, 8> Mask; - cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask(); // Cast the two input vectors. 
Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); @@ -3950,7 +3637,31 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::FDIV: + case ISD::FREM: + case ISD::FPOW: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0))); + break; } + case ISD::FLOG2: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FEXP: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp2, DAG.getIntPtrConstant(0))); + break; + } + } + + // Replace the original node with the legalized result. + if (!Results.empty()) + ReplaceNode(Node, Results.data()); } // SelectionDAG::Legalize - This is the entry point for the file. diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 7c1cc69d6a2f..e3938968b205 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -479,8 +479,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, - L->isVolatile(), L->isNonTemporal(), L->getAlignment()); + L->getPointerInfo(), NVT, L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -492,7 +492,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), - L->isNonTemporal(), L->getAlignment()); + L->isNonTemporal(), false, L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -672,7 +672,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, case ISD::SETUEQ: LC2 = (VT == MVT::f32) ? 
RTLIB::OEQ_F32 : RTLIB::OEQ_F64; break; - default: assert(false && "Do not know how to soften this setcc!"); + default: llvm_unreachable("Do not know how to soften this setcc!"); } } @@ -1212,7 +1212,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, switch (SrcVT.getSimpleVT().SimpleTy) { default: - assert(false && "Unsupported UINT_TO_FP!"); + llvm_unreachable("Unsupported UINT_TO_FP!"); case MVT::i32: Parts = TwoE32; break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a5c4c2ded4c5..95ddb1e0f6fb 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -20,7 +20,6 @@ #include "LegalizeTypes.h" #include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -57,8 +56,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::Constant: Res = PromoteIntRes_Constant(N); break; case ISD::CONVERT_RNDSAT: Res = PromoteIntRes_CONVERT_RNDSAT(N); break; + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; @@ -211,13 +212,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); - break; case TargetLowering::TypeLegal: break; case TargetLowering::TypePromoteInteger: - if (NOutVT.bitsEq(NInVT)) + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector()) // The input promotes to the same size. Convert the promoted value. return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); break; @@ -251,9 +249,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); } case TargetLowering::TypeWidenVector: - if (OutVT.bitsEq(NInVT)) - // The input is widened to the same size. Convert to the widened value. - return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp)); + // The input is widened to the same size. Convert to the widened value. + // Make sure that the outgoing value is not a vector, because this would + // make us bitcast between two vectors which are legalized in different ways. + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); } return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, @@ -312,7 +312,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { DebugLoc dl = N->getDebugLoc(); EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); - Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op); + Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); // Subtract off the extra leading bits in the bigger type. return DAG.getNode(ISD::SUB, dl, NVT, Op, DAG.getConstant(NVT.getSizeInBits() - @@ -330,13 +330,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); - // The count is the same in the promoted type except if the original - // value was zero. This can be handled by setting the bit just off - // the top of the original type. 
- APInt TopBit(NVT.getSizeInBits(), 0); - TopBit.setBit(OVT.getSizeInBits()); - Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); - return DAG.getNode(ISD::CTTZ, dl, NVT, Op); + if (N->getOpcode() == ISD::CTTZ) { + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + APInt TopBit(NVT.getSizeInBits(), 0); + TopBit.setBit(OVT.getSizeInBits()); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); + } + return DAG.getNode(N->getOpcode(), dl, NVT, Op); } SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -486,7 +488,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { - SDValue Mask = GetPromotedInteger(N->getOperand(0)); + SDValue Mask = N->getOperand(0); + EVT OpTy = N->getOperand(1).getValueType(); + + // Promote all the way up to the canonical SetCC type. + Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy)); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); return DAG.getNode(ISD::VSELECT, N->getDebugLoc(), @@ -1098,8 +1104,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break; case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; @@ -1171,7 +1179,6 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { switch (Opc) { default: llvm_unreachable("Unhandled atomic intrinsic Expand!"); - break; case ISD::ATOMIC_SWAP: switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type for atomic!"); @@ -1355,7 +1362,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne); // If we don't know anything about the high bits, exit. if (((KnownZero|KnownOne) & HighBitMask) == 0) @@ -1390,15 +1397,15 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { } } -#if 0 - // FIXME: This code is broken for shifts with a zero amount! // If we know that all of the high bits of the shift amount are zero, then we // can do this as a couple of simple shifts. if ((KnownZero & HighBitMask) == HighBitMask) { - // Compute 32-amt. - SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy, - DAG.getConstant(NVTBits, ShTy), - Amt); + // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined + // shift if x is zero. We can use XOR here because x is known to be smaller + // than 32. 
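+ // For example, with NVTBits == 32 and Amt == 5, Amt ^ 31 == 26 == 31 - 5; + // the XOR acts as an exact subtraction because Amt is known to fit in the + // low five bits.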
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt, + DAG.getConstant(NVTBits-1, ShTy)); + unsigned Op1, Op2; switch (N->getOpcode()) { default: llvm_unreachable("Unknown shift"); @@ -1407,13 +1414,23 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; } - Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt); - Hi = DAG.getNode(ISD::OR, NVT, - DAG.getNode(Op1, NVT, InH, Amt), - DAG.getNode(Op2, NVT, InL, Amt2)); + // When shifting right the arithmetic for Lo and Hi is swapped. + if (N->getOpcode() != ISD::SHL) + std::swap(InL, InH); + + // Use a little trick to get the bits that move from Lo to Hi. First + // shift by one bit. + SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy)); + // Then compute the remaining shift with amount-1. + SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2); + + Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2); + + if (N->getOpcode() != ISD::SHL) + std::swap(Hi, Lo); return true; } -#endif return false; } @@ -1493,8 +1510,6 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); return true; } - - return false; } void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, @@ -1702,8 +1717,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, DAG.getConstant(0, NVT), ISD::SETNE); - SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo); - SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi); + SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, DAG.getNode(ISD::ADD, dl, NVT, LoLZ, @@ -1732,8 +1747,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, DAG.getConstant(0, NVT), ISD::SETNE); - SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo); - SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi); + SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, DAG.getNode(ISD::ADD, dl, NVT, HiLZ, @@ -1778,6 +1793,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); + bool isInvariant = N->isInvariant(); DebugLoc dl = N->getDebugLoc(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1808,7 +1824,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. 
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2305,12 +2321,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(Chain, RetTy, true, false, false, false, - 0, TLI.getLibcallCallingConv(LC), false, - true, Func, Args, DAG, dl); + 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Func, Args, DAG, dl); SplitInteger(CallInfo.first, Lo, Hi); SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, DAG.getConstant(0, PtrVT), ISD::SETNE); @@ -2781,7 +2799,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { else if (SrcVT == MVT::i128) FF = APInt(32, F32TwoE128); else - assert(false && "Unsupported UINT_TO_FP!"); + llvm_unreachable("Unsupported UINT_TO_FP!"); // Check whether the sign bit is set. SDValue Lo, Hi; @@ -2926,38 +2944,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - SDValue Op0 = N->getOperand(1); - SDValue Op1 = N->getOperand(1); - assert(Op0.getValueType() == Op1.getValueType() && - "Invalid input vector types"); - EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + EVT InElemTy = OutVT.getVectorElementType(); EVT OutElemTy = NOutVT.getVectorElementType(); - unsigned NumElem0 = Op0.getValueType().getVectorNumElements(); - unsigned NumElem1 = Op1.getValueType().getVectorNumElements(); + unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements(); unsigned NumOutElem = NOutVT.getVectorNumElements(); - assert(NumElem0 + NumElem1 == NumOutElem && - "Invalid number of incoming elements"); + unsigned NumOperands = N->getNumOperands(); + assert(NumElem * NumOperands == NumOutElem && + "Unexpected number of elements"); // Take the elements from the first vector. 
SmallVector<SDValue, 8> Ops(NumOutElem); - for (unsigned i = 0; i < NumElem0; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - Op0.getValueType().getScalarType(), Op0, - DAG.getIntPtrConstant(i)); - Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); - } - - // Take the elements from the second vector - for (unsigned i = 0; i < NumElem1; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - Op1.getValueType().getScalarType(), Op1, - DAG.getIntPtrConstant(i)); - Ops[i + NumElem0] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + for (unsigned i = 0; i < NumOperands; ++i) { + SDValue Op = N->getOperand(i); + for (unsigned j = 0; j < NumElem; ++j) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InElemTy, Op, DAG.getIntPtrConstant(j)); + Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + } } return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a4bb577433cc..439aa4de5cf5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -222,8 +222,6 @@ bool DAGTypeLegalizer::run() { for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { EVT ResultVT = N->getValueType(i); switch (getTypeAction(ResultVT)) { - default: - assert(false && "Unknown action!"); case TargetLowering::TypeLegal: break; // The following calls must take care of *all* of the node's results, @@ -275,8 +273,6 @@ ScanOperands: EVT OpVT = N->getOperand(i).getValueType(); switch (getTypeAction(OpVT)) { - default: - assert(false && "Unknown action!"); case TargetLowering::TypeLegal: continue; // The following calls must either replace all of the node's results @@ -752,7 +748,11 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { - assert(Result.getValueType() == Op.getValueType().getVectorElementType() && + // Note that in some cases vector operation operands may be greater than + // the vector element type. For example BUILD_VECTOR of type <1 x i1> with + // a constant i8 operand. + assert(Result.getValueType().getSizeInBits() >= + Op.getValueType().getVectorElementType().getSizeInBits() && "Invalid type for scalarized vector"); AnalyzeNewValue(Result); @@ -889,7 +889,7 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, MachinePointerInfo(), false, false, 0); // Result is a load from the stack slot. return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } /// CustomLowerNode - Replace the node's results with custom code provided @@ -1056,8 +1056,9 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), false, - /*isReturnValueUsed=*/true, + false, 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); return CallInfo.first; } @@ -1084,12 +1085,11 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. 
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); return CallInfo; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index abacdac686bc..e8664458e9a6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -521,6 +521,7 @@ private: SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); SDValue ScalarizeVecRes_SETCC(SDNode *N); @@ -633,6 +634,7 @@ private: SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); + SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_Convert(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 8e7e4985e4d0..a8ff7c65abde 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -21,7 +21,6 @@ #include "LegalizeTypes.h" #include "llvm/Target/TargetData.h" -#include "llvm/CodeGen/PseudoSourceValue.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -46,8 +45,6 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Handle some special cases efficiently. switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: break; @@ -130,7 +127,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, + false, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -140,7 +138,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo.getWithOffset(IncrementSize), false, - false, MinAlign(Alignment, IncrementSize)); + false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. if (TLI.isBigEndian()) @@ -212,11 +210,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment); // Increment the pointer to the other half. 
unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -224,7 +223,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - isVolatile, isNonTemporal, + isVolatile, isNonTemporal, isInvariant, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index f815b00db5d6..3ae8345bd198 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -64,6 +64,8 @@ class VectorLegalizer { // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. SDValue ExpandVSELECT(SDValue Op); + SDValue ExpandLoad(SDValue Op); + SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); // Implements vector promotion; this is essentially just bitcasting the // operands to a different type and bitcasting the result back to the @@ -124,6 +126,33 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); + if (Op.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { + if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT())) + return TranslateLegalizeResults(Op, Result); + Changed = true; + return LegalizeOp(ExpandLoad(Op)); + } + } else if (Op.getOpcode() == ISD::STORE) { + StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + EVT StVT = ST->getMemoryVT(); + EVT ValVT = ST->getValue().getValueType(); + if (StVT.isVector() && ST->isTruncatingStore()) + switch (TLI.getTruncStoreAction(ValVT, StVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + return TranslateLegalizeResults(Op, Result); + case TargetLowering::Custom: + Changed = true; + return LegalizeOp(TLI.LowerOperation(Result, DAG)); + case TargetLowering::Expand: + Changed = true; + return LegalizeOp(ExpandStore(Op)); + } + } + bool HasVectorValue = false; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); J != E; @@ -156,8 +185,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SRL: case ISD::ROTL: case ISD::ROTR: - case ISD::CTTZ: case ISD::CTLZ: + case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTPOP: case ISD::SELECT: case ISD::VSELECT: @@ -262,6 +293,97 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { return DAG.getNode(ISD::BITCAST, dl, VT, Op); } + +SDValue VectorLegalizer::ExpandLoad(SDValue Op) { + DebugLoc dl = Op.getDebugLoc(); + LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + SDValue Chain = LD->getChain(); + SDValue BasePTR = LD->getBasePtr(); + EVT SrcVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl, + Op.getNode()->getValueType(0).getScalarType(), + Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), 
LD->isNonTemporal(), + LD->getAlignment()); + + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getIntPtrConstant(Stride)); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, + Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size()); + + AddLegalizedOperand(Op.getValue(0), Value); + AddLegalizedOperand(Op.getValue(1), NewChain); + + return (Op.getResNo() ? NewChain : Value); +} + +SDValue VectorLegalizer::ExpandStore(SDValue Op) { + DebugLoc dl = Op.getDebugLoc(); + StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + SDValue Chain = ST->getChain(); + SDValue BasePTR = ST->getBasePtr(); + SDValue Value = ST->getValue(); + EVT StVT = ST->getMemoryVT(); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + + unsigned NumElem = StVT.getVectorNumElements(); + // The type of the data we want to save + EVT RegVT = Value.getValueType(); + EVT RegSclVT = RegVT.getScalarType(); + // The type of data as saved in memory. + EVT MemSclVT = StVT.getScalarType(); + + // Cast floats into integers + unsigned ScalarSize = MemSclVT.getSizeInBits(); + + // Round odd types to the next pow of two. + if (!isPowerOf2_32(ScalarSize)) + ScalarSize = NextPowerOf2(ScalarSize); + + // Store Stride in bytes + unsigned Stride = ScalarSize/8; + // Extract each of the elements from the original vector + // and save them into memory individually. + SmallVector<SDValue, 8> Stores; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + RegSclVT, Value, DAG.getIntPtrConstant(Idx)); + + // This scalar TruncStore may be illegal, but we legalize it later. + SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, + ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, + isVolatile, isNonTemporal, Alignment); + + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getIntPtrConstant(Stride)); + + Stores.push_back(Store); + } + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + AddLegalizedOperand(Op, TF); + return TF; +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -274,10 +396,12 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // If we can't even use the basic vector operations of // AND,OR,XOR, we will have to scalarize the op. - if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) || - !TLI.isOperationLegalOrCustom(ISD::XOR, VT) || - !TLI.isOperationLegalOrCustom(ISD::OR, VT)) - return DAG.UnrollVectorOp(Op.getNode()); + // Notice that the operation may be 'promoted' which means that it is + // 'bitcasted' to another type which is handled. + if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits() && "Invalid mask size"); @@ -301,9 +425,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { DebugLoc DL = Op.getDebugLoc(); // Make sure that the SINT_TO_FP and SRL instructions are available. 
- if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) || - !TLI.isOperationLegalOrCustom(ISD::SRL, VT)) - return DAG.UnrollVectorOp(Op.getNode()); + if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); EVT SVT = VT.getScalarType(); assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 107a42b2951c..5f23f01dafb4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -21,7 +21,6 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -59,6 +58,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break; case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; @@ -194,7 +194,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->getOriginalAlignment()); + N->isInvariant(), N->getOriginalAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -227,6 +227,37 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { return InOp; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { + SDValue Cond = GetScalarizedVector(N->getOperand(0)); + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false); + TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true); + if (ScalarBool != VecBool) { + EVT CondVT = Cond.getValueType(); + switch (ScalarBool) { + case TargetLowering::UndefinedBooleanContent: + break; + case TargetLowering::ZeroOrOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); + // Vector read from all ones, scalar expects a single 1 so mask. + Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT, + Cond, DAG.getConstant(1, CondVT)); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrOneBooleanContent); + // Vector reads from a one, scalar from all ones so sign extend. 
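+ // Sign extending from bit 0 turns a lane value of 1 into all ones and + // leaves 0 unchanged, matching the scalar ZeroOrNegativeOneBooleanContent + // convention.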
+ Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT, + Cond, DAG.getValueType(MVT::i1)); + break; + } + } + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); return DAG.getNode(ISD::SELECT, N->getDebugLoc(), @@ -405,6 +436,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; switch (N->getOpcode()) { default: @@ -442,8 +477,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: - case ISD::CTPOP: case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -677,7 +714,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Lo part from the stack slot. Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; @@ -686,7 +723,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, MinAlign(Alignment, IncrementSize)); + false, false, false, MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -713,20 +750,21 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - Alignment); + isInvariant, Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVolatile, isNonTemporal, Alignment); + HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment); // Build a factor node to remember that this load is independent of the // other one. @@ -773,46 +811,18 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - // Split the input. + // If the input also splits, handle it directly for a compile time speedup. + // Otherwise split it by hand. 
EVT InVT = N->getOperand(0).getValueType(); - switch (getTypeAction(InVT)) { - default: llvm_unreachable("Unexpected type action!"); - case TargetLowering::TypeLegal: { + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + GetSplitVector(N->getOperand(0), Lo, Hi); + } else { EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(0)); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - case TargetLowering::TypePromoteInteger: { - SDValue InOp = GetPromotedInteger(N->getOperand(0)); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), - InOp.getValueType().getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - case TargetLowering::TypeSplitVector: - GetSplitVector(N->getOperand(0), Lo, Hi); - break; - case TargetLowering::TypeWidenVector: { - // If the result needs to be split and the input needs to be widened, - // the two types must have different lengths. Use the widened result - // and extract from it to do the split. - SDValue InOp = GetWidenedVector(N->getOperand(0)); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } } if (N->getOpcode() == ISD::FP_ROUND) { @@ -1239,6 +1249,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; + case ISD::VSELECT: case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; @@ -1590,12 +1601,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); - break; case TargetLowering::TypeLegal: break; case TargetLowering::TypePromoteInteger: + // If the incoming type is a vector that is being promoted, then + // we know that the elements are arranged differently and that we + // must perform the conversion using a stack slot. + if (InVT.isVector()) + break; + // If the InOp is promoted to the same size, convert it. Otherwise, // fall out of the switch and widen the promoted input. 
InOp = GetPromotedInteger(InOp); @@ -1928,7 +1942,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDValue InOp2 = GetWidenedVector(N->getOperand(2)); assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, Cond1, InOp1, InOp2); } @@ -2032,6 +2046,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; + case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; case ISD::FP_EXTEND: case ISD::FP_TO_SINT: @@ -2165,6 +2180,32 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { MVT::Other,&StChain[0],StChain.size()); } +SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { + SDValue InOp0 = GetWidenedVector(N->getOperand(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + + // WARNING: In this code we widen the compare instruction with garbage. + // This garbage may contain denormal floats which may be slow. Is this a real + // concern ? Should we zero the unused lanes if this is a float compare ? + + // Get a new SETCC node to compare the newly widened operands. + // Only some of the compared elements are legal. + EVT SVT = TLI.getSetCCResultType(InOp0.getValueType()); + SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SVT, InOp0, InOp1, N->getOperand(2)); + + // Extract the needed results from the result vector. + EVT ResVT = EVT::getVectorVT(*DAG.getContext(), + SVT.getVectorElementType(), + N->getValueType(0).getVectorNumElements()); + SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, + ResVT, WideSETCC, DAG.getIntPtrConstant(0)); + + return PromoteTargetBoolean(CC, N->getValueType(0)); +} + + //===----------------------------------------------------------------------===// // Vector Widening Utilities //===----------------------------------------------------------------------===// @@ -2276,6 +2317,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2285,7 +2327,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - isVolatile, isNonTemporal, Align); + isVolatile, isNonTemporal, isInvariant, Align); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction @@ -2323,18 +2365,37 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Increment)); + SDValue L; if (LdWidth < NewVTWidth) { // Our current type we are using is too large, find a better size NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); NewVTWidth = NewVT.getSizeInBits(); - } - - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, 
LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, MinAlign(Align, Increment)); - LdChain.push_back(LdOp.getValue(1)); - LdOps.push_back(LdOp); + isNonTemporal, isInvariant, + MinAlign(Align, Increment)); + LdChain.push_back(L.getValue(1)); + if (L->getValueType(0).isVector()) { + SmallVector<SDValue, 16> Loads; + Loads.push_back(L); + unsigned size = L->getValueSizeInBits(0); + while (size < LdOp->getValueSizeInBits(0)) { + Loads.push_back(DAG.getUNDEF(L->getValueType(0))); + size += L->getValueSizeInBits(0); + } + L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), + &Loads[0], Loads.size()); + } + } else { + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), isVolatile, + isNonTemporal, isInvariant, MinAlign(Align, Increment)); + LdChain.push_back(L.getValue(1)); + } + + LdOps.push_back(L); + LdWidth -= NewVTWidth; } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp new file mode 100644 index 000000000000..ff0136e08cd9 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -0,0 +1,657 @@ +//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ResourcePriorityQueue class, which is a +// SchedulingPriorityQueue that prioritizes instructions using DFA state to +// reduce the length of the critical path through the basic block +// on VLIW platforms. +// The scheduler is basically a top-down adaptable list scheduler with DFA +// resource tracking added to the cost function. +// DFA is queried as a state machine to model "packets/bundles" during +// schedule. Currently packets/bundles are discarded at the end of +// scheduling, affecting only order of instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scheduler" +#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLowering.h" + +using namespace llvm; + +static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable use of DFA during scheduling")); + +static cl::opt<signed> RegPressureThreshold( + "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), + cl::desc("Track reg pressure and switch priority to in-depth")); + + +ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : + Picker(this), + InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData()) +{ + TII = IS->getTargetLowering().getTargetMachine().getInstrInfo(); + TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo(); + TLI = &IS->getTargetLowering(); + + const TargetMachine &tm = (*IS->MF).getTarget(); + ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL); + // This hard requirement could be relaxed, but for now + // do not let it proceed.
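+ // The DFA resources model is what later lets isResourceAvailable() ask + // whether one more instruction still fits into the current packet.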
+ assert (ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF); + + ParallelLiveRanges = 0; + HorizontalVerticalBalance = 0; +} + +unsigned +ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + + SUnit *PredSU = I->getSUnit(); + const SDNode *ScegN = PredSU->getNode(); + + if (!ScegN) + continue; + + // If value is passed to CopyToReg, it is probably + // live outside BB. + switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: NumberDeps++; break; + case ISD::CopyToReg: break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { + EVT VT = ScegN->getValueType(i); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, + unsigned RCId) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + + SUnit *SuccSU = I->getSUnit(); + const SDNode *ScegN = SuccSU->getNode(); + if (!ScegN) + continue; + + // If value is passed to CopyToReg, it is probably + // live outside BB. + switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: break; + case ISD::CopyToReg: NumberDeps++; break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { + const SDValue &Op = ScegN->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +static unsigned numberCtrlDepsInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +static unsigned numberCtrlPredInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +/// +/// Initialize nodes. +/// +void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + NumNodesSolelyBlocking.resize(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = &(*SUnits)[i]; + initNumRegDefsLeft(SU); + SU->NodeQueueId = 0; + } +} + +/// This heuristic is used if DFA scheduling is not desired +/// for some VLIW platform. +bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. 
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+ return OnlyAvailablePred;
+}
+
+void ResourcePriorityQueue::push(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+ Queue.push_back(SU);
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
+ if (!SU || !SU->getNode())
+ return false;
+
+ // If this is a compound instruction,
+ // it is likely to be a call. Do not delay it.
+ if (SU->getNode()->getGluedNode())
+ return true;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ if (SU->getNode()->isMachineOpcode())
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode())))
+ return false;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+
+ // Now see if there are no other dependencies
+ // to instructions already in the packet.
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
+ E = Packet[i]->Succs.end(); I != E; ++I) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order deps.
+ if (I->isCtrl())
+ continue;
+
+ if (I->getSUnit() == SU)
+ return false;
+ }
+
+ return true;
+}
+
+/// Keep track of available resources.
+void ResourcePriorityQueue::reserveResources(SUnit *SU) {
+ // If this SU does not fit in the packet
+ // start a new one.
+ if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ ResourcesModel->reserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode()));
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+ Packet.push_back(SU);
+ }
+ // Forcefully end packet for PseudoOps.
+ else {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ // If packet is now full, reset the state so in the next cycle
+ // we start fresh.
+ if (Packet.size() >= InstrItins->IssueWidth) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+}
+
+signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ // Gen estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
+ EVT VT = SU->getNode()->getValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance += numberRCValSuccInSU(SU, RCId);
+ }
+ // Kill estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = SU->getNode()->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (isa<ConstantSDNode>(Op.getNode()))
+ continue;
+
+ if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance -= numberRCValPredInSU(SU, RCId);
+ }
+ return RegBalance;
+}
+
+/// Estimates change in reg pressure from this SU.
+/// It is achieved by trivial tracking of defined
+/// and used vregs in dependent instructions.
+/// The RawPressure flag makes this function ignore
+/// existing reg file sizes, and report raw def/use
+/// balance.
+signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ if (RawPressure) {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+ else {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ if ((RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) > 0) &&
+ (RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+
+ return RegBalance;
+}
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
+static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 15;
+static const unsigned PriorityFive = 5;
+static const unsigned ScaleOne = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned ScaleThree = 5;
+static const unsigned FactorOne = 2;
+
+/// Returns single number reflecting benefit of scheduling SU
+/// in the current cycle.
+signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
+ // Initial trivial priority.
+ signed ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (SU->isScheduled)
+ return ResCount;
+
+ // Forced priority is high.
+ if (SU->isScheduleHigh)
+ ResCount += PriorityOne;
+
+ // Adaptable scheduling
+ // A small, but very parallel
+ // region, where reg pressure is an issue.
+ if (HorizontalVerticalBalance > RegPressureThreshold) {
+ // Critical path first
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ // Consider change to reg pressure from scheduling
+ // this SU.
+ ResCount -= (regPressureDelta(SU,true) * ScaleOne);
+ }
+ // Default heuristic, greedy and
+ // critical path driven.
+ else {
+ // Critical path first.
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // Now see how many instructions are blocked by this SU.
+ ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ ResCount -= (regPressureDelta(SU) * ScaleTwo);
+ }
+
+ // These are platform specific things.
+ // Will need to go into the back end
+ // and accessed from here via a hook.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ if (TID.isCall())
+ ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
+ }
+ else
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ ResCount += PriorityFive;
+ break;
+
+ case ISD::INLINEASM:
+ ResCount += PriorityFour;
+ break;
+ }
+ }
+ return ResCount;
+}
+
+
+/// Main resource tracking point.
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
+ // Use NULL entry as an event marker to reset
+ // the DFA state.
+ if (!SU) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ return;
+ }
+
+ const SDNode *ScegN = SU->getNode();
+ // Update reg pressure tracking.
+ // First update current node.
+ if (ScegN->isMachineOpcode()) {
+ // Estimate generated regs.
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ EVT VT = ScegN->getValueType(i);
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC)
+ RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
+ }
+ }
+ // Estimate killed regs.
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC) {
+ if (RegPressure[RC->getID()] >
+ (numberRCValPredInSU(SU, RC->getID())))
+ RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
+ else RegPressure[RC->getID()] = 0;
+ }
+ }
+ }
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
+ continue;
+ --I->getSUnit()->NumRegDefsLeft;
+ }
+ }
+
+ // Reserve resources for this SU.
+ reserveResources(SU);
+
+ // Adjust number of parallel live ranges.
+ // Heuristic is simple - node with no data successors reduces
+ // number of live ranges. All others, increase it.
+ unsigned NumberNonControlDeps = 0; + + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + adjustPriorityOfUnscheduledPreds(I->getSUnit()); + if (!I->isCtrl()) + NumberNonControlDeps++; + } + + if (!NumberNonControlDeps) { + if (ParallelLiveRanges >= SU->NumPreds) + ParallelLiveRanges -= SU->NumPreds; + else + ParallelLiveRanges = 0; + + } + else + ParallelLiveRanges += SU->NumRegDefsLeft; + + // Track parallel live chains. + HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU)); + HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU)); +} + +void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) { + unsigned NodeNumDefs = 0; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + if (N->isMachineOpcode()) { + const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); + // No register need be allocated for this. + if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { + NodeNumDefs = 0; + break; + } + NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs()); + } + else + switch(N->getOpcode()) { + default: break; + case ISD::CopyFromReg: + NodeNumDefs++; + break; + case ISD::INLINEASM: + NodeNumDefs++; + break; + } + + SU->NumRegDefsLeft = NodeNumDefs; +} + +/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. +void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { + if (SU->isAvailable) return; // All preds scheduled. + + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); + if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) + return; + + // Okay, we found a single predecessor that is available, but not scheduled. + // Since it is available, it must be in the priority queue. First remove it. + remove(OnlyAvailablePred); + + // Reinsert the node into the priority queue, which recomputes its + // NumNodesSolelyBlocking value. + push(OnlyAvailablePred); +} + + +/// Main access point - returns next instructions +/// to be placed in scheduling sequence. +SUnit *ResourcePriorityQueue::pop() { + if (empty()) + return 0; + + std::vector<SUnit *>::iterator Best = Queue.begin(); + if (!DisableDFASched) { + signed BestCost = SUSchedulingCost(*Best); + for (std::vector<SUnit *>::iterator I = Queue.begin(), + E = Queue.end(); I != E; ++I) { + if (*I == *Best) + continue; + + if (SUSchedulingCost(*I) > BestCost) { + BestCost = SUSchedulingCost(*I); + Best = I; + } + } + } + // Use default TD scheduling mechanism. 
+ else { + for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + E = Queue.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + } + + SUnit *V = *Best; + if (Best != prior(Queue.end())) + std::swap(*Best, Queue.back()); + + Queue.pop_back(); + + return V; +} + + +void ResourcePriorityQueue::remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + + Queue.pop_back(); +} + + +#ifdef NDEBUG +void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {} +#else +void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const { + ResourcePriorityQueue q = *this; + while (!q.empty()) { + SUnit *su = q.pop(); + dbgs() << "Height " << su->getHeight() << ": "; + su->dump(DAG); + } +} +#endif diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index b275c6321ae4..24da432a47a1 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -43,7 +43,7 @@ namespace { SmallVector<SUnit *, 16> Queue; bool empty() const { return Queue.empty(); } - + void push(SUnit *U) { Queue.push_back(U); } @@ -101,8 +101,8 @@ private: bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); void ListScheduleBottomUp(); - /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies. - bool ForceUnitLatencies() const { return true; } + /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. + bool forceUnitLatencies() const { return true; } }; } // end anonymous namespace @@ -112,7 +112,7 @@ void ScheduleDAGFast::Schedule() { DEBUG(dbgs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. @@ -159,7 +159,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { ReleasePred(SU, &*I); if (I->isAssignedRegDep()) { // This is a physical register dependency and it's impossible or - // expensive to copy the register. Make sure nothing that can + // expensive to copy the register. Make sure nothing that can // clobber the register is scheduled between the predecessor and // this node. 
if (!LiveRegDefs[I->getReg()]) { @@ -245,10 +245,10 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), SDValue(LoadNode, 1)); - SUnit *NewSU = NewSUnit(N); + SUnit *NewSU = newSUnit(N); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { @@ -268,7 +268,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { LoadSU = &SUnits[LoadNode->getNodeId()]; isNewLoad = false; } else { - LoadSU = NewSUnit(LoadNode); + LoadSU = newSUnit(LoadNode); LoadNode->setNodeId(LoadSU->NodeNum); } @@ -329,7 +329,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { D.setSUnit(LoadSU); AddPred(SuccDep, D); } - } + } if (isNewLoad) { AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); } @@ -381,11 +381,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVector<SUnit*, 2> &Copies) { - SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL)); + SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL)); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; - SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL)); + SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL)); CopyToSU->CopySrcRC = DestRC; CopyToSU->CopyDstRC = SrcRC; @@ -425,7 +425,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); unsigned NumRes = MCID.getNumDefs(); - for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -447,7 +447,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, Added = true; } } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { if (RegAdded.insert(*Alias)) { LRegs.push_back(*Alias); @@ -508,7 +508,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) { + for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } @@ -630,7 +630,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { std::reverse(Sequence.begin(), Sequence.end()); #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/true); + VerifyScheduledSequence(/*isBottomUp=*/true); #endif } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index e757defd3895..2cb5d37d689e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -45,10 +45,6 @@ static RegisterScheduler "Bottom-up register reduction list scheduling", createBURRListDAGScheduler); static RegisterScheduler - tdrListrDAGScheduler("list-tdrr", - "Top-down register reduction list scheduling", - createTDRRListDAGScheduler); -static RegisterScheduler sourceListDAGScheduler("source", "Similar to list-burr but schedules in 
source " "order when possible", @@ -93,6 +89,9 @@ static cl::opt<bool> DisableSchedCriticalPath( static cl::opt<bool> DisableSchedHeight( "disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp")); +static cl::opt<bool> Disable2AddrHack( + "disable-2addr-hack", cl::Hidden, cl::init(true), + cl::desc("Disable scheduler's two-address hack")); static cl::opt<int> MaxReorderWindow( "max-sched-reorder", cl::Hidden, cl::init(6), @@ -103,17 +102,6 @@ static cl::opt<unsigned> AvgIPC( "sched-avg-ipc", cl::Hidden, cl::init(1), cl::desc("Average inst/cycle whan no target itinerary exists.")); -#ifndef NDEBUG -namespace { - // For sched=list-ilp, Count the number of times each factor comes into play. - enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth, - FactStatic, FactOther, NumFactors }; -} -static const char *FactorName[NumFactors] = -{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"}; -static int FactorCount[NumFactors]; -#endif //!NDEBUG - namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -121,10 +109,6 @@ namespace { /// class ScheduleDAGRRList : public ScheduleDAGSDNodes { private: - /// isBottomUp - This is true if the scheduling problem is bottom-up, false if - /// it is top-down. - bool isBottomUp; - /// NeedLatency - True if the scheduler will make use of latency information. /// bool NeedLatency; @@ -162,11 +146,15 @@ private: /// and similar queries. ScheduleDAGTopologicalSort Topo; + // Hack to keep track of the inverse of FindCallSeqStart without more crazy + // DAG crawling. + DenseMap<SUnit*, SUnit*> CallSeqEndForStart; + public: ScheduleDAGRRList(MachineFunction &mf, bool needlatency, SchedulingPriorityQueue *availqueue, CodeGenOpt::Level OptLevel) - : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()), + : ScheduleDAGSDNodes(mf), NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), Topo(SUnits) { @@ -221,8 +209,6 @@ private: void ReleasePred(SUnit *SU, const SDep *PredEdge); void ReleasePredecessors(SUnit *SU); - void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); - void ReleaseSuccessors(SUnit *SU); void ReleasePending(); void AdvanceToCycle(unsigned NextCycle); void AdvancePastStalls(SUnit *SU); @@ -242,15 +228,11 @@ private: SUnit *PickNodeToScheduleBottomUp(); void ListScheduleBottomUp(); - void ScheduleNodeTopDown(SUnit*); - void ListScheduleTopDown(); - - /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. /// Updates the topological ordering if required. SUnit *CreateNewSUnit(SDNode *N) { unsigned NumSUnits = SUnits.size(); - SUnit *NewNode = NewSUnit(N); + SUnit *NewNode = newSUnit(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) Topo.InitDAGTopologicalSorting(); @@ -268,9 +250,9 @@ private: return NewNode; } - /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't + /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't /// need actual latency information but the hybrid scheduler does. - bool ForceUnitLatencies() const { + bool forceUnitLatencies() const { return !NeedLatency; } }; @@ -278,7 +260,7 @@ private: /// GetCostForDef - Looks up the register class and cost for a given definition. 
/// Typically this just means looking up the representative register class, -/// but for untyped values (MVT::untyped) it means inspecting the node's +/// but for untyped values (MVT::Untyped) it means inspecting the node's /// opcode to determine what register class is being generated. static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, const TargetLowering *TLI, @@ -289,7 +271,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, // Special handling for untyped values. These values can only come from // the expansion of custom DAG-to-DAG patterns. - if (VT == MVT::untyped) { + if (VT == MVT::Untyped) { const SDNode *Node = RegDefPos.GetNode(); unsigned Opcode = Node->getMachineOpcode(); @@ -319,18 +301,16 @@ void ScheduleDAGRRList::Schedule() { DEBUG(dbgs() << "********** List Scheduling BB#" << BB->getNumber() << " '" << BB->getName() << "' **********\n"); -#ifndef NDEBUG - for (int i = 0; i < NumFactors; ++i) { - FactorCount[i] = 0; - } -#endif //!NDEBUG CurCycle = 0; IssueCount = 0; MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); - LiveRegGens.resize(TRI->getNumRegs(), NULL); + // Allocate slots for each physical register, plus one for a special register + // to track the virtual resource of a calling sequence. + LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); + LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); + CallSeqEndForStart.clear(); // Build the scheduling graph. BuildSchedGraph(NULL); @@ -343,18 +323,16 @@ void ScheduleDAGRRList::Schedule() { HazardRec->Reset(); - // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. - if (isBottomUp) - ListScheduleBottomUp(); - else - ListScheduleTopDown(); + // Execute the actual scheduling loop. + ListScheduleBottomUp(); -#ifndef NDEBUG - for (int i = 0; i < NumFactors; ++i) { - DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n"); - } -#endif // !NDEBUG AvailableQueue->releaseState(); + + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } //===----------------------------------------------------------------------===// @@ -376,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { #endif --PredSU->NumSuccsLeft; - if (!ForceUnitLatencies()) { + if (!forceUnitLatencies()) { // Updating predecessor's height. This is now the cycle when the // predecessor can be scheduled without causing a pipeline stall. PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); @@ -403,6 +381,109 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { } } +/// IsChainDependent - Test if Outer is reachable from Inner through +/// chain dependencies. +static bool IsChainDependent(SDNode *Outer, SDNode *Inner, + unsigned NestLevel, + const TargetInstrInfo *TII) { + SDNode *N = Outer; + for (;;) { + if (N == Inner) + return true; + // For a TokenFactor, examine each operand. There may be multiple ways + // to get to the CALLSEQ_BEGIN, but we need to find the path with the + // most nesting in order to ensure that we find the corresponding match. + if (N->getOpcode() == ISD::TokenFactor) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII)) + return true; + return false; + } + // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. 
+ if (N->isMachineOpcode()) { + if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameDestroyOpcode()) { + ++NestLevel; + } else if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameSetupOpcode()) { + if (NestLevel == 0) + return false; + --NestLevel; + } + } + // Otherwise, find the chain and continue climbing. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) { + N = N->getOperand(i).getNode(); + goto found_chain_operand; + } + return false; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return false; + } +} + +/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate +/// the corresponding (lowered) CALLSEQ_BEGIN node. +/// +/// NestLevel and MaxNested are used in recursion to indcate the current level +/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum +/// level seen so far. +/// +/// TODO: It would be better to give CALLSEQ_END an explicit operand to point +/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. +static SDNode * +FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, + const TargetInstrInfo *TII) { + for (;;) { + // For a TokenFactor, examine each operand. There may be multiple ways + // to get to the CALLSEQ_BEGIN, but we need to find the path with the + // most nesting in order to ensure that we find the corresponding match. + if (N->getOpcode() == ISD::TokenFactor) { + SDNode *Best = 0; + unsigned BestMaxNest = MaxNest; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + unsigned MyNestLevel = NestLevel; + unsigned MyMaxNest = MaxNest; + if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(), + MyNestLevel, MyMaxNest, TII)) + if (!Best || (MyMaxNest > BestMaxNest)) { + Best = New; + BestMaxNest = MyMaxNest; + } + } + assert(Best); + MaxNest = BestMaxNest; + return Best; + } + // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. + if (N->isMachineOpcode()) { + if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameDestroyOpcode()) { + ++NestLevel; + MaxNest = std::max(MaxNest, NestLevel); + } else if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameSetupOpcode()) { + assert(NestLevel != 0); + --NestLevel; + if (NestLevel == 0) + return N; + } + } + // Otherwise, find the chain and continue climbing. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) { + N = N->getOperand(i).getNode(); + goto found_chain_operand; + } + return 0; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return 0; + } +} + /// Call ReleasePred for each predecessor, then update register live def/gen. /// Always update LiveRegDefs for a register dependence even if the current SU /// also defines the register. This effectively create one large live range @@ -440,6 +521,27 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { } } } + + // If we're scheduling a lowered CALLSEQ_END, find the corresponding + // CALLSEQ_BEGIN. Inject an artificial physical register dependence between + // these nodes, to prevent other calls from being interscheduled with them. 
+ unsigned CallResource = TRI->getNumRegs(); + if (!LiveRegDefs[CallResource]) + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + unsigned NestLevel = 0; + unsigned MaxNest = 0; + SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); + + SUnit *Def = &SUnits[N->getNodeId()]; + CallSeqEndForStart[Def] = SU; + + ++NumLiveRegs; + LiveRegDefs[CallResource] = Def; + LiveRegGens[CallResource] = SU; + break; + } } /// Check to see if any of the pending instructions are ready to issue. If @@ -457,8 +559,7 @@ void ScheduleDAGRRList::ReleasePending() { // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { - unsigned ReadyCycle = - isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth(); + unsigned ReadyCycle = PendingQueue[i]->getHeight(); if (ReadyCycle < MinAvailableCycle) MinAvailableCycle = ReadyCycle; @@ -487,10 +588,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { } else { for (; CurCycle != NextCycle; ++CurCycle) { - if (isBottomUp) - HazardRec->RecedeCycle(); - else - HazardRec->AdvanceCycle(); + HazardRec->RecedeCycle(); } } // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the @@ -511,7 +609,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { // currently need to treat these nodes like real instructions. // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; - unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); + unsigned ReadyCycle = SU->getHeight(); // Bump CurCycle to account for latency. We assume the latency of other // available instructions may be hidden by the stall (not a full pipe stall). @@ -522,7 +620,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { // Calls are scheduled in their preceding cycle, so don't conflict with // hazards from instructions after the call. EmitNode will reset the // scoreboard state before emitting the call. - if (isBottomUp && SU->isCall) + if (SU->isCall) return; // FIXME: For resource conflicts in very long non-pipelined stages, we @@ -530,7 +628,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { int Stalls = 0; while (true) { ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls); + HazardRec->getHazardType(SU, -Stalls); if (HT == ScheduleHazardRecognizer::NoHazard) break; @@ -568,17 +666,13 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { HazardRec->Reset(); return; } - if (isBottomUp && SU->isCall) { + if (SU->isCall) { // Calls are scheduled with their preceding instructions. For bottom-up // scheduling, clear the pipeline state before emitting. HazardRec->Reset(); } HazardRec->EmitInstruction(SU); - - if (!isBottomUp && SU->isCall) { - HazardRec->Reset(); - } } static void resetVRegCycle(SUnit *SU); @@ -607,7 +701,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { Sequence.push_back(SU); - AvailableQueue->ScheduledNode(SU); + AvailableQueue->scheduledNode(SU); // If HazardRec is disabled, and each inst counts as one cycle, then // advance CurCycle before ReleasePredecessors to avoid useless pushes to @@ -630,6 +724,20 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { LiveRegGens[I->getReg()] = NULL; } } + // Release the special call resource dependence, if this is the beginning + // of a call. 
+ unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[CallResource] = NULL; + LiveRegGens[CallResource] = NULL; + } + } resetVRegCycle(SU); @@ -686,15 +794,41 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { } } + // Reclaim the special call resource dependence, if this is the beginning + // of a call. + unsigned CallResource = TRI->getNumRegs(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { + ++NumLiveRegs; + LiveRegDefs[CallResource] = SU; + LiveRegGens[CallResource] = CallSeqEndForStart[SU]; + } + } + + // Release the special call resource dependence, if this is the end + // of a call. + if (LiveRegGens[CallResource] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[CallResource] = NULL; + LiveRegGens[CallResource] = NULL; + } + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (I->isAssignedRegDep()) { + if (!LiveRegDefs[I->getReg()]) + ++NumLiveRegs; // This becomes the nearest def. Note that an earlier def may still be // pending if this is a two-address node. LiveRegDefs[I->getReg()] = SU; - if (!LiveRegDefs[I->getReg()]) { - ++NumLiveRegs; - } if (LiveRegGens[I->getReg()] == NULL || I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight()) LiveRegGens[I->getReg()] = I->getSUnit(); @@ -714,7 +848,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { else { AvailableQueue->push(SU); } - AvailableQueue->UnscheduledNode(SU); + AvailableQueue->unscheduledNode(SU); } /// After backtracking, the hazard checker needs to be restored to a state @@ -805,6 +939,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; + // unfolding an x86 DEC64m operation results in store, dec, load which + // can't be handled here so quit + if (NewNodes.size() == 3) + return NULL; + DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); @@ -830,7 +969,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { LoadNode->setNodeId(LoadSU->NodeNum); InitNumRegDefsLeft(LoadSU); - ComputeLatency(LoadSU); + computeLatency(LoadSU); } SUnit *NewSU = CreateNewSUnit(N); @@ -848,7 +987,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { NewSU->isCommutable = true; InitNumRegDefsLeft(NewSU); - ComputeLatency(NewSU); + computeLatency(NewSU); // Record all the edges to and from the old SU, by category. 
SmallVector<SDep, 4> ChainPreds; @@ -1027,7 +1166,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); unsigned NumRes = MCID.getNumDefs(); - for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1042,7 +1181,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SmallSet<unsigned, 4> &RegAdded, SmallVector<unsigned, 4> &LRegs, const TargetRegisterInfo *TRI) { - for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { + for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { // Check if Ref is live. if (!LiveRegDefs[*AliasI]) continue; @@ -1057,6 +1196,31 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, } } +/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered +/// by RegMask, and add them to LRegs. +static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVector<unsigned, 4> &LRegs) { + // Look at all live registers. Skip Reg0 and the special CallResource. + for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { + if (!LiveRegDefs[i]) continue; + if (LiveRegDefs[i] == SU) continue; + if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; + if (RegAdded.insert(i)) + LRegs.push_back(i); + } +} + +/// getNodeRegMask - Returns the register mask attached to an SDNode, if any. +static const uint32_t *getNodeRegMask(const SDNode *N) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (const RegisterMaskSDNode *Op = + dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode())) + return Op->getRegMask(); + return NULL; +} + /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay /// scheduling of the given node to satisfy live physical register dependencies. /// If the specific node is the last one that's available to schedule, do @@ -1108,10 +1272,27 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { if (!Node->isMachineOpcode()) continue; + // If we're in the middle of scheduling a call, don't begin scheduling + // another call. Also, don't allow any physical registers to be live across + // the call. + if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + // Check the special calling-sequence resource. 
+ unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource]) { + SDNode *Gen = LiveRegGens[CallResource]->getNode(); + while (SDNode *Glued = Gen->getGluedNode()) + Gen = Glued; + if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource)) + LRegs.push_back(CallResource); + } + } + if (const uint32_t *RegMask = getNodeRegMask(Node)) + CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs); + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) + for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } @@ -1300,99 +1481,10 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { std::reverse(Sequence.begin(), Sequence.end()); #ifndef NDEBUG - VerifySchedule(isBottomUp); -#endif -} - -//===----------------------------------------------------------------------===// -// Top-Down Scheduling -//===----------------------------------------------------------------------===// - -/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to -/// the AvailableQueue if the count reaches zero. Also update its cycle bound. -void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { - SUnit *SuccSU = SuccEdge->getSUnit(); - -#ifndef NDEBUG - if (SuccSU->NumPredsLeft == 0) { - dbgs() << "*** Scheduling failed! ***\n"; - SuccSU->dump(this); - dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); - } -#endif - --SuccSU->NumPredsLeft; - - // If all the node's predecessors are scheduled, this node is ready - // to be scheduled. Ignore the special ExitSU node. - if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { - SuccSU->isAvailable = true; - AvailableQueue->push(SuccSU); - } -} - -void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { - // Top down: release successors - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - assert(!I->isAssignedRegDep() && - "The list-tdrr scheduler doesn't yet support physreg dependencies!"); - - ReleaseSucc(SU, &*I); - } -} - -/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending -/// count of its successors. If a successor pending count is zero, add it to -/// the Available queue. -void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) { - DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); - DEBUG(SU->dump(this)); - - assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); - SU->setDepthToAtLeast(CurCycle); - Sequence.push_back(SU); - - ReleaseSuccessors(SU); - SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); -} - -/// ListScheduleTopDown - The main loop of list scheduling for top-down -/// schedulers. -void ScheduleDAGRRList::ListScheduleTopDown() { - AvailableQueue->setCurCycle(CurCycle); - - // Release any successors of the special Entry node. - ReleaseSuccessors(&EntrySU); - - // All leaves to Available queue. - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - // It is available if it has no predecessors. - if (SUnits[i].Preds.empty()) { - AvailableQueue->push(&SUnits[i]); - SUnits[i].isAvailable = true; - } - } - - // While Available queue is not empty, grab the node with the highest - // priority. If it is not ready put it back. Schedule the node. 
- Sequence.reserve(SUnits.size()); - while (!AvailableQueue->empty()) { - SUnit *CurSU = AvailableQueue->pop(); - - if (CurSU) - ScheduleNodeTopDown(CurSU); - ++CurCycle; - AvailableQueue->setCurCycle(CurCycle); - } - -#ifndef NDEBUG - VerifySchedule(isBottomUp); + VerifyScheduledSequence(/*isBottomUp=*/true); #endif } - //===----------------------------------------------------------------------===// // RegReductionPriorityQueue Definition //===----------------------------------------------------------------------===// @@ -1437,21 +1529,6 @@ struct bu_ls_rr_sort : public queue_sort { bool operator()(SUnit* left, SUnit* right) const; }; -// td_ls_rr_sort - Priority function for top down register pressure reduction -// scheduler. -struct td_ls_rr_sort : public queue_sort { - enum { - IsBottomUp = false, - HasReadyFilter = false - }; - - RegReductionPQBase *SPQ; - td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; -}; - // src_ls_rr_sort - Priority function for source order scheduler. struct src_ls_rr_sort : public queue_sort { enum { @@ -1510,6 +1587,7 @@ protected: std::vector<SUnit*> Queue; unsigned CurQueueId; bool TracksRegPressure; + bool SrcOrder; // SUnits - The SUnits for the current graph. std::vector<SUnit> *SUnits; @@ -1535,11 +1613,12 @@ public: RegReductionPQBase(MachineFunction &mf, bool hasReadyFilter, bool tracksrp, + bool srcorder, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) : SchedulingPriorityQueue(hasReadyFilter), - CurQueueId(0), TracksRegPressure(tracksrp), + CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { if (TracksRegPressure) { unsigned NumRC = TRI->getNumRegClasses(); @@ -1610,9 +1689,9 @@ public: int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; - void ScheduledNode(SUnit *SU); + void scheduledNode(SUnit *SU); - void UnscheduledNode(SUnit *SU); + void unscheduledNode(SUnit *SU); protected: bool canClobber(const SUnit *SU, const SUnit *Op); @@ -1654,10 +1733,12 @@ class RegReductionPriorityQueue : public RegReductionPQBase { public: RegReductionPriorityQueue(MachineFunction &mf, bool tracksrp, + bool srcorder, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) - : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli), + : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, + tii, tri, tli), Picker(this) {} bool isBottomUp() const { return SF::IsBottomUp; } @@ -1680,10 +1761,7 @@ public: SF DumpPicker = Picker; while (!DumpQueue.empty()) { SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); - if (isBottomUp()) - dbgs() << "Height " << SU->getHeight() << ": "; - else - dbgs() << "Depth " << SU->getDepth() << ": "; + dbgs() << "Height " << SU->getHeight() << ": "; SU->dump(DAG); } } @@ -1692,9 +1770,6 @@ public: typedef RegReductionPriorityQueue<bu_ls_rr_sort> BURegReductionPriorityQueue; -typedef RegReductionPriorityQueue<td_ls_rr_sort> -TDRegReductionPriorityQueue; - typedef RegReductionPriorityQueue<src_ls_rr_sort> SrcRegReductionPriorityQueue; @@ -1919,7 +1994,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { return PDiff; } -void RegReductionPQBase::ScheduledNode(SUnit *SU) { +void RegReductionPQBase::scheduledNode(SUnit *SU) { if (!TracksRegPressure) return; @@ -1988,7 +2063,7 @@ void 
RegReductionPQBase::ScheduledNode(SUnit *SU) { dumpRegPressure(); } -void RegReductionPQBase::UnscheduledNode(SUnit *SU) { +void RegReductionPQBase::unscheduledNode(SUnit *SU) { if (!TracksRegPressure) return; @@ -2235,37 +2310,29 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, int LHeight = (int)left->getHeight() + LPenalty; int RHeight = (int)right->getHeight() + RPenalty; - bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && + bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) && BUHasStall(left, LHeight, SPQ); - bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) && + bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) && BUHasStall(right, RHeight, SPQ); // If scheduling one of the node will cause a pipeline stall, delay it. // If scheduling either one of the node will cause a pipeline stall, sort // them according to their height. if (LStall) { - if (!RStall) { - DEBUG(++FactorCount[FactStall]); + if (!RStall) return 1; - } - if (LHeight != RHeight) { - DEBUG(++FactorCount[FactStall]); + if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1; - } - } else if (RStall) { - DEBUG(++FactorCount[FactStall]); + } else if (RStall) return -1; - } // If either node is scheduling for latency, sort them by height/depth // and latency. - if (!checkPref || (left->SchedulingPref == Sched::Latency || - right->SchedulingPref == Sched::Latency)) { + if (!checkPref || (left->SchedulingPref == Sched::ILP || + right->SchedulingPref == Sched::ILP)) { if (DisableSchedCycles) { - if (LHeight != RHeight) { - DEBUG(++FactorCount[FactHeight]); + if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1; - } } else { // If neither instruction stalls (!LStall && !RStall) then @@ -2274,17 +2341,14 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, int LDepth = left->getDepth() - LPenalty; int RDepth = right->getDepth() - RPenalty; if (LDepth != RDepth) { - DEBUG(++FactorCount[FactDepth]); DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum << ") depth " << LDepth << " vs SU (" << right->NodeNum << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } } - if (left->Latency != right->Latency) { - DEBUG(++FactorCount[FactOther]); + if (left->Latency != right->Latency) return left->Latency > right->Latency ? 1 : -1; - } } return 0; } @@ -2298,7 +2362,6 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { bool LHasPhysReg = left->hasPhysRegDefs; bool RHasPhysReg = right->hasPhysRegDefs; if (LHasPhysReg != RHasPhysReg) { - DEBUG(++FactorCount[FactRegUses]); #ifndef NDEBUG const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"}; #endif @@ -2324,10 +2387,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; } - if (LPriority != RPriority) { - DEBUG(++FactorCount[FactStatic]); + if (LPriority != RPriority) return LPriority > RPriority; - } // One or both of the nodes are calls and their sethi-ullman numbers are the // same, then keep source order. @@ -2360,18 +2421,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { // This creates more short live intervals. unsigned LDist = closestSucc(left); unsigned RDist = closestSucc(right); - if (LDist != RDist) { - DEBUG(++FactorCount[FactOther]); + if (LDist != RDist) return LDist < RDist; - } // How many registers becomes live when the node is scheduled. 
unsigned LScratch = calcMaxScratches(left); unsigned RScratch = calcMaxScratches(right); - if (LScratch != RScratch) { - DEBUG(++FactorCount[FactOther]); + if (LScratch != RScratch) return LScratch > RScratch; - } // Comparing latency against a call makes little sense unless the node // is register pressure-neutral. @@ -2386,20 +2443,15 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { return result > 0; } else { - if (left->getHeight() != right->getHeight()) { - DEBUG(++FactorCount[FactHeight]); + if (left->getHeight() != right->getHeight()) return left->getHeight() > right->getHeight(); - } - if (left->getDepth() != right->getDepth()) { - DEBUG(++FactorCount[FactDepth]); + if (left->getDepth() != right->getDepth()) return left->getDepth() < right->getDepth(); - } } assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); - DEBUG(++FactorCount[FactOther]); return (left->NodeQueueId > right->NodeQueueId); } @@ -2459,13 +2511,11 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. if (LHigh && !RHigh) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" << right->NodeNum << ")\n"); return true; } else if (!LHigh && RHigh) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" << left->NodeNum << ")\n"); return false; @@ -2529,7 +2579,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { RPDiff = SPQ->RegPressureDiff(right, RLiveUses); } if (!DisableSchedRegPressure && LPDiff != RPDiff) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); return LPDiff > RPDiff; @@ -2538,7 +2587,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { bool LReduce = canEnableCoalescing(left); bool RReduce = canEnableCoalescing(right); - DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]); if (LReduce && !RReduce) return false; if (RReduce && !LReduce) return true; } @@ -2546,17 +2594,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); - DEBUG(++FactorCount[FactRegUses]); return LLiveUses < RLiveUses; } if (!DisableSchedStalls) { bool LStall = BUHasStall(left, left->getHeight(), SPQ); bool RStall = BUHasStall(right, right->getHeight(), SPQ); - if (LStall != RStall) { - DEBUG(++FactorCount[FactHeight]); + if (LStall != RStall) return left->getHeight() > right->getHeight(); - } } if (!DisableSchedCriticalPath) { @@ -2565,17 +2610,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " << left->getDepth() << " != SU(" << right->NodeNum << "): " << right->getDepth() << "\n"); - DEBUG(++FactorCount[FactDepth]); return left->getDepth() < right->getDepth(); } } if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { int spread = (int)left->getHeight() - (int)right->getHeight(); - if (std::abs(spread) > MaxReorderWindow) { - DEBUG(++FactorCount[FactHeight]); + if (std::abs(spread) > MaxReorderWindow) return left->getHeight() 
> right->getHeight(); - } } return BURRSort(left, right, SPQ); @@ -2584,9 +2626,10 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { SUnits = &sunits; // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); + if (!Disable2AddrHack) + AddPseudoTwoAddrDeps(); // Reroute edges to nodes with multiple uses. - if (!TracksRegPressure) + if (!TracksRegPressure && !SrcOrder) PrescheduleNodesWithMultipleUses(); // Calculate node priorities. CalculateSethiUllmanNumbers(); @@ -2628,9 +2671,10 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - const unsigned *ImpDefs + const uint16_t *ImpDefs = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); - if(!ImpDefs) + const uint32_t *RegMask = getNodeRegMask(SU->getNode()); + if(!ImpDefs && !RegMask) return false; for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); @@ -2641,14 +2685,18 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, if (!PI->isAssignedRegDep()) continue; - for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) { - // Return true if SU clobbers this physical register use and the - // definition of the register reaches from DepSU. IsReachable queries a - // topological forward sort of the DAG (following the successors). - if (TRI->regsOverlap(*ImpDef, PI->getReg()) && - scheduleDAG->IsReachable(DepSU, PI->getSUnit())) - return true; - } + if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; + + if (ImpDefs) + for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + // Return true if SU clobbers this physical register use and the + // definition of the register reaches from DepSU. IsReachable queries + // a topological forward sort of the DAG (following the successors). 
+ if (TRI->regsOverlap(*ImpDef, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; } } return false; @@ -2661,16 +2709,17 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetRegisterInfo *TRI) { SDNode *N = SuccSU->getNode(); unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; - const unsigned *SUImpDefs = + const uint16_t *SUImpDefs = TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); - if (!SUImpDefs) - return false; + const uint32_t *SURegMask = getNodeRegMask(SUNode); + if (!SUImpDefs && !SURegMask) + continue; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue || VT == MVT::Other) @@ -2678,6 +2727,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, if (!N->hasAnyUseOfValue(i)) continue; unsigned Reg = ImpDefs[i - NumDefs]; + if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) + return true; + if (!SUImpDefs) + continue; for (;*SUImpDefs; ++SUImpDefs) { unsigned SUReg = *SUImpDefs; if (TRI->regsOverlap(Reg, SUReg)) @@ -2887,69 +2940,6 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { } } -/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled -/// predecessors of the successors of the SUnit SU. Stop when the provided -/// limit is exceeded. -static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, - unsigned Limit) { - unsigned Sum = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - const SUnit *SuccSU = I->getSUnit(); - for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), - EE = SuccSU->Preds.end(); II != EE; ++II) { - SUnit *PredSU = II->getSUnit(); - if (!PredSU->isScheduled) - if (++Sum > Limit) - return Sum; - } - } - return Sum; -} - - -// Top down -bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { - if (int res = checkSpecialNodes(left, right)) - return res < 0; - - unsigned LPriority = SPQ->getNodePriority(left); - unsigned RPriority = SPQ->getNodePriority(right); - bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); - bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); - bool LIsFloater = LIsTarget && left->NumPreds == 0; - bool RIsFloater = RIsTarget && right->NumPreds == 0; - unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; - unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 
2 : 0; - - if (left->NumSuccs == 0 && right->NumSuccs != 0) - return false; - else if (left->NumSuccs != 0 && right->NumSuccs == 0) - return true; - - if (LIsFloater) - LBonus -= 2; - if (RIsFloater) - RBonus -= 2; - if (left->NumSuccs == 1) - LBonus += 2; - if (right->NumSuccs == 1) - RBonus += 2; - - if (LPriority+LBonus != RPriority+RBonus) - return LPriority+LBonus < RPriority+RBonus; - - if (left->getDepth() != right->getDepth()) - return left->getDepth() < right->getDepth(); - - if (left->NumSuccsLeft != right->NumSuccsLeft) - return left->NumSuccsLeft > right->NumSuccsLeft; - - assert(left->NodeQueueId && right->NodeQueueId && - "NodeQueueId cannot be zero"); - return (left->NodeQueueId > right->NodeQueueId); -} - //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// @@ -2962,21 +2952,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); BURegReductionPriorityQueue *PQ = - new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); - PQ->setScheduleDAG(SD); - return SD; -} - -llvm::ScheduleDAGSDNodes * -llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, - CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - - TDRegReductionPriorityQueue *PQ = - new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -2990,7 +2966,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); SrcRegReductionPriorityQueue *PQ = - new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -3005,7 +2981,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, const TargetLowering *TLI = &IS->getTargetLowering(); HybridBURRPriorityQueue *PQ = - new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); @@ -3021,7 +2997,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS, const TargetLowering *TLI = &IS->getTargetLowering(); ILPBURRPriorityQueue *PQ = - new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 71f07d6fa47a..69dd813b24e0 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -17,6 +17,8 @@ #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include 
"llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -44,20 +46,26 @@ static cl::opt<int> HighLatencyCycles( "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf), + : ScheduleDAG(mf), BB(0), DAG(0), InstrItins(mf.getTarget().getInstrItineraryData()) {} /// Run - perform scheduling. /// -void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos) { +void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) { + BB = bb; DAG = dag; - ScheduleDAG::Run(bb, insertPos); + + // Clear the scheduler's SUnit DAG. + ScheduleDAG::clearDAG(); + Sequence.clear(); + + // Invoke the target's selection of scheduler. + Schedule(); } /// NewSUnit - Creates a new SUnit and return a ptr to it. /// -SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { +SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) { #ifndef NDEBUG const SUnit *Addr = 0; if (!SUnits.empty()) @@ -79,7 +87,7 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { } SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { - SUnit *SU = NewSUnit(Old->getNode()); + SUnit *SU = newSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; SU->isVRegCycle = Old->isVRegCycle; @@ -302,7 +310,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // If this node has already been processed, stop now. if (NI->getNodeId() != -1) continue; - SUnit *NodeSUnit = NewSUnit(NI); + SUnit *NodeSUnit = newSUnit(NI); // See if anything is glued to this node, if so, add them to glued // nodes. Nodes can have at most one glue input and one glue output. Glue @@ -360,7 +368,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { InitNumRegDefsLeft(NodeSUnit); // Assign the Latency field of NodeSUnit using target-provided information. - ComputeLatency(NodeSUnit); + computeLatency(NodeSUnit); } // Find all call operands. @@ -382,7 +390,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); // Check to see if the scheduler cares about latencies. - bool UnitLatencies = ForceUnitLatencies(); + bool UnitLatencies = forceUnitLatencies(); // Pass 2: add the preds, succs, etc. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { @@ -448,7 +456,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { - ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); + computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } @@ -541,7 +549,7 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { } } -void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { +void ScheduleDAGSDNodes::computeLatency(SUnit *SU) { SDNode *N = SU->getNode(); // TokenFactor operands are considered zero latency, and some schedulers @@ -553,7 +561,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { } // Check to see if the scheduler cares about latencies. - if (ForceUnitLatencies()) { + if (forceUnitLatencies()) { SU->Latency = 1; return; } @@ -575,10 +583,10 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { SU->Latency += TII->getInstrLatency(InstrItins, N); } -void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, +void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const{ // Check to see if the scheduler cares about latencies. 
- if (ForceUnitLatencies()) + if (forceUnitLatencies()) return; if (dep.getKind() != SDep::Data) @@ -621,6 +629,30 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { } } +void ScheduleDAGSDNodes::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} + +#ifndef NDEBUG +/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that +/// their state is consistent with the nodes listed in Sequence. +/// +void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { + unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +} +#endif // NDEBUG + namespace { struct OrderSorter { bool operator()(const std::pair<unsigned, MachineInstr*> &A, @@ -686,9 +718,48 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); } +void ScheduleDAGSDNodes:: +EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, + MachineBasicBlock::iterator InsertPos) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + if (I->getSUnit()->CopyDstRC) { + // Copy to physical register. + DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); + assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); + // Find the destination physical register. + unsigned Reg = 0; + for (SUnit::const_succ_iterator II = SU->Succs.begin(), + EE = SU->Succs.end(); II != EE; ++II) { + if (II->isCtrl()) continue; // ignore chain preds + if (II->getReg()) { + Reg = II->getReg(); + break; + } + } + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(VRI->second); + } else { + // Copy from physical register. + assert(I->getReg() && "Unknown physical register!"); + unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) + .addReg(I->getReg()); + } + break; + } +} -/// EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { +/// EmitSchedule - Emit the machine code in scheduled order. Return the new +/// InsertPos and MachineBasicBlock that contains this insertion +/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does +/// not necessarily refer to returned BB. The emitter may split blocks. +MachineBasicBlock *ScheduleDAGSDNodes:: +EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); DenseMap<SDValue, unsigned> VRBaseMap; DenseMap<SUnit*, unsigned> CopyVRBaseMap; @@ -711,7 +782,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { SUnit *SU = Sequence[i]; if (!SU) { // Null SUnit* is a noop. - EmitNoop(); + TII->insertNoop(*Emitter.getBlock(), InsertPos); continue; } @@ -719,7 +790,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // SDNode and any glued SDNodes and append them to the block. if (!SU->getNode()) { // Emit a copy. 
- EmitPhysRegCopy(SU, CopyVRBaseMap); + EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos); continue; } @@ -784,19 +855,24 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { } // Add trailing DbgValue's before the terminator. FIXME: May want to add // some of them before one or more conditional branches? + SmallVector<MachineInstr*, 8> DbgMIs; while (DI != DE) { - MachineBasicBlock *InsertBB = Emitter.getBlock(); - MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator(); - if (!(*DI)->isInvalidated()) { - MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap); - if (DbgMI) - InsertBB->insert(Pos, DbgMI); - } + if (!(*DI)->isInvalidated()) + if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) + DbgMIs.push_back(DbgMI); ++DI; } + + MachineBasicBlock *InsertBB = Emitter.getBlock(); + MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); + InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); } - BB = Emitter.getBlock(); InsertPos = Emitter.getInsertPos(); - return BB; + return Emitter.getBlock(); +} + +/// Return the basic block label. +std::string ScheduleDAGSDNodes::getDAGName() const { + return "sunit-dag." + BB->getFullName(); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 9c27b2ea02ec..75940ec33ddc 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -35,17 +35,20 @@ namespace llvm { /// class ScheduleDAGSDNodes : public ScheduleDAG { public: + MachineBasicBlock *BB; SelectionDAG *DAG; // DAG of the current basic block const InstrItineraryData *InstrItins; + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector<SUnit*> Sequence; + explicit ScheduleDAGSDNodes(MachineFunction &mf); virtual ~ScheduleDAGSDNodes() {} /// Run - perform scheduling. /// - void Run(SelectionDAG *dag, MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos); + void Run(SelectionDAG *dag, MachineBasicBlock *bb); /// isPassiveNode - Return true if the node is a non-scheduled leaf. /// @@ -53,6 +56,7 @@ namespace llvm { if (isa<ConstantSDNode>(Node)) return true; if (isa<ConstantFPSDNode>(Node)) return true; if (isa<RegisterSDNode>(Node)) return true; + if (isa<RegisterMaskSDNode>(Node)) return true; if (isa<GlobalAddressSDNode>(Node)) return true; if (isa<BasicBlockSDNode>(Node)) return true; if (isa<FrameIndexSDNode>(Node)) return true; @@ -67,7 +71,7 @@ namespace llvm { /// NewSUnit - Creates a new SUnit and return a ptr to it. /// - SUnit *NewSUnit(SDNode *N); + SUnit *newSUnit(SDNode *N); /// Clone - Creates a clone of the specified SUnit. It does not copy the /// predecessors / successors info nor the temporary scheduling states. @@ -78,7 +82,7 @@ namespace llvm { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - virtual void BuildSchedGraph(AliasAnalysis *AA); + void BuildSchedGraph(AliasAnalysis *AA); /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is /// CopyToReg and its only active data operands are CopyFromReg within a @@ -90,30 +94,41 @@ namespace llvm { /// void InitNumRegDefsLeft(SUnit *SU); - /// ComputeLatency - Compute node latency. + /// computeLatency - Compute node latency. 
/// - virtual void ComputeLatency(SUnit *SU); + virtual void computeLatency(SUnit *SU); - /// ComputeOperandLatency - Override dependence edge latency using + /// computeOperandLatency - Override dependence edge latency using /// operand use/def information /// - virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, + virtual void computeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { } - virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use, + virtual void computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const; - virtual MachineBasicBlock *EmitSchedule(); - /// Schedule - Order nodes according to selected style, filling /// in the Sequence member. /// virtual void Schedule() = 0; + /// VerifyScheduledSequence - Verify that all SUnits are scheduled and + /// consistent with the Sequence of scheduled instructions. + void VerifyScheduledSequence(bool isBottomUp); + + /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock + /// according to the order specified in Sequence. + /// + MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + virtual void dumpNode(const SUnit *SU) const; + void dumpSchedule() const; + virtual std::string getGraphNodeLabel(const SUnit *SU) const; + virtual std::string getDAGName() const; + virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; /// RegDefIter - In place iteration over the values defined by an @@ -159,6 +174,9 @@ namespace llvm { /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. void BuildSchedUnits(); void AddSchedEdges(); + + void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, + MachineBasicBlock::iterator InsertPos); }; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 430283d5eff9..c8512914c1e2 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -1,4 +1,4 @@ -//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===// +//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -31,6 +31,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/ResourcePriorityQueue.h" #include <climits> using namespace llvm; @@ -38,15 +39,15 @@ STATISTIC(NumNoops , "Number of noops inserted"); STATISTIC(NumStalls, "Number of pipeline stalls"); static RegisterScheduler - tdListDAGScheduler("list-td", "Top-down list scheduler", - createTDListDAGScheduler); + VLIWScheduler("vliw-td", "VLIW scheduler", + createVLIWDAGScheduler); namespace { //===----------------------------------------------------------------------===// -/// ScheduleDAGList - The actual list scheduler implementation. This supports -/// top-down scheduling. +/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This +/// supports / top-down scheduling. /// -class ScheduleDAGList : public ScheduleDAGSDNodes { +class ScheduleDAGVLIW : public ScheduleDAGSDNodes { private: /// AvailableQueue - The priority queue to use for the available SUnits. /// @@ -61,16 +62,20 @@ private: /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; + /// AA - AliasAnalysis for making memory reference queries. 
+ AliasAnalysis *AA; + public: - ScheduleDAGList(MachineFunction &mf, + ScheduleDAGVLIW(MachineFunction &mf, + AliasAnalysis *aa, SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) { + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { const TargetMachine &tm = mf.getTarget(); HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); } - ~ScheduleDAGList() { + ~ScheduleDAGVLIW() { delete HazardRec; delete AvailableQueue; } @@ -78,23 +83,25 @@ public: void Schedule(); private: - void ReleaseSucc(SUnit *SU, const SDep &D); - void ReleaseSuccessors(SUnit *SU); - void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); - void ListScheduleTopDown(); + void releaseSucc(SUnit *SU, const SDep &D); + void releaseSuccessors(SUnit *SU); + void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void listScheduleTopDown(); }; } // end anonymous namespace /// Schedule - Schedule the DAG using list scheduling. -void ScheduleDAGList::Schedule() { - DEBUG(dbgs() << "********** List Scheduling **********\n"); +void ScheduleDAGVLIW::Schedule() { + DEBUG(dbgs() + << "********** List Scheduling BB#" << BB->getNumber() + << " '" << BB->getName() << "' **********\n"); // Build the scheduling graph. - BuildSchedGraph(NULL); + BuildSchedGraph(AA); AvailableQueue->initNodes(SUnits); - ListScheduleTopDown(); + listScheduleTopDown(); AvailableQueue->releaseState(); } @@ -103,9 +110,9 @@ void ScheduleDAGList::Schedule() { // Top-Down Scheduling //===----------------------------------------------------------------------===// -/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to /// the PendingQueue if the count reaches zero. Also update its cycle bound. -void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { +void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { SUnit *SuccSU = D.getSUnit(); #ifndef NDEBUG @@ -122,25 +129,26 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. - if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { PendingQueue.push_back(SuccSU); + } } -void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { +void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) { // Top down: release successors. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { assert(!I->isAssignedRegDep() && "The list-td scheduler doesn't yet support physreg dependencies!"); - ReleaseSucc(SU, *I); + releaseSucc(SU, *I); } } -/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. 
-void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { +void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); @@ -148,20 +156,20 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); SU->setDepthToAtLeast(CurCycle); - ReleaseSuccessors(SU); + releaseSuccessors(SU); SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); + AvailableQueue->scheduledNode(SU); } -/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// listScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. -void ScheduleDAGList::ListScheduleTopDown() { +void ScheduleDAGVLIW::listScheduleTopDown() { unsigned CurCycle = 0; // Release any successors of the special Entry node. - ReleaseSuccessors(&EntrySU); + releaseSuccessors(&EntrySU); - // All leaves to Available queue. + // All leaves to AvailableQueue. for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { // It is available if it has no predecessors. if (SUnits[i].Preds.empty()) { @@ -170,7 +178,7 @@ void ScheduleDAGList::ListScheduleTopDown() { } } - // While Available queue is not empty, grab the node with the highest + // While AvailableQueue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. std::vector<SUnit*> NotReady; Sequence.reserve(SUnits.size()); @@ -184,7 +192,8 @@ void ScheduleDAGList::ListScheduleTopDown() { PendingQueue[i] = PendingQueue.back(); PendingQueue.pop_back(); --i; --e; - } else { + } + else { assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); } } @@ -192,6 +201,8 @@ void ScheduleDAGList::ListScheduleTopDown() { // If there are no instructions available, don't try to issue anything, and // don't advance the hazard recognizer. if (AvailableQueue->empty()) { + // Reset DFA state. + AvailableQueue->scheduledNode(0); ++CurCycle; continue; } @@ -223,7 +234,7 @@ void ScheduleDAGList::ListScheduleTopDown() { // If we found a node to schedule, do it now. if (FoundSUnit) { - ScheduleNodeTopDown(FoundSUnit, CurCycle); + scheduleNodeTopDown(FoundSUnit, CurCycle); HazardRec->EmitInstruction(FoundSUnit); // If this is a pseudo-op node, we don't want to increment the current @@ -250,7 +261,7 @@ void ScheduleDAGList::ListScheduleTopDown() { } #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/false); + VerifyScheduledSequence(/*isBottomUp=*/false); #endif } @@ -258,8 +269,8 @@ void ScheduleDAGList::ListScheduleTopDown() { // Public Constructor Functions //===----------------------------------------------------------------------===// -/// createTDListDAGScheduler - This creates a top-down list scheduler. +/// createVLIWDAGScheduler - This creates a top-down list scheduler. 
ScheduleDAGSDNodes * -llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { - return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue()); +llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS)); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 20bea8e4c9e9..92671d1678c6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" @@ -63,6 +62,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unknown FP format"); + case MVT::f16: return &APFloat::IEEEhalf; case MVT::f32: return &APFloat::IEEEsingle; case MVT::f64: return &APFloat::IEEEdouble; case MVT::f80: return &APFloat::x87DoubleExtended; @@ -125,20 +125,29 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { if (i == e) return false; // Do not accept build_vectors that aren't all constants or which have non-~0 - // elements. + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target and + // a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all ones, not whether the individual + // constants are. SDValue NotZero = N->getOperand(i); + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); if (isa<ConstantSDNode>(NotZero)) { - if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue()) + if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() < + EltSize) return false; } else if (isa<ConstantFPSDNode>(NotZero)) { - if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF(). - bitcastToAPInt().isAllOnesValue()) + if (cast<ConstantFPSDNode>(NotZero)->getValueAPF() + .bitcastToAPInt().countTrailingOnes() < EltSize) return false; } else return false; // Okay, we have at least one ~0 value, check to see if the rest match or are - // undefs. + // undefs. Even with the above element type twiddling, this should be OK, as + // the same type legalization should have applied to all the elements. 
for (++i; i != e; ++i) if (N->getOperand(i) != NotZero && N->getOperand(i).getOpcode() != ISD::UNDEF) @@ -384,7 +393,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::Register: ID.AddInteger(cast<RegisterSDNode>(N)->getReg()); break; - + case ISD::RegisterMask: + ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask()); + break; case ISD::SRCVALUE: ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); break; @@ -475,7 +486,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { /// static inline unsigned encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, - bool isNonTemporal) { + bool isNonTemporal, bool isInvariant) { assert((ConvType & 3) == ConvType && "ConvType may not require more than 2 bits!"); assert((AM & 7) == AM && @@ -483,7 +494,8 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, return ConvType | (AM << 2) | (isVolatile << 5) | - (isNonTemporal << 6); + (isNonTemporal << 6) | + (isInvariant << 7); } //===----------------------------------------------------------------------===// @@ -564,6 +576,12 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes, void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){ SmallVector<SDNode*, 16> DeadNodes(1, N); + + // Create a dummy node that adds a reference to the root node, preventing + // it from being deleted. (This matters if the root is an operand of the + // dead node.) + HandleSDNode Dummy(getRoot()); + RemoveDeadNodes(DeadNodes, UpdateListener); } @@ -834,9 +852,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { } // EntryNode could meaningfully have debug info if we can find it... -SelectionDAG::SelectionDAG(const TargetMachine &tm) +SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), + OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); Ordering = new SDNodeOrdering(); @@ -1025,16 +1043,14 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { return getConstantFP(APFloat((float)Val), VT, isTarget); else if (EltVT==MVT::f64) return getConstantFP(APFloat(Val), VT, isTarget); - else if (EltVT==MVT::f80 || EltVT==MVT::f128) { + else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) { bool ignored; APFloat apf = APFloat(Val); apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(apf, VT, isTarget); - } else { - assert(0 && "Unsupported type in getConstantFP"); - return SDValue(); - } + } else + llvm_unreachable("Unsupported type in getConstantFP"); } SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, @@ -1369,6 +1385,20 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { return SDValue(N, 0); } +SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0); + ID.AddPointer(RegMask); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { FoldingSetNodeID ID; 
SDValue Ops[] = { Root }; @@ -1598,7 +1628,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth) const { APInt KnownZero, KnownOne; - ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); return (KnownZero & Mask) == Mask; } @@ -1607,15 +1637,12 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, /// known to be either zero or one and return them in the KnownZero/KnownOne /// bitsets. This code only analyzes bits in Mask, in order to short-circuit /// processing. -void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, - APInt &KnownZero, APInt &KnownOne, - unsigned Depth) const { - unsigned BitWidth = Mask.getBitWidth(); - assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() && - "Mask size mismatches value type size!"); +void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, + APInt &KnownOne, unsigned Depth) const { + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. - if (Depth == 6 || Mask == 0) + if (Depth == 6) return; // Limit search depth. APInt KnownZero2, KnownOne2; @@ -1623,14 +1650,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, switch (Op.getOpcode()) { case ISD::Constant: // We know all of the bits for a constant! - KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask; - KnownZero = ~KnownOne & Mask; + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); + KnownZero = ~KnownOne; return; case ISD::AND: // If either the LHS or the RHS are Zero, the result is zero. 
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero, - KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1640,9 +1666,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZero |= KnownZero2; return; case ISD::OR: - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne, - KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1652,8 +1677,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownOne |= KnownOne2; return; case ISD::XOR: { - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1665,9 +1690,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::MUL: { - APInt Mask2 = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1686,33 +1710,29 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, LeadZ = std::min(LeadZ, BitWidth); KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | APInt::getHighBitsSet(BitWidth, LeadZ); - KnownZero &= Mask; return; } case ISD::UDIV: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. 
- APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(0), - AllOnes, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - ComputeMaskedBits(Op.getOperand(1), - AllOnes, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); return; } case ISD::SELECT: - ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1721,8 +1741,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZero &= KnownZero2; return; case ISD::SELECT_CC: - ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1754,8 +1774,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt), - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero <<= ShAmt; KnownOne <<= ShAmt; @@ -1772,13 +1791,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt), - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); KnownZero |= HighBits; // High bits known zero. } return; @@ -1790,15 +1808,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - APInt InDemandedMask = (Mask << ShAmt); // If any of the demanded bits are produced by the sign extension, we also // demand the input sign bit. 
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; - if (HighBits.getBoolValue()) - InDemandedMask |= APInt::getSignBit(BitWidth); + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, - Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); @@ -1820,10 +1834,10 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Sign extension. Compute the demanded bits in the result that are not // present in the input. - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask; + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); APInt InSignBit = APInt::getSignBit(EBits); - APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits); + APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); // If the sign extended bits are demanded, we know that the sign // bit is demanded. @@ -1831,8 +1845,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (NewBits.getBoolValue()) InputDemandedBits |= InSignBit; - ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownOne &= InputDemandedBits; + KnownZero &= InputDemandedBits; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the @@ -1850,7 +1865,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: { unsigned LowBits = Log2_32(BitWidth)+1; KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); @@ -1858,22 +1875,23 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); if (ISD::isZEXTLoad(Op.getNode())) { - LoadSDNode *LD = cast<LoadSDNode>(Op); EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarType().getSizeInBits(); - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + } else if (const MDNode *Ranges = LD->getRanges()) { + computeMaskedBitsLoad(*Ranges, KnownZero); } return; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask.trunc(InBits); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; @@ -1883,17 +1901,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InSignBit = APInt::getSignBit(InBits); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask.trunc(InBits); - - // If any of the sign extended bits are demanded, we know 
that the sign - // bit is demanded. Temporarily set this bit in the mask for our callee. - if (NewBits.getBoolValue()) - InMask |= InSignBit; + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // Note if the sign bit is known to be zero or one. bool SignBitKnownZero = KnownZero.isNegative(); @@ -1901,13 +1913,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, assert(!(SignBitKnownZero && SignBitKnownOne) && "Sign bit can't be known to be both zero and one!"); - // If the sign bit wasn't actually demanded by our caller, we don't - // want it set in the KnownZero and KnownOne result values. Reset the - // mask and reapply it to the result values. - InMask = Mask.trunc(InBits); - KnownZero &= InMask; - KnownOne &= InMask; - KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -1921,10 +1926,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask.trunc(InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); return; @@ -1932,10 +1936,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask.zext(InBits); KnownZero = KnownZero.zext(InBits); KnownOne = KnownOne.zext(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.trunc(BitWidth); KnownOne = KnownOne.trunc(BitWidth); @@ -1944,9 +1947,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::AssertZext: { EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); - ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, - KnownOne, Depth+1); - KnownZero |= (~InMask) & Mask; + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero |= (~InMask); return; } case ISD::FGETSIGN: @@ -1963,8 +1965,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2, - Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -1972,7 +1973,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if ((KnownZero2 & MaskV) == MaskV) { unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); // Top bits known zero. 
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2); } } } @@ -1983,13 +1984,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. - APInt Mask2 = APInt::getLowBitsSet(BitWidth, - BitWidth - Mask.countLeadingZeros()); - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); - ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); KnownZeroOut = std::min(KnownZeroOut, KnownZero2.countTrailingOnes()); @@ -2013,7 +2012,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (RA.isPowerOf2()) { APInt LowBits = RA - 1; APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -2028,10 +2027,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // the upper bits are all one. if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) KnownOne |= ~LowBits; - - KnownZero &= Mask; - KnownOne &= Mask; - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); } } @@ -2041,9 +2036,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - APInt Mask2 = LowBits & Mask; - KnownZero |= ~LowBits & Mask; - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1); + KnownZero |= ~LowBits; + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1); assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); break; } @@ -2051,16 +2045,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne, - Depth+1); - ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2, - Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); KnownOne.clearAllBits(); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); return; } case ISD::FrameIndex: @@ -2080,8 +2071,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. 
- TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, - Depth); + TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); return; } } @@ -2205,12 +2195,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)) == Mask) + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) return VTBits; // If we are subtracting one from a positive number, there is no carry @@ -2221,8 +2210,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); if (Tmp2 == 1) return 1; - return std::min(Tmp, Tmp2)-1; - break; + return std::min(Tmp, Tmp2)-1; case ISD::SUB: Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); @@ -2232,11 +2220,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)) == Mask) + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) return VTBits; // If the input is known to be positive (the sign bit is known clear), @@ -2251,8 +2238,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // is, at worst, one more bit than the inputs. Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp == 1) return 1; // Early out. - return std::min(Tmp, Tmp2)-1; - break; + return std::min(Tmp, Tmp2)-1; case ISD::TRUNCATE: // FIXME: it's tricky to do anything useful for this, but it is an important // case for targets like X86. @@ -2286,9 +2272,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + APInt Mask; if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; } else if (KnownOne.isNegative()) { // sign bit is 1; @@ -2328,7 +2314,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { // If we're told that NaNs won't happen, assume they won't. - if (NoNaNsFPMath) + if (getTarget().Options.NoNaNsFPMath) return true; // If the value is a constant, we can obviously see if it is a NaN or not. 
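Editor's note: the hunks above drop the demanded-bits Mask parameter from SelectionDAG::ComputeMaskedBits; the analysis now always fills KnownZero/KnownOne over the full bit width, and a caller that only cares about some bits applies its own mask afterwards, as the rewritten MaskedValueIsZero does. Below is a minimal standalone sketch of that caller-side pattern using plain 64-bit integers instead of APInt; the helper names are illustrative only, not LLVM APIs.

  #include <cassert>
  #include <cstdint>

  // Toy stand-in for the new ComputeMaskedBits signature: it computes
  // full-width known-zero/known-one sets and takes no mask input.
  static void computeKnownBits(uint64_t ConstVal, uint64_t &KnownZero,
                               uint64_t &KnownOne) {
    // For a constant every bit is known, mirroring the ISD::Constant case.
    KnownOne  = ConstVal;
    KnownZero = ~ConstVal;
  }

  // The caller masks the result itself, which is exactly how
  // MaskedValueIsZero is restructured in the hunk above.
  static bool maskedValueIsZero(uint64_t ConstVal, uint64_t Mask) {
    uint64_t KnownZero, KnownOne;
    computeKnownBits(ConstVal, KnownZero, KnownOne);
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    return (KnownZero & Mask) == Mask;
  }

  int main() {
    // 0x0F00 has its low 8 bits clear, so they are all known zero.
    return maskedValueIsZero(0x0F00, 0xFF) ? 0 : 1;
  }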
@@ -2423,8 +2409,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::CTPOP: return getConstant(Val.countPopulation(), VT); case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: return getConstant(Val.countLeadingZeros(), VT); case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: return getConstant(Val.countTrailingZeros(), VT); } } @@ -2440,7 +2428,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::FABS: V.clearSign(); return getConstantFP(V, VT); - case ISD::FP_ROUND: case ISD::FP_EXTEND: { bool ignored; // This can return overflow, underflow, or inexact; we don't care. @@ -2561,17 +2548,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, "Vector element count mismatch!"); if (OpOpcode == ISD::TRUNCATE) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); - else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || - OpOpcode == ISD::ANY_EXTEND) { + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + OpOpcode == ISD::ANY_EXTEND) { // If the source is smaller than the dest, we still need an extend. if (Operand.getNode()->getOperand(0).getValueType().getScalarType() .bitsLT(VT.getScalarType())) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); - else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) + if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); - else - return Operand.getNode()->getOperand(0); + return Operand.getNode()->getOperand(0); } + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); break; case ISD::BITCAST: // Basic sanity checking. @@ -2601,7 +2589,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, break; case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if (UnsafeFPMath && OpOpcode == ISD::FSUB) + if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), Operand.getNode()->getOperand(0)); if (OpOpcode == ISD::FNEG) // --X -> X @@ -2736,7 +2724,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (UnsafeFPMath) { + if (getTarget().Options.UnsafeFPMath) { if (Opcode == ISD::FADD) { // 0+x --> x if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) @@ -3005,6 +2993,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, default: break; } } + + if (Opcode == ISD::FP_ROUND) { + APFloat V = N1CFP->getValueAPF(); // make copy + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(*EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } } // Canonicalize an UNDEF to the RHS, even over a constant. 
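Editor's note: the getNode() hunk above folds the new CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF opcodes exactly like their plain counterparts when the operand is a constant, since a known value yields a defined leading/trailing-zero count even for the _ZERO_UNDEF forms (they only differ for a zero input, where any result is acceptable). A small self-contained sketch of that folding rule on a 32-bit constant; the helpers are portable stand-ins for APInt::countLeadingZeros()/countTrailingZeros(), not SelectionDAG code.

  #include <cstdint>
  #include <cstdio>

  static unsigned countLeadingZeros32(uint32_t V) {
    unsigned N = 0;
    for (uint32_t Bit = 1u << 31; Bit && !(V & Bit); Bit >>= 1)
      ++N;
    return N;                     // 32 when V == 0
  }

  static unsigned countTrailingZeros32(uint32_t V) {
    if (V == 0) return 32;
    unsigned N = 0;
    while (!(V & 1)) { V >>= 1; ++N; }
    return N;
  }

  int main() {
    uint32_t C = 0xF0; // a known constant operand
    // CTLZ and CTLZ_ZERO_UNDEF fold to the same constant here (24), as do
    // CTTZ and CTTZ_ZERO_UNDEF (4).
    std::printf("ctlz=%u cttz=%u\n", countLeadingZeros32(C),
                countTrailingZeros32(C));
    return 0;
  }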
@@ -3059,7 +3057,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (UnsafeFPMath) + if (getTarget().Options.UnsafeFPMath) return N2; break; case ISD::MUL: @@ -3133,16 +3131,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::SELECT: if (N1C) { if (N1C->getZExtValue()) - return N2; // select true, X, Y -> X - else - return N3; // select false, X, Y -> Y + return N2; // select true, X, Y -> X + return N3; // select false, X, Y -> Y } if (N2 == N3) return N2; // select C, X, X -> X break; case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); - break; case ISD::INSERT_SUBVECTOR: { SDValue Index = N3; if (VT.isSimple() && N1.getValueType().isSimple() @@ -3275,8 +3271,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, - const TargetLowering &TLI, - std::string &Str, unsigned Offset) { + const TargetLowering &TLI, StringRef Str) { // Handle vector with all elements zero. if (Str.empty()) { if (VT.isInteger()) @@ -3294,15 +3289,18 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, } assert(!VT.isVector() && "Can't handle vector type here!"); - unsigned NumBits = VT.getSizeInBits(); - unsigned MSB = NumBits / 8; + unsigned NumVTBytes = VT.getSizeInBits() / 8; + unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); + uint64_t Val = 0; - if (TLI.isLittleEndian()) - Offset = Offset + MSB - 1; - for (unsigned i = 0; i != MSB; ++i) { - Val = (Val << 8) | (unsigned char)Str[Offset]; - Offset += TLI.isLittleEndian() ? -1 : 1; + if (TLI.isLittleEndian()) { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Str[i] << i*8; + } else { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; } + return DAG.getConstant(Val, VT); } @@ -3317,7 +3315,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, /// isMemSrcFromString - Returns true if memcpy source is a string constant. /// -static bool isMemSrcFromString(SDValue Src, std::string &Str) { +static bool isMemSrcFromString(SDValue Src, StringRef &Str) { unsigned SrcDelta = 0; GlobalAddressSDNode *G = NULL; if (Src.getOpcode() == ISD::GlobalAddress) @@ -3331,11 +3329,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { if (!G) return false; - const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal()); - if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false)) - return true; - - return false; + return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false); } /// FindOptimalMemOpLowering - Determines the optimial series memory ops @@ -3345,7 +3339,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, + bool IsZeroVal, bool MemcpyStrSrc, SelectionDAG &DAG, const TargetLowering &TLI) { @@ -3359,7 +3353,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does // not need to be loaded. 
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, - NonScalarIntSafe, MemcpyStrSrc, + IsZeroVal, MemcpyStrSrc, DAG.getMachineFunction()); if (VT == MVT::Other) { @@ -3438,7 +3432,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; - std::string Str; + StringRef Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); @@ -3475,7 +3469,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // We only handle zero vectors here. // FIXME: Handle other cases where store of vector immediate is done in // a single instruction. - Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); + Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, @@ -3562,7 +3556,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), SrcPtrInfo.getWithOffset(SrcOff), isVol, - false, SrcAlign); + false, false, SrcAlign); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -3606,11 +3600,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - bool NonScalarIntSafe = + bool IsZeroVal = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 
0 : Align), 0, - NonScalarIntSafe, false, DAG, TLI)) + IsZeroVal, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3717,8 +3711,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMCPY), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMCPY), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), TLI.getPointerTy()), Args, *this, dl); @@ -3769,8 +3764,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMMOVE), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), TLI.getPointerTy()), Args, *this, dl); @@ -3829,8 +3825,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMSET), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMSET), + /*isTailCall=*/false, + /*doesNotReturn*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), TLI.getPointerTy()), Args, *this, dl); @@ -4138,8 +4135,9 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, - bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + bool isVolatile, bool isNonTemporal, bool isInvariant, + unsigned Alignment, const MDNode *TBAAInfo, + const MDNode *Ranges) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 @@ -4150,6 +4148,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, Flags |= MachineMemOperand::MOVolatile; if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + if (isInvariant) + Flags |= MachineMemOperand::MOInvariant; // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. 
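// Illustrative call site only, not part of the patch: the extra isInvariant
// and Ranges parameters threaded through getLoad above let a caller mark a
// load as invariant (setting MachineMemOperand::MOInvariant) and attach
// !range metadata. This assumes the simpler non-indexed getLoad overload
// updated in the following hunks; DAG, dl, Chain, Ptr, SV, Off, TBAAInfo and
// RangeMD stand in for values already in scope.
SDValue L = DAG.getLoad(MVT::i32, dl, Chain, Ptr,
                        MachinePointerInfo(SV, Off),
                        /*isVolatile=*/false,
                        /*isNonTemporal=*/false,
                        /*isInvariant=*/true,   // new parameter
                        /*Alignment=*/4,
                        TBAAInfo,               // may be 0
                        RangeMD);               // new: !range metadata, may be 0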
@@ -4159,7 +4159,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - TBAAInfo); + TBAAInfo, Ranges); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } @@ -4196,7 +4196,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), + MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); @@ -4213,10 +4214,13 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + bool isInvariant, unsigned Alignment, + const MDNode *TBAAInfo, + const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, - PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo); + PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo, Ranges); } SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, @@ -4226,7 +4230,7 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, unsigned Alignment, const MDNode *TBAAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, - PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment, + PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment, TBAAInfo); } @@ -4239,8 +4243,8 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, "Load is already a indexed load!"); return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), + false, LD->getAlignment()); } SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, @@ -4282,7 +4286,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(VT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4349,7 +4353,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(SVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4903,6 +4907,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, return N; } +/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away +/// the line number information on the merged node since it is not possible to +/// preserve the information that operation is associated with multiple lines. 
+/// This will make the debugger work better at -O0, where there is a higher
+/// probability of having other instructions associated with that line.
+///
+SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) {
+  DebugLoc NLoc = N->getDebugLoc();
+  if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) {
+    N->setDebugLoc(DebugLoc());
+  }
+  return N;
+}
+
 /// MorphNodeTo - This *mutates* the specified node to have the specified
 /// return type, opcode, and operands.
 ///
@@ -4924,7 +4942,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
     if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
-      return ON;
+      return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc());
   }

   if (!RemoveNodeFromCSEMaps(N))
@@ -5128,8 +5146,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
     IP = 0;
-    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
-      return cast<MachineSDNode>(E);
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+      return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL));
+    }
   }

   // Allocate a new MachineSDNode.
@@ -5290,6 +5309,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
     // already exists there, recursively merge the results together.
     AddModifiedNodeToCSEMaps(User, &Listener);
   }
+
+  // If we just RAUW'd the root, take note.
+  if (FromN == getRoot())
+    setRoot(To);
 }

 /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
@@ -5335,6 +5358,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
     // already exists there, recursively merge the results together.
     AddModifiedNodeToCSEMaps(User, &Listener);
   }
+
+  // If we just RAUW'd the root, take note.
+  if (From == getRoot().getNode())
+    setRoot(SDValue(To, getRoot().getResNo()));
 }

 /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
@@ -5373,6 +5400,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
     // already exists there, recursively merge the results together.
     AddModifiedNodeToCSEMaps(User, &Listener);
   }
+
+  // If we just RAUW'd the root, take note.
+  if (From == getRoot().getNode())
+    setRoot(SDValue(To[getRoot().getResNo()]));
 }

 /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
@@ -5431,6 +5462,10 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
     // already exists there, recursively merge the results together.
     AddModifiedNodeToCSEMaps(User, &Listener);
   }
+
+  // If we just RAUW'd the root, take note.
+ if (From == getRoot()) + setRoot(To); } namespace { @@ -5657,7 +5692,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal()); + MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(isNonTemporal() == MMO->isNonTemporal() && "Non-temporal encoding error!"); @@ -5670,7 +5705,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, : SDNode(Opc, dl, VTs, Ops, NumOps), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal()); + MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } @@ -5846,565 +5881,6 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const { return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); } -std::string SDNode::getOperationName(const SelectionDAG *G) const { - switch (getOpcode()) { - default: - if (getOpcode() < ISD::BUILTIN_OP_END) - return "<<Unknown DAG Node>>"; - if (isMachineOpcode()) { - if (G) - if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) - if (getMachineOpcode() < TII->getNumOpcodes()) - return TII->get(getMachineOpcode()).getName(); - return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; - } - if (G) { - const TargetLowering &TLI = G->getTargetLoweringInfo(); - const char *Name = TLI.getTargetNodeName(getOpcode()); - if (Name) return Name; - return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; - } - return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; - -#ifndef NDEBUG - case ISD::DELETED_NODE: - return "<<Deleted Node!>>"; -#endif - case ISD::PREFETCH: return "Prefetch"; - case ISD::MEMBARRIER: return "MemBarrier"; - case ISD::ATOMIC_FENCE: return "AtomicFence"; - case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; - case ISD::ATOMIC_SWAP: return "AtomicSwap"; - case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; - case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; - case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; - case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; - case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; - case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; - case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; - case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; - case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; - case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; - case ISD::ATOMIC_LOAD: return "AtomicLoad"; - case ISD::ATOMIC_STORE: return "AtomicStore"; - case ISD::PCMARKER: return "PCMarker"; - case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; - case ISD::SRCVALUE: return "SrcValue"; - case ISD::MDNODE_SDNODE: return "MDNode"; - case ISD::EntryToken: return "EntryToken"; - case ISD::TokenFactor: return "TokenFactor"; - case ISD::AssertSext: return "AssertSext"; - case ISD::AssertZext: return "AssertZext"; - - case ISD::BasicBlock: return "BasicBlock"; - case ISD::VALUETYPE: return "ValueType"; - case ISD::Register: return "Register"; - - case ISD::Constant: return "Constant"; - case ISD::ConstantFP: return "ConstantFP"; - case ISD::GlobalAddress: return "GlobalAddress"; - case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; - case ISD::FrameIndex: return "FrameIndex"; - case ISD::JumpTable: return 
"JumpTable"; - case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; - case ISD::RETURNADDR: return "RETURNADDR"; - case ISD::FRAMEADDR: return "FRAMEADDR"; - case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; - case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; - case ISD::LSDAADDR: return "LSDAADDR"; - case ISD::EHSELECTION: return "EHSELECTION"; - case ISD::EH_RETURN: return "EH_RETURN"; - case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; - case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; - case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP"; - case ISD::ConstantPool: return "ConstantPool"; - case ISD::ExternalSymbol: return "ExternalSymbol"; - case ISD::BlockAddress: return "BlockAddress"; - case ISD::INTRINSIC_WO_CHAIN: - case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: { - unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; - unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); - if (IID < Intrinsic::num_intrinsics) - return Intrinsic::getName((Intrinsic::ID)IID); - else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) - return TII->getName(IID); - llvm_unreachable("Invalid intrinsic ID"); - } - - case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; - case ISD::TargetConstant: return "TargetConstant"; - case ISD::TargetConstantFP:return "TargetConstantFP"; - case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; - case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; - case ISD::TargetFrameIndex: return "TargetFrameIndex"; - case ISD::TargetJumpTable: return "TargetJumpTable"; - case ISD::TargetConstantPool: return "TargetConstantPool"; - case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; - case ISD::TargetBlockAddress: return "TargetBlockAddress"; - - case ISD::CopyToReg: return "CopyToReg"; - case ISD::CopyFromReg: return "CopyFromReg"; - case ISD::UNDEF: return "undef"; - case ISD::MERGE_VALUES: return "merge_values"; - case ISD::INLINEASM: return "inlineasm"; - case ISD::EH_LABEL: return "eh_label"; - case ISD::HANDLENODE: return "handlenode"; - - // Unary operators - case ISD::FABS: return "fabs"; - case ISD::FNEG: return "fneg"; - case ISD::FSQRT: return "fsqrt"; - case ISD::FSIN: return "fsin"; - case ISD::FCOS: return "fcos"; - case ISD::FTRUNC: return "ftrunc"; - case ISD::FFLOOR: return "ffloor"; - case ISD::FCEIL: return "fceil"; - case ISD::FRINT: return "frint"; - case ISD::FNEARBYINT: return "fnearbyint"; - case ISD::FEXP: return "fexp"; - case ISD::FEXP2: return "fexp2"; - case ISD::FLOG: return "flog"; - case ISD::FLOG2: return "flog2"; - case ISD::FLOG10: return "flog10"; - - // Binary operators - case ISD::ADD: return "add"; - case ISD::SUB: return "sub"; - case ISD::MUL: return "mul"; - case ISD::MULHU: return "mulhu"; - case ISD::MULHS: return "mulhs"; - case ISD::SDIV: return "sdiv"; - case ISD::UDIV: return "udiv"; - case ISD::SREM: return "srem"; - case ISD::UREM: return "urem"; - case ISD::SMUL_LOHI: return "smul_lohi"; - case ISD::UMUL_LOHI: return "umul_lohi"; - case ISD::SDIVREM: return "sdivrem"; - case ISD::UDIVREM: return "udivrem"; - case ISD::AND: return "and"; - case ISD::OR: return "or"; - case ISD::XOR: return "xor"; - case ISD::SHL: return "shl"; - case ISD::SRA: return "sra"; - case ISD::SRL: return "srl"; - case ISD::ROTL: return "rotl"; - case ISD::ROTR: return "rotr"; - case ISD::FADD: return "fadd"; - case ISD::FSUB: return "fsub"; - case ISD::FMUL: return "fmul"; - case ISD::FDIV: return "fdiv"; - case ISD::FMA: return 
"fma"; - case ISD::FREM: return "frem"; - case ISD::FCOPYSIGN: return "fcopysign"; - case ISD::FGETSIGN: return "fgetsign"; - case ISD::FPOW: return "fpow"; - - case ISD::FPOWI: return "fpowi"; - case ISD::SETCC: return "setcc"; - case ISD::SELECT: return "select"; - case ISD::VSELECT: return "vselect"; - case ISD::SELECT_CC: return "select_cc"; - case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; - case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; - case ISD::CONCAT_VECTORS: return "concat_vectors"; - case ISD::INSERT_SUBVECTOR: return "insert_subvector"; - case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; - case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; - case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; - case ISD::CARRY_FALSE: return "carry_false"; - case ISD::ADDC: return "addc"; - case ISD::ADDE: return "adde"; - case ISD::SADDO: return "saddo"; - case ISD::UADDO: return "uaddo"; - case ISD::SSUBO: return "ssubo"; - case ISD::USUBO: return "usubo"; - case ISD::SMULO: return "smulo"; - case ISD::UMULO: return "umulo"; - case ISD::SUBC: return "subc"; - case ISD::SUBE: return "sube"; - case ISD::SHL_PARTS: return "shl_parts"; - case ISD::SRA_PARTS: return "sra_parts"; - case ISD::SRL_PARTS: return "srl_parts"; - - // Conversion operators. - case ISD::SIGN_EXTEND: return "sign_extend"; - case ISD::ZERO_EXTEND: return "zero_extend"; - case ISD::ANY_EXTEND: return "any_extend"; - case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; - case ISD::TRUNCATE: return "truncate"; - case ISD::FP_ROUND: return "fp_round"; - case ISD::FLT_ROUNDS_: return "flt_rounds"; - case ISD::FP_ROUND_INREG: return "fp_round_inreg"; - case ISD::FP_EXTEND: return "fp_extend"; - - case ISD::SINT_TO_FP: return "sint_to_fp"; - case ISD::UINT_TO_FP: return "uint_to_fp"; - case ISD::FP_TO_SINT: return "fp_to_sint"; - case ISD::FP_TO_UINT: return "fp_to_uint"; - case ISD::BITCAST: return "bitcast"; - case ISD::FP16_TO_FP32: return "fp16_to_fp32"; - case ISD::FP32_TO_FP16: return "fp32_to_fp16"; - - case ISD::CONVERT_RNDSAT: { - switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { - default: llvm_unreachable("Unknown cvt code!"); - case ISD::CVT_FF: return "cvt_ff"; - case ISD::CVT_FS: return "cvt_fs"; - case ISD::CVT_FU: return "cvt_fu"; - case ISD::CVT_SF: return "cvt_sf"; - case ISD::CVT_UF: return "cvt_uf"; - case ISD::CVT_SS: return "cvt_ss"; - case ISD::CVT_SU: return "cvt_su"; - case ISD::CVT_US: return "cvt_us"; - case ISD::CVT_UU: return "cvt_uu"; - } - } - - // Control flow instructions - case ISD::BR: return "br"; - case ISD::BRIND: return "brind"; - case ISD::BR_JT: return "br_jt"; - case ISD::BRCOND: return "brcond"; - case ISD::BR_CC: return "br_cc"; - case ISD::CALLSEQ_START: return "callseq_start"; - case ISD::CALLSEQ_END: return "callseq_end"; - - // Other operators - case ISD::LOAD: return "load"; - case ISD::STORE: return "store"; - case ISD::VAARG: return "vaarg"; - case ISD::VACOPY: return "vacopy"; - case ISD::VAEND: return "vaend"; - case ISD::VASTART: return "vastart"; - case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; - case ISD::EXTRACT_ELEMENT: return "extract_element"; - case ISD::BUILD_PAIR: return "build_pair"; - case ISD::STACKSAVE: return "stacksave"; - case ISD::STACKRESTORE: return "stackrestore"; - case ISD::TRAP: return "trap"; - - // Bit manipulation - case ISD::BSWAP: return "bswap"; - case ISD::CTPOP: return "ctpop"; - case ISD::CTTZ: return "cttz"; - case ISD::CTLZ: return "ctlz"; - - // Trampolines - case ISD::INIT_TRAMPOLINE: return 
"init_trampoline"; - case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; - - case ISD::CONDCODE: - switch (cast<CondCodeSDNode>(this)->get()) { - default: llvm_unreachable("Unknown setcc condition!"); - case ISD::SETOEQ: return "setoeq"; - case ISD::SETOGT: return "setogt"; - case ISD::SETOGE: return "setoge"; - case ISD::SETOLT: return "setolt"; - case ISD::SETOLE: return "setole"; - case ISD::SETONE: return "setone"; - - case ISD::SETO: return "seto"; - case ISD::SETUO: return "setuo"; - case ISD::SETUEQ: return "setue"; - case ISD::SETUGT: return "setugt"; - case ISD::SETUGE: return "setuge"; - case ISD::SETULT: return "setult"; - case ISD::SETULE: return "setule"; - case ISD::SETUNE: return "setune"; - - case ISD::SETEQ: return "seteq"; - case ISD::SETGT: return "setgt"; - case ISD::SETGE: return "setge"; - case ISD::SETLT: return "setlt"; - case ISD::SETLE: return "setle"; - case ISD::SETNE: return "setne"; - } - } -} - -const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { - switch (AM) { - default: - return ""; - case ISD::PRE_INC: - return "<pre-inc>"; - case ISD::PRE_DEC: - return "<pre-dec>"; - case ISD::POST_INC: - return "<post-inc>"; - case ISD::POST_DEC: - return "<post-dec>"; - } -} - -std::string ISD::ArgFlagsTy::getArgFlagsString() { - std::string S = "< "; - - if (isZExt()) - S += "zext "; - if (isSExt()) - S += "sext "; - if (isInReg()) - S += "inreg "; - if (isSRet()) - S += "sret "; - if (isByVal()) - S += "byval "; - if (isNest()) - S += "nest "; - if (getByValAlign()) - S += "byval-align:" + utostr(getByValAlign()) + " "; - if (getOrigAlign()) - S += "orig-align:" + utostr(getOrigAlign()) + " "; - if (getByValSize()) - S += "byval-size:" + utostr(getByValSize()) + " "; - return S + ">"; -} - -void SDNode::dump() const { dump(0); } -void SDNode::dump(const SelectionDAG *G) const { - print(dbgs(), G); - dbgs() << '\n'; -} - -void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (void*)this << ": "; - - for (unsigned i = 0, e = getNumValues(); i != e; ++i) { - if (i) OS << ","; - if (getValueType(i) == MVT::Other) - OS << "ch"; - else - OS << getValueType(i).getEVTString(); - } - OS << " = " << getOperationName(G); -} - -void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { - if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { - if (!MN->memoperands_empty()) { - OS << "<"; - OS << "Mem:"; - for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), - e = MN->memoperands_end(); i != e; ++i) { - OS << **i; - if (llvm::next(i) != e) - OS << " "; - } - OS << ">"; - } - } else if (const ShuffleVectorSDNode *SVN = - dyn_cast<ShuffleVectorSDNode>(this)) { - OS << "<"; - for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { - int Idx = SVN->getMaskElt(i); - if (i) OS << ","; - if (Idx < 0) - OS << "u"; - else - OS << Idx; - } - OS << ">"; - } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { - OS << '<' << CSDN->getAPIntValue() << '>'; - } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { - if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) - OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; - else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) - OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; - else { - OS << "<APFloat("; - CSDN->getValueAPF().bitcastToAPInt().dump(); - OS << ")>"; - } - } else if (const GlobalAddressSDNode *GADN = - dyn_cast<GlobalAddressSDNode>(this)) { - int64_t 
offset = GADN->getOffset(); - OS << '<'; - WriteAsOperand(OS, GADN->getGlobal()); - OS << '>'; - if (offset > 0) - OS << " + " << offset; - else - OS << " " << offset; - if (unsigned int TF = GADN->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { - OS << "<" << FIDN->getIndex() << ">"; - } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { - OS << "<" << JTDN->getIndex() << ">"; - if (unsigned int TF = JTDN->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ - int offset = CP->getOffset(); - if (CP->isMachineConstantPoolEntry()) - OS << "<" << *CP->getMachineCPVal() << ">"; - else - OS << "<" << *CP->getConstVal() << ">"; - if (offset > 0) - OS << " + " << offset; - else - OS << " " << offset; - if (unsigned int TF = CP->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { - OS << "<"; - const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); - if (LBB) - OS << LBB->getName() << " "; - OS << (const void*)BBDN->getBasicBlock() << ">"; - } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { - OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); - } else if (const ExternalSymbolSDNode *ES = - dyn_cast<ExternalSymbolSDNode>(this)) { - OS << "'" << ES->getSymbol() << "'"; - if (unsigned int TF = ES->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { - if (M->getValue()) - OS << "<" << M->getValue() << ">"; - else - OS << "<null>"; - } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { - if (MD->getMD()) - OS << "<" << MD->getMD() << ">"; - else - OS << "<null>"; - } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { - OS << ":" << N->getVT().getEVTString(); - } - else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { - OS << "<" << *LD->getMemOperand(); - - bool doExt = true; - switch (LD->getExtensionType()) { - default: doExt = false; break; - case ISD::EXTLOAD: OS << ", anyext"; break; - case ISD::SEXTLOAD: OS << ", sext"; break; - case ISD::ZEXTLOAD: OS << ", zext"; break; - } - if (doExt) - OS << " from " << LD->getMemoryVT().getEVTString(); - - const char *AM = getIndexedModeName(LD->getAddressingMode()); - if (*AM) - OS << ", " << AM; - - OS << ">"; - } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { - OS << "<" << *ST->getMemOperand(); - - if (ST->isTruncatingStore()) - OS << ", trunc to " << ST->getMemoryVT().getEVTString(); - - const char *AM = getIndexedModeName(ST->getAddressingMode()); - if (*AM) - OS << ", " << AM; - - OS << ">"; - } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { - OS << "<" << *M->getMemOperand() << ">"; - } else if (const BlockAddressSDNode *BA = - dyn_cast<BlockAddressSDNode>(this)) { - OS << "<"; - WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); - OS << ", "; - WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); - OS << ">"; - if (unsigned int TF = BA->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } - - if (G) - if (unsigned Order = G->GetOrdering(this)) - OS << " [ORD=" << Order << ']'; - - if (getNodeId() != -1) - OS << " [ID=" << getNodeId() << ']'; - - DebugLoc dl = getDebugLoc(); - if (G && !dl.isUnknown()) { - DIScope - 
Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); - OS << " dbg:"; - // Omit the directory, since it's usually long and uninteresting. - if (Scope.Verify()) - OS << Scope.getFilename(); - else - OS << "<unknown>"; - OS << ':' << dl.getLine(); - if (dl.getCol() != 0) - OS << ':' << dl.getCol(); - } -} - -void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - if (i) OS << ", "; else OS << " "; - OS << (void*)getOperand(i).getNode(); - if (unsigned RN = getOperand(i).getResNo()) - OS << ":" << RN; - } - print_details(OS, G); -} - -static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, - const SelectionDAG *G, unsigned depth, - unsigned indent) { - if (depth == 0) - return; - - OS.indent(indent); - - N->print(OS, G); - - if (depth < 1) - return; - - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - // Don't follow chain operands. - if (N->getOperand(i).getValueType() == MVT::Other) - continue; - OS << '\n'; - printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); - } -} - -void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, - unsigned depth) const { - printrWithDepthHelper(OS, this, G, depth, 0); -} - -void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { - // Don't print impossibly deep things. - printrWithDepth(OS, G, 10); -} - -void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { - printrWithDepth(dbgs(), G, depth); -} - -void SDNode::dumprFull(const SelectionDAG *G) const { - // Don't print impossibly deep things. - dumprWithDepth(G, 10); -} - -static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (N->getOperand(i).getNode()->hasOneUse()) - DumpNodes(N->getOperand(i).getNode(), indent+2, G); - else - dbgs() << "\n" << std::string(indent+2, ' ') - << (void*)N->getOperand(i).getNode() << ": <multiple use>"; - - - dbgs() << "\n"; - dbgs().indent(indent); - N->dump(G); -} - SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { assert(N->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); @@ -6527,20 +6003,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { const GlobalValue *GV; int64_t GVOffset = 0; if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - // If GV has specified alignment, then use it. Otherwise, use the preferred - // alignment. - unsigned Align = GV->getAlignment(); - if (!Align) { - if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { - if (GVar->hasInitializer()) { - const TargetData *TD = TLI.getTargetData(); - Align = TD->getPreferredAlignment(GVar); - } - } - if (!Align) - Align = TLI.getTargetData()->getABITypeAlignment(GV->getType()); - } - return MinAlign(Align, GVOffset); + unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); + APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); + llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, + TLI.getTargetData()); + unsigned AlignBits = KnownZero.countTrailingOnes(); + unsigned Align = AlignBits ? 
1 << std::min(31U, AlignBits) : 0; + if (Align) + return MinAlign(Align, GVOffset); } // If this is a direct reference to a stack slot, use information about the @@ -6566,74 +6036,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { return 0; } -void SelectionDAG::dump() const { - dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; - - for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); - I != E; ++I) { - const SDNode *N = I; - if (!N->hasOneUse() && N != getRoot().getNode()) - DumpNodes(N, 2, this); - } - - if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); - - dbgs() << "\n\n"; -} - -void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); - print_details(OS, G); -} - -typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; -static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, - const SelectionDAG *G, VisitedSDNodeSet &once) { - if (!once.insert(N)) // If we've been here before, return now. - return; - - // Dump the current SDNode, but don't end the line yet. - OS << std::string(indent, ' '); - N->printr(OS, G); - - // Having printed this SDNode, walk the children: - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - - if (i) OS << ","; - OS << " "; - - if (child->getNumOperands() == 0) { - // This child has no grandchildren; print it inline right here. - child->printr(OS, G); - once.insert(child); - } else { // Just the address. FIXME: also print the child's opcode. - OS << (void*)child; - if (unsigned RN = N->getOperand(i).getResNo()) - OS << ":" << RN; - } - } - - OS << "\n"; - - // Dump children that have grandchildren on their own line(s). - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - DumpNodesr(OS, child, indent+2, G, once); - } -} - -void SDNode::dumpr() const { - VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, 0, once); -} - -void SDNode::dumpr(const SelectionDAG *G) const { - VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, G, once); -} - - // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 095b4001696f..94cb95804f69 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -41,13 +41,13 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -197,7 +197,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, // FP_ROUND's are always exact here. 
if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, - DAG.getIntPtrConstant(1)); + DAG.getTargetConstant(1, TLI.getPointerTy())); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } @@ -206,7 +206,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); - return SDValue(); } /// getCopyFromParts - Create a value that contains the specified legal parts @@ -353,10 +352,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { - assert(PartVT.isInteger() && ValueVT.isInteger() && + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. @@ -364,10 +366,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. - assert(PartVT.isInteger() && ValueVT.isInteger() && + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } // The value may have changed - recompute ValueVT. @@ -813,9 +818,11 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, } } -void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, + const TargetLibraryInfo *li) { AA = &aa; GFI = gfi; + LibInfo = li; TD = DAG.getTarget().getTargetData(); LPadToCallSiteMap.clear(); } @@ -964,7 +971,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DAG.AddDbgValue(SDV, Val.getNode(), false); } } else - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } } @@ -1054,6 +1061,23 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return DAG.getMergeValues(&Constants[0], Constants.size(), getCurDebugLoc()); } + + if (const ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(C)) { + SmallVector<SDValue, 4> Ops; + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. 
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Ops.push_back(SDValue(Val, i)); + } + + if (isa<ArrayType>(CDS->getType())) + return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); + } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && @@ -1088,9 +1112,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. SmallVector<SDValue, 16> Ops; - if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) { + if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { for (unsigned i = 0; i != NumElements; ++i) - Ops.push_back(getValue(CP->getOperand(i))); + Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); EVT EltVT = TLI.getValueType(VecTy->getElementType()); @@ -1126,7 +1150,6 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } llvm_unreachable("Can't get register for value!"); - return SDValue(); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { @@ -1285,8 +1308,8 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, } /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. -uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src, - MachineBasicBlock *Dst) { +uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { BranchProbabilityInfo *BPI = FuncInfo.BPI; if (!BPI) return 0; @@ -1336,6 +1359,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, Condition = getICmpCondCode(IC->getPredicate()); } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { Condition = getFCmpCondCode(FC->getPredicate()); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); } else { Condition = ISD::SETEQ; // silence warning. llvm_unreachable("Unknown compare instruction"); @@ -1811,8 +1836,8 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { CopyToExportRegsIfNeeded(&I); // Update successor info - InvokeMBB->addSuccessor(Return); - InvokeMBB->addSuccessor(LandingPad); + addSuccessorWithWeight(InvokeMBB, Return); + addSuccessorWithWeight(InvokeMBB, LandingPad); // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), @@ -1820,9 +1845,6 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { DAG.getBasicBlock(Return))); } -void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) { -} - void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); } @@ -1835,6 +1857,12 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); AddLandingPadInfo(LP, MMI, MBB); + // If there aren't registers to copy the values into (e.g., during SjLj + // exceptions), then don't bother to create these DAG nodes. 
+ if (TLI.getExceptionPointerRegister() == 0 && + TLI.getExceptionSelectorRegister() == 0) + return; + SmallVector<EVT, 2> ValueVTs; ComputeValueVTs(TLI, LP.getType(), ValueVTs); @@ -2003,7 +2031,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } static inline bool areJTsAllowed(const TargetLowering &TLI) { - return !DisableJumpTables && + return !TLI.getTargetMachine().Options.DisableJumpTables && (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } @@ -2190,7 +2218,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRange LHSR(CR.Range.first, Pivot); CaseRange RHSR(Pivot, CR.Range.second); - Constant *C = Pivot->Low; + const Constant *C = Pivot->Low; MachineBasicBlock *FalseBB = 0, *TrueBB = 0; // We know that we branch to the LHS if the Value being switched on is @@ -2383,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases - for (size_t i = 1; i < SI.getNumSuccessors(); ++i) { - BasicBlock *SuccBB = SI.getSuccessor(i); + for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); + i != e; ++i) { + const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0; - Cases.push_back(Case(SI.getSuccessorValue(i), - SI.getSuccessorValue(i), + Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), SMBB, ExtraWeight)); } std::sort(Cases.begin(), Cases.end(), CaseCmp()); @@ -2457,7 +2485,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // If there is only the default destination, branch to it if it is not the // next basic block. Otherwise, just fall through. - if (SI.getNumCases() == 1) { + if (!SI.getNumCases()) { // Update machine-CFG edges. // If this is not a fall-through branch, emit the branch. @@ -2626,6 +2654,8 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } @@ -2685,11 +2715,12 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) { SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), - DestVT, N, DAG.getIntPtrConstant(0))); + DestVT, N, + DAG.getTargetConstant(0, TLI.getPointerTy()))); } void SelectionDAGBuilder::visitFPExt(const User &I){ - // FPTrunc is never a no-op cast, no need to check + // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); @@ -2772,33 +2803,25 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { TLI.getValueType(I.getType()), InVec, InIdx)); } -// Utility for visitShuffleVector - Returns true if the mask is mask starting -// from SIndx and increasing to the element length (undefs are allowed). 
-static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
-  unsigned MaskNumElts = Mask.size();
-  for (unsigned i = 0; i != MaskNumElts; ++i)
-    if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+// Utility for visitShuffleVector - Return true if every element in Mask,
+// beginning from position Pos and ending in Pos+Size, falls within the
+// specified sequential range [L, L+Size) or is undef.
+static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
+                                unsigned Pos, unsigned Size, int Low) {
+  for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+    if (Mask[i] >= 0 && Mask[i] != Low)
       return false;
   return true;
 }

 void SelectionDAGBuilder::visitShuffleVector(const User &I) {
-  SmallVector<int, 8> Mask;
   SDValue Src1 = getValue(I.getOperand(0));
   SDValue Src2 = getValue(I.getOperand(1));

-  // Convert the ConstantVector mask operand into an array of ints, with -1
-  // representing undef values.
-  SmallVector<Constant*, 8> MaskElts;
-  cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
-  unsigned MaskNumElts = MaskElts.size();
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
-    if (isa<UndefValue>(MaskElts[i]))
-      Mask.push_back(-1);
-    else
-      Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
-  }
-
+  SmallVector<int, 8> Mask;
+  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
+  unsigned MaskNumElts = Mask.size();
+
   EVT VT = TLI.getValueType(I.getType());
   EVT SrcVT = Src1.getValueType();
   unsigned SrcNumElts = SrcVT.getVectorNumElements();
@@ -2814,11 +2837,23 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
     // Mask is longer than the source vectors and is a multiple of the source
     // vectors.  We can use concatenate vector to make the mask and vectors
     // lengths match.
-    if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
-      // The shuffle is concatenating two vectors together.
-      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
-                               VT, Src1, Src2));
-      return;
+    if (SrcNumElts*2 == MaskNumElts) {
+      // First check for Src1 in low and Src2 in high
+      if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
+          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
+        // The shuffle is concatenating two vectors together.
+        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+                                 VT, Src1, Src2));
+        return;
+      }
+      // Then check for Src2 in low and Src1 in high
+      if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
+          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
+        // The shuffle is concatenating two vectors together.
+        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+                                 VT, Src2, Src1));
+        return;
+      }
     }

     // Pad both vectors with undefs to make them the same length as the mask.
@@ -2843,10 +2878,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
     SmallVector<int, 8> MappedOps;
     for (unsigned i = 0; i != MaskNumElts; ++i) {
       int Idx = Mask[i];
-      if (Idx < (int)SrcNumElts)
-        MappedOps.push_back(Idx);
-      else
-        MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+      if (Idx >= (int)SrcNumElts)
+        Idx -= SrcNumElts - MaskNumElts;
+      MappedOps.push_back(Idx);
     }

     setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
@@ -2858,13 +2892,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
     // Analyze the access pattern of the vector to see if we can extract
     // two subvectors and do the shuffle. The analysis is done by calculating
     // the range of elements the mask access on both vectors.
- int MinRange[2] = { static_cast<int>(SrcNumElts+1), - static_cast<int>(SrcNumElts+1)}; + int MinRange[2] = { static_cast<int>(SrcNumElts), + static_cast<int>(SrcNumElts)}; int MaxRange[2] = {-1, -1}; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - int Input = 0; + unsigned Input = 0; if (Idx < 0) continue; @@ -2880,35 +2914,31 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Check if the access is smaller than the vector size and can we find // a reasonable extract index. - int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not - // Extract. + int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not + // Extract. int StartIdx[2]; // StartIdx to extract from - for (int Input=0; Input < 2; ++Input) { - if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) { + for (unsigned Input = 0; Input < 2; ++Input) { + if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { RangeUse[Input] = 0; // Unused StartIdx[Input] = 0; - } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) { - // Fits within range but we should see if we can find a good - // start index that is a multiple of the mask length. - if (MaxRange[Input] < (int)MaskNumElts) { - RangeUse[Input] = 1; // Extract from beginning of the vector - StartIdx[Input] = 0; - } else { - StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; - if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && - StartIdx[Input] + MaskNumElts <= SrcNumElts) - RangeUse[Input] = 1; // Extract from a multiple of the mask length. - } + continue; } + + // Find a good start index that is a multiple of the mask length. Then + // see if the rest of the elements are in range. + StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; + if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && + StartIdx[Input] + MaskNumElts <= SrcNumElts) + RangeUse[Input] = 1; // Extract from a multiple of the mask length. } if (RangeUse[0] == 0 && RangeUse[1] == 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } - else if (RangeUse[0] < 2 && RangeUse[1] < 2) { + if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { // Extract appropriate subvector and generate a vector shuffle - for (int Input=0; Input < 2; ++Input) { + for (unsigned Input = 0; Input < 2; ++Input) { SDValue &Src = Input == 0 ? 
Src1 : Src2; if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); @@ -2921,12 +2951,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector<int, 8> MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - if (Idx < 0) - MappedOps.push_back(Idx); - else if (Idx < (int)SrcNumElts) - MappedOps.push_back(Idx - StartIdx[0]); - else - MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); + if (Idx >= 0) { + if (Idx < (int)SrcNumElts) + Idx -= StartIdx[0]; + else + Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; + } + MappedOps.push_back(Idx); } setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, @@ -2942,22 +2973,20 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { EVT PtrVT = TLI.getPointerTy(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { - if (Mask[i] < 0) { - Ops.push_back(DAG.getUNDEF(EltVT)); - } else { - int Idx = Mask[i]; - SDValue Res; + int Idx = Mask[i]; + SDValue Res; - if (Idx < (int)SrcNumElts) - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src1, DAG.getConstant(Idx, PtrVT)); - else - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src2, - DAG.getConstant(Idx - SrcNumElts, PtrVT)); + if (Idx < 0) { + Res = DAG.getUNDEF(EltVT); + } else { + SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; + if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Ops.push_back(Res); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src, DAG.getConstant(Idx, PtrVT)); } + + Ops.push_back(Res); } setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), @@ -3042,7 +3071,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(I.getOperand(0)); - Type *Ty = I.getOperand(0)->getType(); + // Note that the pointer operand may be a vector of pointers. Take the scalar + // element which holds a pointer. 
+ Type *Ty = I.getOperand(0)->getType()->getScalarType(); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -3096,7 +3127,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned Amt = ElementSize.logBase2(); IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), N.getValueType(), IdxN, - DAG.getConstant(Amt, TLI.getPointerTy())); + DAG.getConstant(Amt, IdxN.getValueType())); } else { SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy()); IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), @@ -3175,8 +3206,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; + bool isInvariant = I.getMetadata("invariant.load") != 0; unsigned Alignment = I.getAlignment(); const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; @@ -3224,7 +3257,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, isInvariant, Alignment, TBAAInfo, + Ranges); Values[i] = L; Chains[ChainI] = L.getValue(1); @@ -3358,7 +3392,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { DebugLoc dl = getCurDebugLoc(); ISD::NodeType NT; switch (I.getOperation()) { - default: llvm_unreachable("Unknown atomicrmw operation"); return; + default: llvm_unreachable("Unknown atomicrmw operation"); case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; @@ -3496,24 +3530,16 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { SDValue Op = getValue(I.getArgOperand(i)); - assert(TLI.isTypeLegal(Op.getValueType()) && - "Intrinsic uses a non-legal type?"); Ops.push_back(Op); } SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); -#ifndef NDEBUG - for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) { - assert(TLI.isTypeLegal(ValueVTs[Val]) && - "Intrinsic uses a non-legal type?"); - } -#endif // NDEBUG if (HasChain) ValueVTs.push_back(MVT::Other); @@ -3556,6 +3582,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } setValue(&I, Result); + } else { + // Assign order to result here. If the intrinsic does not produce a result, + // it won't be mapped to a SDNode and visit() will not assign it an order + // number. + ++SDNodeOrder; + AssignOrderingToNode(Result.getNode()); } } @@ -3597,17 +3629,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); } -// implVisitAluOverflow - Lower arithmetic overflow instrinsics. 
-const char * -SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { - SDValue Op1 = getValue(I.getArgOperand(0)); - SDValue Op2 = getValue(I.getArgOperand(1)); - - SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); - return 0; -} - /// visitExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. void @@ -4367,9 +4388,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) { const SDValue &CFR = Ext.getOperand(0); if (CFR.getOpcode() == ISD::CopyFromReg) return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); - else - if (CFR.getOpcode() == ISD::TRUNCATE) - return getTruncatedArgReg(CFR); + if (CFR.getOpcode() == ISD::TRUNCATE) + return getTruncatedArgReg(CFR); } return 0; } @@ -4398,7 +4418,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Reg = TRI->getFrameRegister(MF); + Reg = TRI->getFrameRegister(MF); if (!Reg && N.getNode()) { if (N.getOpcode() == ISD::CopyFromReg) @@ -4473,9 +4493,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return 0; case Intrinsic::setjmp: - return "_setjmp"+!TLI.usesUnderscoreSetJmp(); + return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; case Intrinsic::longjmp: - return "_longjmp"+!TLI.usesUnderscoreLongJmp(); + return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. @@ -4531,8 +4551,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); MDNode *Variable = DI.getVariable(); const Value *Address = DI.getAddress(); - if (!Address || !DIVariable(Variable).Verify()) + if (!Address || !DIVariable(Variable).Verify()) { + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; + } // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder // but do not always have a corresponding SDNode built. The SDNodeOrder @@ -4543,7 +4565,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check if address has undef value. if (isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; } @@ -4553,11 +4575,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { N = UnusedArgNodeMap[Address]; SDDbgValue *SDV; if (N.getNode()) { - // Parameters are handled specially. - bool isParameter = - DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); + // Parameters are handled specially. + bool isParameter = + (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || + isa<Argument>(Address)); + const AllocaInst *AI = dyn_cast<AllocaInst>(Address); if (isParameter && !AI) { @@ -4577,7 +4601,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { 0, dl, SDNodeOrder); else { // Can't do anything with other non-AI cases yet. 
- DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); + DEBUG(Address->dump()); return 0; } DAG.AddDbgValue(SDV, N.getNode(), isParameter); @@ -4599,7 +4625,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } } } - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } return 0; @@ -4645,7 +4671,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter. - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } @@ -4654,8 +4680,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { V = BCI->getOperand(0); const AllocaInst *AI = dyn_cast<AllocaInst>(V); // Don't handle byval struct arguments or VLAs, for example. - if (!AI) + if (!AI) { + DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); + DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return 0; + } DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI == FuncInfo.StaticAllocaMap.end()) @@ -4667,43 +4696,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); return 0; } - case Intrinsic::eh_exception: { - // Insert the EXCEPTIONADDR instruction. - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); - SDValue Ops[1]; - Ops[0] = DAG.getRoot(); - SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1); - setValue(&I, Op); - DAG.setRoot(Op.getValue(1)); - return 0; - } - - case Intrinsic::eh_selector: { - MachineBasicBlock *CallMBB = FuncInfo.MBB; - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (CallMBB->isLandingPad()) - AddCatchInfo(I, &MMI, CallMBB); - else { -#ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(&I); -#endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - - // Insert the EHSELECTION instruction. - SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); - SDValue Ops[2]; - Ops[0] = getValue(I.getArgOperand(0)); - Ops[1] = getRoot(); - SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); - DAG.setRoot(Op.getValue(1)); - setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32)); - return 0; - } case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. 
@@ -4775,11 +4767,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getRoot(), getValue(I.getArgOperand(0)))); return 0; } - case Intrinsic::eh_sjlj_dispatch_setup: { - DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - getRoot(), getValue(I.getArgOperand(0)))); - return 0; - } case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: @@ -4841,6 +4828,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return 0; } + case Intrinsic::x86_avx_vinsertf128_pd_256: + case Intrinsic::x86_avx_vinsertf128_ps_256: + case Intrinsic::x86_avx_vinsertf128_si_256: + case Intrinsic::x86_avx2_vinserti128: { + DebugLoc dl = getCurDebugLoc(); + EVT DestVT = TLI.getValueType(I.getType()); + EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); + uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * + ElVT.getVectorNumElements(); + Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getConstant(Idx, MVT::i32)); + setValue(&I, Res); + return 0; + } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -4852,6 +4855,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertuu: { ISD::CvtCode Code = ISD::CVT_INVALID; switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::convertff: Code = ISD::CVT_FF; break; case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; case Intrinsic::convertfui: Code = ISD::CVT_FU; break; @@ -4946,14 +4950,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); - setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg)); + setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, + dl, Ty, Arg)); return 0; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); - setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg)); + setValue(&I, DAG.getNode(CI->isZero() ? 
ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, + dl, Ty, Arg)); return 0; } case Intrinsic::ctpop: { @@ -5052,7 +5060,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); - return 0; case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; @@ -5064,7 +5071,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::trap: { - StringRef TrapFuncName = getTrapFunctionName(); + StringRef TrapFuncName = TM.Options.getTrapFunctionName(); if (TrapFuncName.empty()) { DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); return 0; @@ -5073,25 +5080,36 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, /*isReturnValueUsed=*/true, + /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), Args, DAG, getCurDebugLoc()); DAG.setRoot(Result.second); return 0; } case Intrinsic::uadd_with_overflow: - return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: - return implVisitAluOverflow(I, ISD::SADDO); case Intrinsic::usub_with_overflow: - return implVisitAluOverflow(I, ISD::USUBO); case Intrinsic::ssub_with_overflow: - return implVisitAluOverflow(I, ISD::SSUBO); case Intrinsic::umul_with_overflow: - return implVisitAluOverflow(I, ISD::UMULO); - case Intrinsic::smul_with_overflow: - return implVisitAluOverflow(I, ISD::SMULO); + case Intrinsic::smul_with_overflow: { + ISD::NodeType Op; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; + case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; + case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; + case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; + case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; + case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; + } + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); + setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); + return 0; + } case Intrinsic::prefetch: { SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); @@ -5226,7 +5244,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // If there's a possibility that fast-isel has already selected some amount // of the current basic block, don't emit a tail call. 
- if (isTailCall && EnableFastISel) + if (isTailCall && TM.Options.EnableFastISel) isTailCall = false; std::pair<SDValue,SDValue> Result = @@ -5236,6 +5254,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), CS.getCallingConv(), isTailCall, + CS.doesNotReturn(), !CS.getInstruction()->use_empty(), Callee, Args, DAG, getCurDebugLoc()); assert((isTailCall || Result.second.getNode()) && @@ -5264,7 +5283,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), - false, false, 1); + false, false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } @@ -5375,7 +5394,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, Ptr, MachinePointerInfo(PtrVal), false /*volatile*/, - false /*nontemporal*/, 1 /* align=1 */); + false /*nontemporal*/, + false /*isinvariant*/, 1 /* align=1 */); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); @@ -5470,23 +5490,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } - // See if any floating point values are being passed to this function. This is - // used to emit an undefined reference to fltused on Windows. - FunctionType *FT = - cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0)); MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (FT->isVarArg() && - !MMI.callsExternalVAFunctionWithFloatingPointArguments()) { - for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - Type* T = I.getArgOperand(i)->getType(); - for (po_iterator<Type*> i = po_begin(T), e = po_end(T); - i != e; ++i) { - if (!i->isFloatingPointTy()) continue; - MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true); - break; - } - } - } + ComputeUsesVAFloatArgument(I, &MMI); const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { @@ -5509,7 +5514,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // can't be a library call. if (!F->hasLocalLinkage() && F->hasName()) { StringRef Name = F->getName(); - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") { + if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") || + (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") || + (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) { if (I.getNumArgOperands() == 2 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5520,7 +5527,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LHS.getValueType(), LHS, RHS)); return; } - } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { + } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") || + (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") || + (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType()) { @@ -5529,7 +5538,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { + } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") || + (LibInfo->has(LibFunc::sinf) && Name == "sinf") || + (LibInfo->has(LibFunc::sinl) && Name == "sinl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5539,7 +5550,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { + } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") || + (LibInfo->has(LibFunc::cosf) && Name == "cosf") || + (LibInfo->has(LibFunc::cosl) && Name == "cosl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5549,7 +5562,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { + } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") || + (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") || + (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5559,6 +5574,85 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } + } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") || + (LibInfo->has(LibFunc::floorf) && Name == "floorf") || + (LibInfo->has(LibFunc::floorl) && Name == "floorl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") || + (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") || + (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") || + (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") || + (LibInfo->has(LibFunc::ceill) && Name == "ceill")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") || + (LibInfo->has(LibFunc::rintf) && Name == "rintf") || + (LibInfo->has(LibFunc::rintl) && Name == "rintl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
+ I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") || + (LibInfo->has(LibFunc::truncf) && Name == "truncf") || + (LibInfo->has(LibFunc::truncl) && Name == "truncl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") || + (LibInfo->has(LibFunc::log2f) && Name == "log2f") || + (LibInfo->has(LibFunc::log2l) && Name == "log2l")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && + I.onlyReadsMemory()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") || + (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") || + (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && + I.onlyReadsMemory()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } } else if (Name == "memcmp") { if (visitMemCmpCall(I)) return; @@ -5596,22 +5690,6 @@ public: : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { } - /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers - /// busy in OutputRegs/InputRegs. - void MarkAllocatedRegs(bool isOutReg, bool isInReg, - std::set<unsigned> &OutputRegs, - std::set<unsigned> &InputRegs, - const TargetRegisterInfo &TRI) const { - if (isOutReg) { - for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) - MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI); - } - if (isInReg) { - for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) - MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI); - } - } - /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. @@ -5659,18 +5737,6 @@ public: return TLI.getValueType(OpTy, true); } - -private: - /// MarkRegAndAliases - Mark the specified register and all aliases in the - /// specified set. - static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs, - const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg"); - Regs.insert(Reg); - if (const unsigned *Aliases = TRI.getAliasSet(Reg)) - for (; *Aliases; ++Aliases) - Regs.insert(*Aliases); - } }; typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; @@ -5684,39 +5750,13 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand. -/// Input and OutputRegs are the set of already allocated physical registers. 
/// static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc DL, - SDISelAsmOperandInfo &OpInfo, - std::set<unsigned> &OutputRegs, - std::set<unsigned> &InputRegs) { + SDISelAsmOperandInfo &OpInfo) { LLVMContext &Context = *DAG.getContext(); - // Compute whether this value requires an input register, an output register, - // or both. - bool isOutReg = false; - bool isInReg = false; - switch (OpInfo.Type) { - case InlineAsm::isOutput: - isOutReg = true; - - // If there is an input constraint that matches this, we need to reserve - // the input register so no other inputs allocate to it. - isInReg = OpInfo.hasMatchingInput(); - break; - case InlineAsm::isInput: - isInReg = true; - isOutReg = false; - break; - case InlineAsm::isClobber: - isOutReg = true; - isInReg = true; - break; - } - - MachineFunction &MF = DAG.getMachineFunction(); SmallVector<unsigned, 4> Regs; @@ -5790,8 +5830,6 @@ static void GetRegistersForValue(SelectionDAG &DAG, } OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); - OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; } @@ -5822,8 +5860,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { /// ConstraintOperands - Information about all of the constraints. SDISelAsmOperandInfoVector ConstraintOperands; - std::set<unsigned> OutputRegs, InputRegs; - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); @@ -5956,7 +5992,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // constant pool entry to get its address. const Value *OpVal = OpInfo.CallOperandVal; if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || - isa<ConstantVector>(OpVal)) { + isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), TLI.getPointerTy()); } else { @@ -5985,8 +6021,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, - InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -5997,8 +6032,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, - InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -6052,9 +6086,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Copy the output from the appropriate register. Find a register that // we can use. - if (OpInfo.AssignedRegs.Regs.empty()) - report_fatal_error("Couldn't allocate output reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate output register for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // If this is an indirect operand, store through the pointer after the // asm. 
@@ -6154,9 +6192,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::vector<SDValue> Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); - if (Ops.empty()) - report_fatal_error("Invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (Ops.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // Add information to the INLINEASM node to know about this input. unsigned ResOpType = @@ -6187,9 +6229,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Don't know how to handle indirect register inputs yet!"); // Copy the input into the appropriate registers. - if (OpInfo.AssignedRegs.Regs.empty()) - report_fatal_error("Couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); @@ -6327,7 +6373,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, unsigned NumFixedArgs, CallingConv::ID CallConv, bool isTailCall, - bool isReturnValueUsed, + bool doesNotRet, bool isReturnValueUsed, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) const { @@ -6424,7 +6470,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, } SmallVector<SDValue, 4> InVals; - Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, + Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); // Verify that the target's LowerCall behaved as expected. @@ -6493,7 +6539,6 @@ void TargetLowering::LowerOperationWrapper(SDNode *N, SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("LowerOperation not implemented for this target!"); - return SDValue(); } void @@ -6515,10 +6560,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { /// isOnlyUsedInEntryBlock - If the specified argument is only used in the /// entry block, return true. This includes arguments used by switches, since /// the switch may expand into multiple basic blocks. -static bool isOnlyUsedInEntryBlock(const Argument *A) { +static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { // With FastISel active, we may be splitting blocks, so force creation // of virtual registers for all non-dead arguments. - if (EnableFastISel) + if (FastISel) return A->use_empty(); const BasicBlock *Entry = A->getParent()->begin(); @@ -6708,7 +6753,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { SDB->getCurDebugLoc()); SDB->setValue(I, Res); - if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { + if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = @@ -6718,7 +6763,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this argument is live outside of the entry block, insert a copy from // wherever we got it to the vreg that other BB's will reference it as. 
- if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { + if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { // If we can, though, try to skip creating an unnecessary vreg. // FIXME: This isn't very clean... it would be nice to make this more // general. It's also subtly incompatible with the hacks FastISel @@ -6729,7 +6774,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { continue; } } - if (!isOnlyUsedInEntryBlock(I)) { + if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) { FuncInfo->InitializeRegForValue(I); SDB->CopyToExportRegsIfNeeded(I); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 0a21ca3472ca..8393b414926a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -67,11 +67,11 @@ class SIToFPInst; class StoreInst; class SwitchInst; class TargetData; +class TargetLibraryInfo; class TargetLowering; class TruncInst; class UIToFPInst; class UnreachableInst; -class UnwindInst; class VAArgInst; class ZExtInst; @@ -129,13 +129,13 @@ private: /// Case - A struct to record the Value for a switch case, and the /// case's target basic block. struct Case { - Constant* Low; - Constant* High; + const Constant *Low; + const Constant *High; MachineBasicBlock* BB; uint32_t ExtraWeight; Case() : Low(0), High(0), BB(0), ExtraWeight(0) { } - Case(Constant* low, Constant* high, MachineBasicBlock* bb, + Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, uint32_t extraweight) : Low(low), High(high), BB(bb), ExtraWeight(extraweight) { } @@ -294,6 +294,7 @@ public: SelectionDAG &DAG; const TargetData *TD; AliasAnalysis *AA; + const TargetLibraryInfo *LibInfo; /// SwitchCases - Vector of CaseBlock structures used to communicate /// SwitchInst code generation information. @@ -338,7 +339,8 @@ public: HasTailCall(false), Context(dag.getContext()) { } - void init(GCFunctionInfo *gfi, AliasAnalysis &aa); + void init(GCFunctionInfo *gfi, AliasAnalysis &aa, + const TargetLibraryInfo *li); /// clear - Clear out the current SelectionDAG and the associated /// state and prepare this SelectionDAGBuilder object to be used @@ -451,7 +453,8 @@ private: MachineBasicBlock* Default, MachineBasicBlock *SwitchBB); - uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); + uint32_t getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const; void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, uint32_t Weight = 0); public: @@ -471,7 +474,6 @@ private: // These all get lowered before this pass. 
void visitInvoke(const InvokeInst &I); void visitResume(const ResumeInst &I); - void visitUnwind(const UnwindInst &I); void visitBinary(const User &I, unsigned OpCode); void visitShift(const User &I, unsigned Opcode); @@ -554,8 +556,6 @@ private: void visitUserOp2(const Instruction &I) { llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } - - const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op); void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp new file mode 100644 index 000000000000..f981afb437b0 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -0,0 +1,631 @@ +//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::dump method and friends. +// +//===----------------------------------------------------------------------===// + +#include "ScheduleDAGSDNodes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +std::string SDNode::getOperationName(const SelectionDAG *G) const { + switch (getOpcode()) { + default: + if (getOpcode() < ISD::BUILTIN_OP_END) + return "<<Unknown DAG Node>>"; + if (isMachineOpcode()) { + if (G) + if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) + if (getMachineOpcode() < TII->getNumOpcodes()) + return TII->getName(getMachineOpcode()); + return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; + } + if (G) { + const TargetLowering &TLI = G->getTargetLoweringInfo(); + const char *Name = TLI.getTargetNodeName(getOpcode()); + if (Name) return Name; + return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; + } + return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; + +#ifndef NDEBUG + case ISD::DELETED_NODE: return "<<Deleted Node!>>"; +#endif + case ISD::PREFETCH: return "Prefetch"; + case ISD::MEMBARRIER: return "MemBarrier"; + case ISD::ATOMIC_FENCE: return "AtomicFence"; + case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_SWAP: return "AtomicSwap"; + case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; + case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; + case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; + case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; + case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; + case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; + case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; + case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; + case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::ATOMIC_LOAD: return "AtomicLoad"; + case 
ISD::ATOMIC_STORE: return "AtomicStore"; + case ISD::PCMARKER: return "PCMarker"; + case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; + case ISD::SRCVALUE: return "SrcValue"; + case ISD::MDNODE_SDNODE: return "MDNode"; + case ISD::EntryToken: return "EntryToken"; + case ISD::TokenFactor: return "TokenFactor"; + case ISD::AssertSext: return "AssertSext"; + case ISD::AssertZext: return "AssertZext"; + + case ISD::BasicBlock: return "BasicBlock"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + case ISD::RegisterMask: return "RegisterMask"; + case ISD::Constant: return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::LSDAADDR: return "LSDAADDR"; + case ISD::EHSELECTION: return "EHSELECTION"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; + case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case ISD::BlockAddress: return "BlockAddress"; + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; + unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); + if (IID < Intrinsic::num_intrinsics) + return Intrinsic::getName((Intrinsic::ID)IID); + else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) + return TII->getName(IID); + llvm_unreachable("Invalid intrinsic ID"); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: return "TargetConstant"; + case ISD::TargetConstantFP: return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + case ISD::TargetBlockAddress: return "TargetBlockAddress"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::HANDLENODE: return "handlenode"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::FRINT: return "frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FEXP: return "fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::FLOG: return "flog"; + case ISD::FLOG2: return "flog2"; + case ISD::FLOG10: return "flog10"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: 
return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::SMUL_LOHI: return "smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::FMA: return "fma"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + case ISD::FPOW: return "fpow"; + + case ISD::FPOWI: return "fpowi"; + case ISD::SETCC: return "setcc"; + case ISD::SELECT: return "select"; + case ISD::VSELECT: return "vselect"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::INSERT_SUBVECTOR: return "insert_subvector"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SADDO: return "saddo"; + case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. 
+ case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::FP_ROUND_INREG: return "fp_round_inreg"; + case ISD::FP_EXTEND: return "fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::BITCAST: return "bitcast"; + case ISD::FP16_TO_FP32: return "fp16_to_fp32"; + case ISD::FP32_TO_FP16: return "fp32_to_fp16"; + + case ISD::CONVERT_RNDSAT: { + switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { + default: llvm_unreachable("Unknown cvt code!"); + case ISD::CVT_FF: return "cvt_ff"; + case ISD::CVT_FS: return "cvt_fs"; + case ISD::CVT_FU: return "cvt_fu"; + case ISD::CVT_SF: return "cvt_sf"; + case ISD::CVT_UF: return "cvt_uf"; + case ISD::CVT_SS: return "cvt_ss"; + case ISD::CVT_SU: return "cvt_su"; + case ISD::CVT_US: return "cvt_us"; + case ISD::CVT_UU: return "cvt_uu"; + } + } + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + case ISD::TRAP: return "trap"; + + // Bit manipulation + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; + case ISD::CTLZ: return "ctlz"; + case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; + + // Trampolines + case ISD::INIT_TRAMPOLINE: return "init_trampoline"; + case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; + + case ISD::CONDCODE: + switch (cast<CondCodeSDNode>(this)->get()) { + default: llvm_unreachable("Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + case ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setue"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + + case ISD::SETTRUE: return "settrue"; + case ISD::SETTRUE2: return "settrue2"; + case ISD::SETFALSE: return "setfalse"; + case ISD::SETFALSE2: return "setfalse2"; + } + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch 
(AM) { + default: return ""; + case ISD::PRE_INC: return "<pre-inc>"; + case ISD::PRE_DEC: return "<pre-dec>"; + case ISD::POST_INC: return "<post-inc>"; + case ISD::POST_DEC: return "<post-dec>"; + } +} + +void SDNode::dump() const { dump(0); } +void SDNode::dump(const SelectionDAG *G) const { + print(dbgs(), G); + dbgs() << '\n'; +} + +void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { + OS << (void*)this << ": "; + + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) OS << ","; + if (getValueType(i) == MVT::Other) + OS << "ch"; + else + OS << getValueType(i).getEVTString(); + } + OS << " = " << getOperationName(G); +} + +void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { + if (!MN->memoperands_empty()) { + OS << "<"; + OS << "Mem:"; + for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), + e = MN->memoperands_end(); i != e; ++i) { + OS << **i; + if (llvm::next(i) != e) + OS << " "; + } + OS << ">"; + } + } else if (const ShuffleVectorSDNode *SVN = + dyn_cast<ShuffleVectorSDNode>(this)) { + OS << "<"; + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); + if (i) OS << ","; + if (Idx < 0) + OS << "u"; + else + OS << Idx; + } + OS << ">"; + } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { + OS << '<' << CSDN->getAPIntValue() << '>'; + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; + else { + OS << "<APFloat("; + CSDN->getValueAPF().bitcastToAPInt().dump(); + OS << ")>"; + } + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(this)) { + int64_t offset = GADN->getOffset(); + OS << '<'; + WriteAsOperand(OS, GADN->getGlobal()); + OS << '>'; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = GADN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { + OS << "<" << FIDN->getIndex() << ">"; + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { + OS << "<" << JTDN->getIndex() << ">"; + if (unsigned int TF = JTDN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ + int offset = CP->getOffset(); + if (CP->isMachineConstantPoolEntry()) + OS << "<" << *CP->getMachineCPVal() << ">"; + else + OS << "<" << *CP->getConstVal() << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = CP->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { + OS << "<"; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + OS << LBB->getName() << " "; + OS << (const void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { + OS << ' ' << PrintReg(R->getReg(), G ? 
G->getTarget().getRegisterInfo() :0); + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(this)) { + OS << "'" << ES->getSymbol() << "'"; + if (unsigned int TF = ES->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << "<null>"; + } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { + if (MD->getMD()) + OS << "<" << MD->getMD() << ">"; + else + OS << "<null>"; + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { + OS << ":" << N->getVT().getEVTString(); + } + else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { + OS << "<" << *LD->getMemOperand(); + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << LD->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { + OS << "<" << *ST->getMemOperand(); + + if (ST->isTruncatingStore()) + OS << ", trunc to " << ST->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { + OS << "<" << *M->getMemOperand() << ">"; + } else if (const BlockAddressSDNode *BA = + dyn_cast<BlockAddressSDNode>(this)) { + OS << "<"; + WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); + OS << ", "; + WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); + OS << ">"; + if (unsigned int TF = BA->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } + + if (G) + if (unsigned Order = G->GetOrdering(this)) + OS << " [ORD=" << Order << ']'; + + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; + + DebugLoc dl = getDebugLoc(); + if (G && !dl.isUnknown()) { + DIScope + Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); + OS << " dbg:"; + // Omit the directory, since it's usually long and uninteresting. 
+ if (Scope.Verify()) + OS << Scope.getFilename(); + else + OS << "<unknown>"; + OS << ':' << dl.getLine(); + if (dl.getCol() != 0) + OS << ':' << dl.getCol(); + } +} + +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getNode()->hasOneUse()) + DumpNodes(N->getOperand(i).getNode(), indent+2, G); + else + dbgs() << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": <multiple use>"; + + dbgs() << '\n'; + dbgs().indent(indent); + N->dump(G); +} + +void SelectionDAG::dump() const { + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + + for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); + I != E; ++I) { + const SDNode *N = I; + if (!N->hasOneUse() && N != getRoot().getNode()) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + dbgs() << "\n\n"; +} + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + print_details(OS, G); +} + +typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N)) // If we've been here before, return now. + return; + + // Dump the current SDNode, but don't end the line yet. + OS.indent(indent); + N->printr(OS, G); + + // Having printed this SDNode, walk the children: + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + + if (i) OS << ","; + OS << " "; + + if (child->getNumOperands() == 0) { + // This child has no grandchildren; print it inline right here. + child->printr(OS, G); + once.insert(child); + } else { // Just the address. FIXME: also print the child's opcode. + OS << (void*)child; + if (unsigned RN = N->getOperand(i).getResNo()) + OS << ":" << RN; + } + } + + OS << "\n"; + + // Dump children that have grandchildren on their own line(s). + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + DumpNodesr(OS, child, indent+2, G, once); + } +} + +void SDNode::dumpr() const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, 0, once); +} + +void SDNode::dumpr(const SelectionDAG *G) const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, G, once); +} + +static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, + const SelectionDAG *G, unsigned depth, + unsigned indent) { + if (depth == 0) + return; + + OS.indent(indent); + + N->print(OS, G); + + if (depth < 1) + return; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + // Don't follow chain operands. + if (N->getOperand(i).getValueType() == MVT::Other) + continue; + OS << '\n'; + printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); + } +} + +void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, + unsigned depth) const { + printrWithDepthHelper(OS, this, G, depth, 0); +} + +void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { + // Don't print impossibly deep things. + printrWithDepth(OS, G, 10); +} + +void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { + printrWithDepth(dbgs(), G, depth); +} + +void SDNode::dumprFull(const SelectionDAG *G) const { + // Don't print impossibly deep things. 
+ dumprWithDepth(G, 10); +} + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; else OS << " "; + OS << (void*)getOperand(i).getNode(); + if (unsigned RN = getOperand(i).getResNo()) + OS << ":" << RN; + } + print_details(OS, G); +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 68b9146adfe1..605509bd227a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -41,6 +41,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -61,6 +62,80 @@ STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); +#ifndef NDEBUG +static cl::opt<bool> +EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden, + cl::desc("Enable extra verbose messages in the \"fast\" " + "instruction selector")); + // Terminators +STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret"); +STATISTIC(NumFastIselFailBr,"Fast isel fails on Br"); +STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch"); +STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr"); +STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke"); +STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume"); +STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable"); + + // Standard binary operators... +STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add"); +STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd"); +STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub"); +STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub"); +STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul"); +STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul"); +STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv"); +STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv"); +STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv"); +STATISTIC(NumFastIselFailURem,"Fast isel fails on URem"); +STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem"); +STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem"); + + // Logical operators... +STATISTIC(NumFastIselFailAnd,"Fast isel fails on And"); +STATISTIC(NumFastIselFailOr,"Fast isel fails on Or"); +STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor"); + + // Memory instructions... +STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca"); +STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load"); +STATISTIC(NumFastIselFailStore,"Fast isel fails on Store"); +STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg"); +STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRWM"); +STATISTIC(NumFastIselFailFence,"Fast isel fails on Frence"); +STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr"); + + // Convert instructions... 
+STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc"); +STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt"); +STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt"); +STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc"); +STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt"); +STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI"); +STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI"); +STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP"); +STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP"); +STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr"); +STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt"); +STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast"); + + // Other instructions... +STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp"); +STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp"); +STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI"); +STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select"); +STATISTIC(NumFastIselFailCall,"Fast isel fails on Call"); +STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl"); +STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr"); +STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr"); +STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg"); +STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement"); +STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement"); +STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector"); +STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue"); +STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue"); +STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad"); +#endif + static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " @@ -142,14 +217,15 @@ namespace llvm { CodeGenOpt::Level OptLevel) { const TargetLowering &TLI = IS->getTargetLowering(); - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOpt::None || + TLI.getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == Sched::Latency) - return createTDListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::RegPressure) return createBURRListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::Hybrid) return createHybridListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::VLIW) + return createVLIWDAGScheduler(IS, OptLevel); assert(TLI.getSchedulingPreference() == Sched::ILP && "Unknown sched type!"); return createILPListDAGScheduler(IS, OptLevel); @@ -174,12 +250,11 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, "TargetLowering::EmitInstrWithCustomInserter!"; #endif llvm_unreachable(0); - return 0; } void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - assert(!MI->getDesc().hasPostISelHook() && + assert(!MI->hasPostISelHook() && "If a target marks an instruction with 'hasPostISelHook', " "it must implement TargetLowering::AdjustInstrPostInstrSelection!"); } @@ -188,11 +263,13 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// +void SelectionDAGISel::ISelUpdater::anchor() { } + SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : 
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), - CurDAG(new SelectionDAG(tm)), + CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), @@ -200,6 +277,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -213,6 +291,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); + AU.addRequired<TargetLibraryInfo>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -258,9 +337,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Do some sanity-checking on the command-line options. - assert((!EnableFastISelVerbose || EnableFastISel) && + assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); - assert((!EnableFastISelAbort || EnableFastISel) && + assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && "-fast-isel-abort requires -fast-isel"); const Function &Fn = *mf.getFunction(); @@ -270,6 +349,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MF = &mf; RegInfo = &MF->getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); + LibInfo = &getAnalysis<TargetLibraryInfo>(); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); @@ -284,7 +364,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { else FuncInfo->BPI = 0; - SDB->init(GFI, *AA); + SDB->init(GFI, *AA, LibInfo); SelectAllBasicBlocks(Fn); @@ -348,7 +428,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII.get(TargetOpcode::DBG_VALUE)) .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) .addImm(Offset).addMetadata(Variable); - EntryMBB->insertAfter(CopyUseMI, NewMI); + MachineBasicBlock::iterator Pos = CopyUseMI; + EntryMBB->insertAfter(Pos, NewMI); } } } @@ -374,7 +455,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } // Determine if there is a call to setjmp in the machine function. - MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice()); + MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); // Replace forward-declared registers with the registers containing // the desired value. 
@@ -427,7 +508,6 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { Worklist.push_back(CurDAG->getRoot().getNode()); - APInt Mask; APInt KnownZero; APInt KnownOne; @@ -458,8 +538,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); - Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); - CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); + CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne); FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); } while (!Worklist.empty()); } @@ -478,8 +557,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getFunction()->getNameStr() + ":" + - FuncInfo->MBB->getBasicBlock()->getNameStr(); + BlockName = MF->getFunction()->getName().str() + ":" + + FuncInfo->MBB->getBasicBlock()->getName().str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); @@ -489,7 +568,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in pre-legalize mode. { NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); - CurDAG->Combine(Unrestricted, *AA, OptLevel); + CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel); } DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber @@ -517,7 +596,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Combining after legalize types", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel); } DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber @@ -542,7 +621,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel); } DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" @@ -564,7 +643,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in post-legalize mode. { NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel); } DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber @@ -592,7 +671,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("Instruction Scheduling", GroupName, TimePassesIsEnabled); - Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt); + Scheduler->Run(CurDAG, FuncInfo->MBB); } if (ViewSUnitDAGs) Scheduler->viewGraph(); @@ -603,8 +682,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); - LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(); - FuncInfo->InsertPt = Scheduler->InsertPos; + // FuncInfo->InsertPt is passed by reference and set to the end of the + // scheduled instructions. + LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt); } // If the block was split, make sure we update any references that are used to @@ -693,43 +773,18 @@ void SelectionDAGISel::PrepareEHLandingPad() { // Assign the call site to the landing pad's begin label. 
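The four Combine() call sites in the hunks above move from the old capability-style combiner level names to stage-based ones. A compilable sketch of the mapping these hunks assume (an illustration of the correspondence only, not the real enum definition):

// Illustration only: an enum shaped like the new combine levels used above.
enum CombineLevelSketch {
  BeforeLegalizeTypes,    // was Unrestricted: right after DAG construction
  AfterLegalizeTypes,     // was NoIllegalTypes: types legal, operations may not be
  AfterLegalizeVectorOps, // was NoIllegalOperations: after vector op legalization
  AfterLegalizeDAG        // was NoIllegalOperations: fully legalized DAG
};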
MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); - + const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); // Mark exception register as live in. - unsigned Reg = TLI.getExceptionAddressRegister(); + unsigned Reg = TLI.getExceptionPointerRegister(); if (Reg) MBB->addLiveIn(Reg); // Mark exception selector register as live in. Reg = TLI.getExceptionSelectorRegister(); if (Reg) MBB->addLiveIn(Reg); - - // FIXME: Hack around an exception handling flaw (PR1508): the personality - // function and list of typeids logically belong to the invoke (or, if you - // like, the basic block containing the invoke), and need to be associated - // with it in the dwarf exception handling tables. Currently however the - // information is provided by an intrinsic (eh.selector) that can be moved - // to unexpected places by the optimizers: if the unwind edge is critical, - // then breaking it can result in the intrinsics being in the successor of - // the landing pad, not the landing pad itself. This results - // in exceptions not being caught because no typeids are associated with - // the invoke. This may not be the only way things can go wrong, but it - // is the only way we try to work around for the moment. - const BasicBlock *LLVMBB = MBB->getBasicBlock(); - const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); - - if (Br && Br->isUnconditional()) { // Critical edge? - BasicBlock::const_iterator I, E; - for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) - if (isa<EHSelectorInst>(I)) - break; - - if (I == E) - // No catch info found - try to extract some from the successor. - CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo); - } } /// TryToFoldFastISelLoad - We're checking to see if we can fold the specified @@ -822,10 +877,90 @@ static bool isFoldedOrDeadInstruction(const Instruction *I, !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } +#ifndef NDEBUG +// Collect per Instruction statistics for fast-isel misses. Only those +// instructions that cause the bail are accounted for. It does not account for +// instructions higher in the block. Thus, summing the per instructions stats +// will not add up to what is reported by NumFastIselFailures. +static void collectFailStats(const Instruction *I) { + switch (I->getOpcode()) { + default: assert (0 && "<Invalid operator> "); + + // Terminators + case Instruction::Ret: NumFastIselFailRet++; return; + case Instruction::Br: NumFastIselFailBr++; return; + case Instruction::Switch: NumFastIselFailSwitch++; return; + case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return; + case Instruction::Invoke: NumFastIselFailInvoke++; return; + case Instruction::Resume: NumFastIselFailResume++; return; + case Instruction::Unreachable: NumFastIselFailUnreachable++; return; + + // Standard binary operators... 
+ case Instruction::Add: NumFastIselFailAdd++; return; + case Instruction::FAdd: NumFastIselFailFAdd++; return; + case Instruction::Sub: NumFastIselFailSub++; return; + case Instruction::FSub: NumFastIselFailFSub++; return; + case Instruction::Mul: NumFastIselFailMul++; return; + case Instruction::FMul: NumFastIselFailFMul++; return; + case Instruction::UDiv: NumFastIselFailUDiv++; return; + case Instruction::SDiv: NumFastIselFailSDiv++; return; + case Instruction::FDiv: NumFastIselFailFDiv++; return; + case Instruction::URem: NumFastIselFailURem++; return; + case Instruction::SRem: NumFastIselFailSRem++; return; + case Instruction::FRem: NumFastIselFailFRem++; return; + + // Logical operators... + case Instruction::And: NumFastIselFailAnd++; return; + case Instruction::Or: NumFastIselFailOr++; return; + case Instruction::Xor: NumFastIselFailXor++; return; + + // Memory instructions... + case Instruction::Alloca: NumFastIselFailAlloca++; return; + case Instruction::Load: NumFastIselFailLoad++; return; + case Instruction::Store: NumFastIselFailStore++; return; + case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return; + case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return; + case Instruction::Fence: NumFastIselFailFence++; return; + case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return; + + // Convert instructions... + case Instruction::Trunc: NumFastIselFailTrunc++; return; + case Instruction::ZExt: NumFastIselFailZExt++; return; + case Instruction::SExt: NumFastIselFailSExt++; return; + case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return; + case Instruction::FPExt: NumFastIselFailFPExt++; return; + case Instruction::FPToUI: NumFastIselFailFPToUI++; return; + case Instruction::FPToSI: NumFastIselFailFPToSI++; return; + case Instruction::UIToFP: NumFastIselFailUIToFP++; return; + case Instruction::SIToFP: NumFastIselFailSIToFP++; return; + case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return; + case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return; + case Instruction::BitCast: NumFastIselFailBitCast++; return; + + // Other instructions... + case Instruction::ICmp: NumFastIselFailICmp++; return; + case Instruction::FCmp: NumFastIselFailFCmp++; return; + case Instruction::PHI: NumFastIselFailPHI++; return; + case Instruction::Select: NumFastIselFailSelect++; return; + case Instruction::Call: NumFastIselFailCall++; return; + case Instruction::Shl: NumFastIselFailShl++; return; + case Instruction::LShr: NumFastIselFailLShr++; return; + case Instruction::AShr: NumFastIselFailAShr++; return; + case Instruction::VAArg: NumFastIselFailVAArg++; return; + case Instruction::ExtractElement: NumFastIselFailExtractElement++; return; + case Instruction::InsertElement: NumFastIselFailInsertElement++; return; + case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return; + case Instruction::ExtractValue: NumFastIselFailExtractValue++; return; + case Instruction::InsertValue: NumFastIselFailInsertValue++; return; + case Instruction::LandingPad: NumFastIselFailLandingPad++; return; + } +} +#endif + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; - if (EnableFastISel) + if (TM.Options.EnableFastISel) FastIS = TLI.createFastISel(*FuncInfo); // Iterate over all basic blocks in the function. 
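The hunk below replaces the old one-failure-per-bail statistic with bookkeeping that charges every instruction handed back to SelectionDAG as a fast-isel failure. A worked example of the accounting, using hypothetical counts:

// Hypothetical block of 10 IR instructions, selected bottom-up:
//   NumFastIselRemaining = std::distance(Begin, End) = 10
//   fast-isel selects 4 instructions           -> NumFastIselRemaining = 6
//   fast-isel bails on a call; SelectBasicBlock() emits the call plus the 3
//   instructions feeding it, so std::distance(Begin, BI) is now 2:
//   RemainingNow = 2
//   NumFastIselFailures += NumFastIselRemaining - RemainingNow;   // += 4
//   NumFastIselRemaining = RemainingNow;                          // = 2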
@@ -894,13 +1029,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->setLastLocalValue(0); } + unsigned NumFastIselRemaining = std::distance(Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { const Instruction *Inst = llvm::prior(BI); // If we no longer require this instruction, skip it. - if (isFoldedOrDeadInstruction(Inst, FuncInfo)) + if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { + --NumFastIselRemaining; continue; + } // Bottom-up: reset the insert pos at the top, after any local-value // instructions. @@ -908,6 +1046,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->SelectInstruction(Inst)) { + --NumFastIselRemaining; ++NumFastIselSuccess; // If fast isel succeeded, skip over all the folded instructions, and // then see if there is a load right before the selected instructions. @@ -920,15 +1059,23 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && BeforeInst->hasOneUse() && - TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) { // If we succeeded, don't re-select the load. BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); + --NumFastIselRemaining; + ++NumFastIselSuccess; + } continue; } +#ifndef NDEBUG + if (EnableFastISelVerbose2) + collectFailStats(Inst); +#endif + // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(Inst)) { - ++NumFastIselFailures; + if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed call: "; Inst->dump(); @@ -943,24 +1090,30 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { bool HadTailCall = false; SelectBasicBlock(Inst, BI, HadTailCall); + // Recompute NumFastIselRemaining as Selection DAG instruction + // selection may have handled the call, input args, etc. + unsigned RemainingNow = std::distance(Begin, BI); + NumFastIselFailures += NumFastIselRemaining - RemainingNow; + // If the call was emitted as a tail call, we're done with the block. if (HadTailCall) { --BI; break; } + NumFastIselRemaining = RemainingNow; continue; } if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) { // Don't abort, and use a different message for terminator misses. - ++NumFastIselFailures; + NumFastIselFailures += NumFastIselRemaining; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed terminator: "; Inst->dump(); } } else { - ++NumFastIselFailures; + NumFastIselFailures += NumFastIselRemaining; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; Inst->dump(); @@ -1289,7 +1442,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, APInt NeededMask = DesiredMask & ~ActualMask; APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne); + CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne); // If all the missing bits in the or are already known to be set, match! if ((NeededMask & KnownOne) == NeededMask) @@ -2025,6 +2178,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::EntryToken: // These nodes remain the same. 
case ISD::BasicBlock: case ISD::Register: + case ISD::RegisterMask: //case ISD::VALUETYPE: //case ISD::CONDCODE: case ISD::HANDLENODE: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index cd1647b17b9b..6cde05aea82a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" @@ -28,7 +27,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Config/config.h" using namespace llvm; namespace llvm { @@ -148,7 +146,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(), + ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), false, Title); #else errs() << "SelectionDAG::viewGraph is only available in debug builds on " diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 907d8d9da1af..09a2b1f3d7a5 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -36,31 +36,9 @@ using namespace llvm; /// - the promotion of vector elements. This feature is disabled by default /// and only enabled using this flag. static cl::opt<bool> -AllowPromoteIntElem("promote-elements", cl::Hidden, +AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true), cl::desc("Allow promotion of integer vector element types")); -namespace llvm { -TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) { - bool isLocal = GV->hasLocalLinkage(); - bool isDeclaration = GV->isDeclaration(); - // FIXME: what should we do for protected and internal visibility? - // For variables, is internal different from hidden? - bool isHidden = GV->hasHiddenVisibility(); - - if (reloc == Reloc::PIC_) { - if (isLocal || isHidden) - return TLSModel::LocalDynamic; - else - return TLSModel::GeneralDynamic; - } else { - if (!isDeclaration || isHidden) - return TLSModel::LocalExec; - else - return TLSModel::InitialExec; - } -} -} - /// InitLibcallNames - Set default libcall names. /// static void InitLibcallNames(const char **Names) { @@ -572,21 +550,42 @@ TargetLowering::TargetLowering(const TargetMachine &tm, // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f16, Expand); setOperationAction(ISD::ConstantFP, MVT::f32, Expand); setOperationAction(ISD::ConstantFP, MVT::f64, Expand); setOperationAction(ISD::ConstantFP, MVT::f80, Expand); // These library functions default to expand. 
- setOperationAction(ISD::FLOG , MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10,MVT::f64, Expand); - setOperationAction(ISD::FEXP , MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FLOG , MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG10,MVT::f32, Expand); - setOperationAction(ISD::FEXP , MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f16, Expand); + setOperationAction(ISD::FLOG2, MVT::f16, Expand); + setOperationAction(ISD::FLOG10, MVT::f16, Expand); + setOperationAction(ISD::FEXP , MVT::f16, Expand); + setOperationAction(ISD::FEXP2, MVT::f16, Expand); + setOperationAction(ISD::FFLOOR, MVT::f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); + setOperationAction(ISD::FCEIL, MVT::f16, Expand); + setOperationAction(ISD::FRINT, MVT::f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::f16, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FFLOOR, MVT::f32, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); + setOperationAction(ISD::FCEIL, MVT::f32, Expand); + setOperationAction(ISD::FRINT, MVT::f32, Expand); + setOperationAction(ISD::FTRUNC, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); @@ -610,7 +609,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, ExceptionSelectorRegister = 0; BooleanContents = UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; - SchedPreferenceInfo = Sched::Latency; + SchedPreferenceInfo = Sched::ILP; JumpBufSize = 0; JumpBufAlignment = 0; MinFunctionAlignment = 0; @@ -1080,8 +1079,12 @@ unsigned TargetLowering::getJumpTableEncoding() const { SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { // If our PIC model is GP relative, use the global offset table as the base. - if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress) + unsigned JTEncoding = getJumpTableEncoding(); + + if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) || + (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress)) return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + return Table; } @@ -1223,7 +1226,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Depth != 0) { // If not at the root, Just compute the KnownZero/KnownOne bits to // simplify things downstream. 
- TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); + TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); return false; } // If this is the root being simplified, allow it to have multiple uses, @@ -1242,8 +1245,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, switch (Op.getOpcode()) { case ISD::Constant: // We know all of the bits for a constant! - KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask; - KnownZero = ~KnownOne & NewMask; + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); + KnownZero = ~KnownOne; return false; // Don't fall through, will infinitely loop. case ISD::AND: // If the RHS is a constant, check to see if the LHS would be zero without @@ -1253,8 +1256,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { APInt LHSZero, LHSOne; // Do not increment Depth here; that can cause an infinite loop. - TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, - LHSZero, LHSOne, Depth); + TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth); // If the LHS already has zeros where RHSC does, this and is dead. if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op.getOperand(0)); @@ -1473,9 +1475,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { SDValue InnerOp = InOp.getNode()->getOperand(0); EVT InnerVT = InnerOp.getValueType(); - if ((APInt::getHighBitsSet(BitWidth, - BitWidth - InnerVT.getSizeInBits()) & - DemandedMask) == 0 && + unsigned InnerBits = InnerVT.getSizeInBits(); + if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 && isTypeDesirableForOp(ISD::SHL, InnerVT)) { EVT ShTy = getShiftAmountTy(InnerVT); if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) @@ -1545,7 +1546,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (DemandedMask == 1) + if (NewMask == 1) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); @@ -1588,23 +1589,40 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } break; case ISD::SIGN_EXTEND_INREG: { - EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); + // If we only care about the highest bit, don't bother shifting right. + if (MsbMask == DemandedMask) { + unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); + SDValue InOp = Op.getOperand(0); + + // Compute the correct shift amount type, which must be getShiftAmountTy + // for scalar types after legalization. + EVT ShiftAmtTy = Op.getValueType(); + if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); + + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), InOp, ShiftAmt)); + } // Sign extension. Compute the demanded bits in the result that are not // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getScalarType().getSizeInBits()); + BitWidth - ExVT.getScalarType().getSizeInBits()); // If none of the extended bits are demanded, eliminate the sextinreg. 
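The new MsbMask case in the SIGN_EXTEND_INREG handling above rewrites the node as a plain left shift when only the sign bit of the result is demanded. A small standalone check of why that is sound, assuming an i8 value extended in an i32 (mirroring BitWidth = 32, ExVT = i8, shift amount 32 - 8 = 24); the program is an illustration, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  for (int v = -128; v <= 127; ++v) {
    uint32_t x = static_cast<uint32_t>(v) & 0xFF;      // i8 payload in an i32
    // sign_extend_inreg x, i8: bit 7 is copied across bits 8..31.
    uint32_t SextInReg = static_cast<uint32_t>(static_cast<int32_t>(v));
    // The rewrite: shl x, 24 places bit 7 of x into bit 31.
    uint32_t Shifted = x << 24;
    // Only bit 31 is demanded, and it agrees for every i8 value.
    assert((SextInReg & 0x80000000u) == (Shifted & 0x80000000u));
  }
  return 0;
}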
if ((NewBits & NewMask) == 0) return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = - APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth); + APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, - EVT.getScalarType().getSizeInBits()) & + ExVT.getScalarType().getSizeInBits()) & NewMask; // Since the sign extended bits are demanded, we know that the sign @@ -1622,7 +1640,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the input sign bit is known zero, convert this into a zero extension. if (KnownZero.intersects(InSignBit)) return TLO.CombineTo(Op, - TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); + TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT)); if (KnownOne.intersects(InSignBit)) { // Input sign bit known set KnownOne |= NewBits; @@ -1688,11 +1706,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the sign bit is known one, the top bits match. if (KnownOne.intersects(InSignBit)) { - KnownOne |= NewBits; - KnownZero &= ~NewBits; + KnownOne |= NewBits; + assert((KnownZero & NewBits) == 0); } else { // Otherwise, top bits aren't known. - KnownOne &= ~NewBits; - KnownZero &= ~NewBits; + assert((KnownOne & NewBits) == 0); + assert((KnownZero & NewBits) == 0); } break; } @@ -1783,7 +1801,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::BITCAST: // If this is an FP->Int bitcast and if the sign bit is the only // thing demanded, turn this into a FGETSIGN. - if (!Op.getOperand(0).getValueType().isVector() && + if (!TLO.LegalOperations() && + !Op.getValueType().isVector() && + !Op.getOperand(0).getValueType().isVector() && NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); @@ -1824,7 +1844,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // FALL THROUGH default: // Just use ComputeMaskedBits to compute output bits. - TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth); + TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); break; } @@ -1840,7 +1860,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -1851,7 +1870,7 @@ void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, Op.getOpcode() == ISD::INTRINSIC_VOID) && "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); } /// ComputeNumSignBitsForTargetNode - This method can be implemented by @@ -1895,9 +1914,8 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // Fall back to ComputeMaskedBits to catch other known cases. 
EVT OpVT = Val.getValueType(); unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(Val, KnownZero, KnownOne); return (KnownZero.countPopulation() == BitWidth - 1) && (KnownOne.countPopulation() == 1); } @@ -2060,7 +2078,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, Lod->getPointerInfo().getWithOffset(bestOffset), - false, false, NewAlign); + false, false, false, NewAlign); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), @@ -2393,8 +2411,15 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0 == N1) { // We can always fold X == X for integer setcc's. - if (N0.getValueType().isInteger()) - return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + if (N0.getValueType().isInteger()) { + switch (getBooleanContents(N0.getValueType().isVector())) { + case UndefinedBooleanContent: + case ZeroOrOneBooleanContent: + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + case ZeroOrNegativeOneBooleanContent: + return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT); + } + } unsigned UOF = ISD::getUnorderedFlavor(Cond); if (UOF == 2) // FP operators that are undefined on NaNs. return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); @@ -2428,6 +2453,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + // If RHS is a legal immediate value for a compare instruction, we need + // to be careful about increasing register pressure needlessly. + bool LegalRHSImm = false; + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 @@ -2462,25 +2491,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond); } } + + // Could RHSC fold directly into a compare? + if (RHSC->getValueType(0).getSizeInBits() <= 64) + LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue()); } // Simplify (X+Z) == X --> Z == 0 - if (N0.getOperand(0) == N1) - return DAG.getSetCC(dl, VT, N0.getOperand(1), - DAG.getConstant(0, N0.getValueType()), Cond); - if (N0.getOperand(1) == N1) { - if (DAG.isCommutativeBinOp(N0.getOpcode())) - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(0, N0.getValueType()), Cond); - else if (N0.getNode()->hasOneUse()) { - assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); - // (Z-X) == X --> Z == X<<1 - SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), - N1, + // Don't do this if X is an immediate that can fold into a cmp + // instruction and X+Z has other uses. It could be an induction variable + // chain, and the transform would increase register pressure. 
+ if (!LegalRHSImm || N0.getNode()->hasOneUse()) { + if (N0.getOperand(0) == N1) + return DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(0, N0.getValueType()), Cond); + if (N0.getOperand(1) == N1) { + if (DAG.isCommutativeBinOp(N0.getOpcode())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(0, N0.getValueType()), Cond); + else if (N0.getNode()->hasOneUse()) { + assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); + // (Z-X) == X --> Z == X<<1 + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1, DAG.getConstant(1, getShiftAmountTy(N1.getValueType()))); - if (!DCI.isCalledByLegalizer()) - DCI.AddToWorklist(SH.getNode()); - return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + } } } } @@ -2984,7 +3021,6 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( /// is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { - default: llvm_unreachable("Unknown constraint type!"); case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -2995,6 +3031,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { case TargetLowering::C_Memory: return 3; } + llvm_unreachable("Invalid constraint type"); } /// Examine constraint type and operand type and determine a weight value. @@ -3242,8 +3279,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, - std::vector<SDNode*>* Created) const { +SDValue TargetLowering:: +BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, + std::vector<SDNode*>* Created) const { EVT VT = N->getValueType(0); DebugLoc dl= N->getDebugLoc(); @@ -3258,10 +3296,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type SDValue Q; - if (isOperationLegalOrCustom(ISD::MULHS, VT)) + if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) : + isOperationLegalOrCustom(ISD::MULHS, VT)) Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0), DAG.getConstant(magics.m, VT)); - else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) + else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) : + isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N->getOperand(0), DAG.getConstant(magics.m, VT)).getNode(), 1); @@ -3299,8 +3339,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. 
See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, - std::vector<SDNode*>* Created) const { +SDValue TargetLowering:: +BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, + std::vector<SDNode*>* Created) const { EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); @@ -3332,9 +3373,11 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type - if (isOperationLegalOrCustom(ISD::MULHU, VT)) + if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) : + isOperationLegalOrCustom(ISD::MULHU, VT)) Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT)); - else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) + else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) : + isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, DAG.getConstant(magics.m, VT)).getNode(), 1); else diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 2609256c8ffa..0016047a134e 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -116,8 +116,7 @@ namespace { // Branches and invokes do not escape, only unwind, resume, and return // do. TerminatorInst *TI = CurBB->getTerminator(); - if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI) && - !isa<ResumeInst>(TI)) + if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) continue; Builder.SetInsertPoint(TI->getParent(), TI); diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 160f38f69236..21ae2f5e56eb 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -93,6 +93,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { } AU.addPreserved<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -124,7 +125,7 @@ MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) { } bool PEI::isReturnBlock(MachineBasicBlock* MBB) { - return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn()); + return (MBB && !MBB->empty() && MBB->back().isReturn()); } // Initialize shrink wrapping DFA sets, called before iterations. @@ -158,7 +159,7 @@ void PEI::initShrinkWrappingInfo() { // via --shrink-wrap-func=<funcname>. 
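The BuildSDIV and BuildUDIV hunks above (in TargetLowering.cpp) only thread an IsAfterLegalization flag through the existing expansion; the expansion itself is the classic magic-number trick, where division by a constant becomes a high multiply plus shifts. A standalone check of one assumed instance, unsigned 32-bit division by 3 via MULHU with magic value 0xAAAAAAAB and a post-shift of 1 (constants chosen for the example, not taken from the patch):

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  const uint64_t Magic = 0xAAAAAAABull;                    // ceil(2^33 / 3)
  for (uint32_t x : {0u, 1u, 2u, 3u, 7u, 100u, 0xFFFFFFFFu}) {
    uint32_t MulHi = static_cast<uint32_t>((uint64_t(x) * Magic) >> 32); // MULHU
    uint32_t Q = MulHi >> 1;                               // post-shift
    assert(Q == x / 3);                                    // matches a real udiv
  }
  return 0;
}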
#ifndef NDEBUG if (ShrinkWrapFunc != "") { - std::string MFName = MF->getFunction()->getNameStr(); + std::string MFName = MF->getFunction()->getName().str(); ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); } #endif @@ -1045,7 +1046,7 @@ std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { return ""; if (MBB->getBasicBlock()) - return MBB->getBasicBlock()->getNameStr(); + return MBB->getBasicBlock()->getName().str(); std::ostringstream name; name << "_MBB_" << MBB->getNumber(); diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index ded2459d4278..9a86f32d8f96 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -1,4 +1,4 @@ -//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===// +//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===// // // The LLVM Compiler Infrastructure // @@ -29,21 +29,20 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/IRBuilder.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include <set> using namespace llvm; -static cl::opt<bool> DisableOldSjLjEH("disable-old-sjlj-eh", cl::Hidden, - cl::desc("Disable the old SjLj EH preparation pass")); - STATISTIC(NumInvokes, "Number of invokes replaced"); -STATISTIC(NumUnwinds, "Number of unwinds replaced"); STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { - class SjLjEHPass : public FunctionPass { + class SjLjEHPrepare : public FunctionPass { const TargetLowering *TLI; Type *FunctionContextTy; Constant *RegisterFn; @@ -54,16 +53,12 @@ namespace { Constant *StackRestoreFn; Constant *LSDAAddrFn; Value *PersonalityFn; - Constant *SelectorFn; - Constant *ExceptionFn; Constant *CallSiteFn; - Constant *DispatchSetupFn; Constant *FuncCtxFn; - Value *CallSite; - DenseMap<InvokeInst*, BasicBlock*> LPadSuccMap; + AllocaInst *FuncCtx; public: static char ID; // Pass identification, replacement for typeid - explicit SjLjEHPass(const TargetLowering *tli = NULL) + explicit SjLjEHPrepare(const TargetLowering *tli = NULL) : FunctionPass(ID), TLI(tli) { } bool doInitialization(Module &M); bool runOnFunction(Function &F); @@ -75,28 +70,24 @@ namespace { private: bool setupEntryBlockAndCallSites(Function &F); + void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, + Value *SelVal); Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads); void lowerIncomingArguments(Function &F); void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes); - - void insertCallSiteStore(Instruction *I, int Number, Value *CallSite); - void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, - SwitchInst *CatchSwitch); - void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes); - void splitLandingPad(InvokeInst *II); - bool insertSjLjEHSupport(Function &F); + void insertCallSiteStore(Instruction *I, int Number); }; } // end anonymous namespace -char SjLjEHPass::ID = 0; +char SjLjEHPrepare::ID = 0; -// Public Interface To the SjLjEHPass pass. -FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) { - return new SjLjEHPass(TLI); +// Public Interface To the SjLjEHPrepare pass. 
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) { + return new SjLjEHPrepare(TLI); } // doInitialization - Set up decalarations and types needed to process // exceptions. -bool SjLjEHPass::doInitialization(Module &M) { +bool SjLjEHPrepare::doInitialization(Module &M) { // Build the function context structure. // builtin_setjmp uses a five word jbuf Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); @@ -123,11 +114,7 @@ bool SjLjEHPass::doInitialization(Module &M) { StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); - SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector); - ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception); CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); - DispatchSetupFn - = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup); FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext); PersonalityFn = 0; @@ -136,583 +123,67 @@ bool SjLjEHPass::doInitialization(Module &M) { /// insertCallSiteStore - Insert a store of the call-site value to the /// function context -void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number, - Value *CallSite) { - ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()), - Number); - // Insert a store of the call-site number - new StoreInst(CallSiteNoC, CallSite, true, I); // volatile -} - -/// splitLandingPad - Split a landing pad. This takes considerable care because -/// of PHIs and other nasties. The problem is that the jump table needs to jump -/// to the landing pad block. However, the landing pad block can be jumped to -/// only by an invoke instruction. So we clone the landingpad instruction into -/// its own basic block, have the invoke jump to there. The landingpad -/// instruction's basic block's successor is now the target for the jump table. -/// -/// But because of PHI nodes, we need to create another basic block for the jump -/// table to jump to. This is definitely a hack, because the values for the PHI -/// nodes may not be defined on the edge from the jump table. But that's okay, -/// because the jump table is simply a construct to mimic what is happening in -/// the CFG. So the values are mysteriously there, even though there is no value -/// for the PHI from the jump table's edge (hence calling this a hack). -void SjLjEHPass::splitLandingPad(InvokeInst *II) { - SmallVector<BasicBlock*, 2> NewBBs; - SplitLandingPadPredecessors(II->getUnwindDest(), II->getParent(), - ".1", ".2", this, NewBBs); - - // Create an empty block so that the jump table has something to jump to - // which doesn't have any PHI nodes. 
- BasicBlock *LPad = NewBBs[0]; - BasicBlock *Succ = *succ_begin(LPad); - BasicBlock *JumpTo = BasicBlock::Create(II->getContext(), "jt.land", - LPad->getParent(), Succ); - LPad->getTerminator()->eraseFromParent(); - BranchInst::Create(JumpTo, LPad); - BranchInst::Create(Succ, JumpTo); - LPadSuccMap[II] = JumpTo; - - for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - Value *Val = PN->removeIncomingValue(LPad, false); - PN->addIncoming(Val, JumpTo); - } -} - -/// markInvokeCallSite - Insert code to mark the call_site for this invoke -void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo, - Value *CallSite, - SwitchInst *CatchSwitch) { - ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()), - InvokeNo); - // The runtime comes back to the dispatcher with the call_site - 1 in - // the context. Odd, but there it is. - ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()), - InvokeNo - 1); - - // If the unwind edge has phi nodes, split the edge. - if (isa<PHINode>(II->getUnwindDest()->begin())) { - // FIXME: New EH - This if-condition will be always true in the new scheme. - if (II->getUnwindDest()->isLandingPad()) - splitLandingPad(II); - else - SplitCriticalEdge(II, 1, this); - - // If there are any phi nodes left, they must have a single predecessor. - while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) { - PN->replaceAllUsesWith(PN->getIncomingValue(0)); - PN->eraseFromParent(); - } - } +void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { + IRBuilder<> Builder(I); - // Insert the store of the call site value - insertCallSiteStore(II, InvokeNo, CallSite); - - // Record the call site value for the back end so it stays associated with - // the invoke. - CallInst::Create(CallSiteFn, CallSiteNoC, "", II); - - // Add a switch case to our unwind block. - if (BasicBlock *SuccBB = LPadSuccMap[II]) { - CatchSwitch->addCase(SwitchValC, SuccBB); - } else { - CatchSwitch->addCase(SwitchValC, II->getUnwindDest()); - } + // Get a reference to the call_site field. + Type *Int32Ty = Type::getInt32Ty(I->getContext()); + Value *Zero = ConstantInt::get(Int32Ty, 0); + Value *One = ConstantInt::get(Int32Ty, 1); + Value *Idxs[2] = { Zero, One }; + Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site"); - // We still want this to look like an invoke so we emit the LSDA properly, - // so we don't transform the invoke into a call here. + // Insert a store of the call-site number + ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()), + Number); + Builder.CreateStore(CallSiteNoC, CallSite, true/*volatile*/); } /// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until /// we reach blocks we've already seen. -static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) { - if (!LiveBBs.insert(BB).second) return; // already been here. +static void MarkBlocksLiveIn(BasicBlock *BB, + SmallPtrSet<BasicBlock*, 64> &LiveBBs) { + if (!LiveBBs.insert(BB)) return; // already been here. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) MarkBlocksLiveIn(*PI, LiveBBs); } -/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind edge -/// we spill into a stack location, guaranteeing that there is nothing live -/// across the unwind edge. This process also splits all critical edges -/// coming out of invoke's. 
-/// FIXME: Move this function to a common utility file (Local.cpp?) so -/// both SjLj and LowerInvoke can use it. -void SjLjEHPass:: -splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) { - // First step, split all critical edges from invoke instructions. - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { - InvokeInst *II = Invokes[i]; - SplitCriticalEdge(II, 0, this); - - // FIXME: New EH - This if-condition will be always true in the new scheme. - if (II->getUnwindDest()->isLandingPad()) - splitLandingPad(II); - else - SplitCriticalEdge(II, 1, this); - - assert(!isa<PHINode>(II->getNormalDest()) && - !isa<PHINode>(II->getUnwindDest()) && - "Critical edge splitting left single entry phi nodes?"); - } - - Function *F = Invokes.back()->getParent()->getParent(); - - // To avoid having to handle incoming arguments specially, we lower each arg - // to a copy instruction in the entry block. This ensures that the argument - // value itself cannot be live across the entry block. - BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); - while (isa<AllocaInst>(AfterAllocaInsertPt) && - isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize())) - ++AfterAllocaInsertPt; - for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); - AI != E; ++AI) { - Type *Ty = AI->getType(); - // Aggregate types can't be cast, but are legal argument types, so we have - // to handle them differently. We use an extract/insert pair as a - // lightweight method to achieve the same goal. - if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { - Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); - Instruction *NI = InsertValueInst::Create(AI, EI, 0); - NI->insertAfter(EI); - AI->replaceAllUsesWith(NI); - // Set the operand of the instructions back to the AllocaInst. - EI->setOperand(0, AI); - NI->setOperand(0, AI); - } else { - // This is always a no-op cast because we're casting AI to AI->getType() - // so src and destination types are identical. BitCast is the only - // possibility. - CastInst *NC = new BitCastInst( - AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); - AI->replaceAllUsesWith(NC); - // Set the operand of the cast instruction back to the AllocaInst. - // Normally it's forbidden to replace a CastInst's operand because it - // could cause the opcode to reflect an illegal conversion. However, - // we're replacing it here with the same value it was constructed with. - // We do this because the above replaceAllUsesWith() clobbered the - // operand, but we want this one to remain. - NC->setOperand(0, AI); - } - } - - // Finally, scan the code looking for instructions with bad live ranges. - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - // Ignore obvious cases we don't have to handle. In particular, most - // instructions either have no uses or only have a single use inside the - // current block. Ignore them quickly. - Instruction *Inst = II; - if (Inst->use_empty()) continue; - if (Inst->hasOneUse() && - cast<Instruction>(Inst->use_back())->getParent() == BB && - !isa<PHINode>(Inst->use_back())) continue; - - // If this is an alloca in the entry block, it's not a real register - // value. - if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) - if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) - continue; - - // Avoid iterator invalidation by copying users to a temporary vector. 
- SmallVector<Instruction*,16> Users; - for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); - UI != E; ++UI) { - Instruction *User = cast<Instruction>(*UI); - if (User->getParent() != BB || isa<PHINode>(User)) - Users.push_back(User); - } - - // Find all of the blocks that this value is live in. - std::set<BasicBlock*> LiveBBs; - LiveBBs.insert(Inst->getParent()); - while (!Users.empty()) { - Instruction *U = Users.back(); - Users.pop_back(); - - if (!isa<PHINode>(U)) { - MarkBlocksLiveIn(U->getParent(), LiveBBs); - } else { - // Uses for a PHI node occur in their predecessor block. - PHINode *PN = cast<PHINode>(U); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == Inst) - MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); - } - } - - // Now that we know all of the blocks that this thing is live in, see if - // it includes any of the unwind locations. - bool NeedsSpill = false; - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { - BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); - if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) - NeedsSpill = true; - } - - // If we decided we need a spill, do it. - // FIXME: Spilling this way is overkill, as it forces all uses of - // the value to be reloaded from the stack slot, even those that aren't - // in the unwind blocks. We should be more selective. - if (NeedsSpill) { - ++NumSpilled; - DemoteRegToStack(*Inst, true); - } - } -} - -/// CreateLandingPadLoad - Load the exception handling values and insert them -/// into a structure. -static Instruction *CreateLandingPadLoad(Function &F, Value *ExnAddr, - Value *SelAddr, - BasicBlock::iterator InsertPt) { - Value *Exn = new LoadInst(ExnAddr, "exn", false, - InsertPt); - Type *Ty = Type::getInt8PtrTy(F.getContext()); - Exn = CastInst::Create(Instruction::IntToPtr, Exn, Ty, "", InsertPt); - Value *Sel = new LoadInst(SelAddr, "sel", false, InsertPt); - - Ty = StructType::get(Exn->getType(), Sel->getType(), NULL); - InsertValueInst *LPadVal = InsertValueInst::Create(llvm::UndefValue::get(Ty), - Exn, 0, - "lpad.val", InsertPt); - return InsertValueInst::Create(LPadVal, Sel, 1, "lpad.val", InsertPt); -} - -/// ReplaceLandingPadVal - Replace the landingpad instruction's value with a -/// load from the stored values (via CreateLandingPadLoad). This looks through -/// PHI nodes, and removes them if they are dead. -static void ReplaceLandingPadVal(Function &F, Instruction *Inst, Value *ExnAddr, - Value *SelAddr) { - if (Inst->use_empty()) return; - - while (!Inst->use_empty()) { - Instruction *I = cast<Instruction>(Inst->use_back()); - - if (PHINode *PN = dyn_cast<PHINode>(I)) { - ReplaceLandingPadVal(F, PN, ExnAddr, SelAddr); - if (PN->use_empty()) PN->eraseFromParent(); - continue; - } - - I->replaceUsesOfWith(Inst, CreateLandingPadLoad(F, ExnAddr, SelAddr, I)); - } -} - -bool SjLjEHPass::insertSjLjEHSupport(Function &F) { - SmallVector<ReturnInst*,16> Returns; - SmallVector<UnwindInst*,16> Unwinds; - SmallVector<InvokeInst*,16> Invokes; - - // Look through the terminators of the basic blocks to find invokes, returns - // and unwinds. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { - // Remember all return instructions in case we insert an invoke into this - // function. 
- Returns.push_back(RI); - } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { - Invokes.push_back(II); - } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { - Unwinds.push_back(UI); - } - } - - NumInvokes += Invokes.size(); - NumUnwinds += Unwinds.size(); - - // If we don't have any invokes, there's nothing to do. - if (Invokes.empty()) return false; - - // Find the eh.selector.*, eh.exception and alloca calls. - // - // Remember any allocas() that aren't in the entry block, as the - // jmpbuf saved SP will need to be updated for them. - // - // We'll use the first eh.selector to determine the right personality - // function to use. For SJLJ, we always use the same personality for the - // whole function, not on a per-selector basis. - // FIXME: That's a bit ugly. Better way? - SmallVector<CallInst*,16> EH_Selectors; - SmallVector<CallInst*,16> EH_Exceptions; - SmallVector<Instruction*,16> JmpbufUpdatePoints; - - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - // Note: Skip the entry block since there's nothing there that interests - // us. eh.selector and eh.exception shouldn't ever be there, and we - // want to disregard any allocas that are there. - // - // FIXME: This is awkward. The new EH scheme won't need to skip the entry - // block. - if (BB == F.begin()) { - if (InvokeInst *II = dyn_cast<InvokeInst>(F.begin()->getTerminator())) { - // FIXME: This will be always non-NULL in the new EH. - if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) - if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn(); - } - - continue; - } - - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (CallInst *CI = dyn_cast<CallInst>(I)) { - if (CI->getCalledFunction() == SelectorFn) { - if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1); - EH_Selectors.push_back(CI); - } else if (CI->getCalledFunction() == ExceptionFn) { - EH_Exceptions.push_back(CI); - } else if (CI->getCalledFunction() == StackRestoreFn) { - JmpbufUpdatePoints.push_back(CI); - } - } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { - JmpbufUpdatePoints.push_back(AI); - } else if (InvokeInst *II = dyn_cast<InvokeInst>(I)) { - // FIXME: This will be always non-NULL in the new EH. - if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) - if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn(); - } - } - } - - // If we don't have any eh.selector calls, we can't determine the personality - // function. Without a personality function, we can't process exceptions. - if (!PersonalityFn) return false; - - // We have invokes, so we need to add register/unregister calls to get this - // function onto the global unwind stack. - // - // First thing we need to do is scan the whole function for values that are - // live across unwind edges. Each value that is live across an unwind edge we - // spill into a stack location, guaranteeing that there is nothing live across - // the unwind edge. This process also splits all critical edges coming out of - // invoke's. - splitLiveRangesAcrossInvokes(Invokes); - - - SmallVector<LandingPadInst*, 16> LandingPads; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) - // FIXME: This will be always non-NULL in the new EH. 
- if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) - LandingPads.push_back(LPI); +/// substituteLPadValues - Substitute the values returned by the landingpad +/// instruction with those returned by the personality function. +void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, + Value *SelVal) { + SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end()); + while (!UseWorkList.empty()) { + Value *Val = UseWorkList.pop_back_val(); + ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val); + if (!EVI) continue; + if (EVI->getNumIndices() != 1) continue; + if (*EVI->idx_begin() == 0) + EVI->replaceAllUsesWith(ExnVal); + else if (*EVI->idx_begin() == 1) + EVI->replaceAllUsesWith(SelVal); + if (EVI->getNumUses() == 0) + EVI->eraseFromParent(); } + if (LPI->getNumUses() == 0) return; - BasicBlock *EntryBB = F.begin(); - // Create an alloca for the incoming jump buffer ptr and the new jump buffer - // that needs to be restored on all exits from the function. This is an - // alloca because the value needs to be added to the global context list. - unsigned Align = 4; // FIXME: Should be a TLI check? - AllocaInst *FunctionContext = - new AllocaInst(FunctionContextTy, 0, Align, - "fcn_context", F.begin()->begin()); - - Value *Idxs[2]; - Type *Int32Ty = Type::getInt32Ty(F.getContext()); - Value *Zero = ConstantInt::get(Int32Ty, 0); - // We need to also keep around a reference to the call_site field - Idxs[0] = Zero; - Idxs[1] = ConstantInt::get(Int32Ty, 1); - CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, "call_site", - EntryBB->getTerminator()); - - // The exception selector comes back in context->data[1] - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, "fc_data", - EntryBB->getTerminator()); - Idxs[1] = ConstantInt::get(Int32Ty, 1); - Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, - "exc_selector_gep", - EntryBB->getTerminator()); - // The exception value comes back in context->data[0] - Idxs[1] = Zero; - Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, - "exception_gep", - EntryBB->getTerminator()); - - // The result of the eh.selector call will be replaced with a a reference to - // the selector value returned in the function context. We leave the selector - // itself so the EH analysis later can use it. - for (int i = 0, e = EH_Selectors.size(); i < e; ++i) { - CallInst *I = EH_Selectors[i]; - Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I); - I->replaceAllUsesWith(SelectorVal); - } - - // eh.exception calls are replaced with references to the proper location in - // the context. Unlike eh.selector, the eh.exception calls are removed - // entirely. - for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) { - CallInst *I = EH_Exceptions[i]; - // Possible for there to be duplicates, so check to make sure the - // instruction hasn't already been removed. - if (!I->getParent()) continue; - Value *Val = new LoadInst(ExceptionAddr, "exception", true, I); - Type *Ty = Type::getInt8PtrTy(F.getContext()); - Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I); - - I->replaceAllUsesWith(Val); - I->eraseFromParent(); - } - - for (unsigned i = 0, e = LandingPads.size(); i != e; ++i) - ReplaceLandingPadVal(F, LandingPads[i], ExceptionAddr, SelectorAddr); - - // The entry block changes to have the eh.sjlj.setjmp, with a conditional - // branch to a dispatch block for non-zero returns. 
If we return normally, - // we're not handling an exception and just register the function context and - // continue. - - // Create the dispatch block. The dispatch block is basically a big switch - // statement that goes to all of the invoke landing pads. - BasicBlock *DispatchBlock = - BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); - - // Insert a load of the callsite in the dispatch block, and a switch on its - // value. By default, we issue a trap statement. - BasicBlock *TrapBlock = - BasicBlock::Create(F.getContext(), "trapbb", &F); - CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap), - "", TrapBlock); - new UnreachableInst(F.getContext(), TrapBlock); - - Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, - DispatchBlock); - SwitchInst *DispatchSwitch = - SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(), - DispatchBlock); - // Split the entry block to insert the conditional branch for the setjmp. - BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), - "eh.sjlj.setjmp.cont"); - - // Populate the Function Context - // 1. LSDA address - // 2. Personality function address - // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp) - - // LSDA address - Idxs[0] = Zero; - Idxs[1] = ConstantInt::get(Int32Ty, 4); - Value *LSDAFieldPtr = - GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep", - EntryBB->getTerminator()); - Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", - EntryBB->getTerminator()); - new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); - - Idxs[1] = ConstantInt::get(Int32Ty, 3); - Value *PersonalityFieldPtr = - GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep", - EntryBB->getTerminator()); - new StoreInst(PersonalityFn, PersonalityFieldPtr, true, - EntryBB->getTerminator()); - - // Save the frame pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 5); - Value *JBufPtr - = GetElementPtrInst::Create(FunctionContext, Idxs, "jbuf_gep", - EntryBB->getTerminator()); - Idxs[1] = ConstantInt::get(Int32Ty, 0); - Value *FramePtr = - GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep", - EntryBB->getTerminator()); - - Value *Val = CallInst::Create(FrameAddrFn, - ConstantInt::get(Int32Ty, 0), - "fp", - EntryBB->getTerminator()); - new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); - - // Save the stack pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *StackPtr = - GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep", - EntryBB->getTerminator()); - - Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); - new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); - - // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. - Value *SetjmpArg = - CastInst::Create(Instruction::BitCast, JBufPtr, - Type::getInt8PtrTy(F.getContext()), "", - EntryBB->getTerminator()); - Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, - "", - EntryBB->getTerminator()); - - // Add a call to dispatch_setup after the setjmp call. This is expanded to any - // target-specific setup that needs to be done. - CallInst::Create(DispatchSetupFn, DispatchVal, "", EntryBB->getTerminator()); + // There are still some uses of LPI. Construct an aggregate with the exception + // values and replace the LPI with that aggregate. 
+ Type *LPadType = LPI->getType(); + Value *LPadVal = UndefValue::get(LPadType); + IRBuilder<> + Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); + LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); + LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); - // check the return value of the setjmp. non-zero goes to dispatcher. - Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), - ICmpInst::ICMP_EQ, DispatchVal, Zero, - "notunwind"); - // Nuke the uncond branch. - EntryBB->getTerminator()->eraseFromParent(); - - // Put in a new condbranch in its place. - BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB); - - // Register the function context and make sure it's known to not throw - CallInst *Register = - CallInst::Create(RegisterFn, FunctionContext, "", - ContBlock->getTerminator()); - Register->setDoesNotThrow(); - - // At this point, we are all set up, update the invoke instructions to mark - // their call_site values, and fill in the dispatch switch accordingly. - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) - markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch); - - // Mark call instructions that aren't nounwind as no-action (call_site == - // -1). Skip the entry block, as prior to then, no function context has been - // created for this function and any unexpected exceptions thrown will go - // directly to the caller's context, which is what we want anyway, so no need - // to do anything here. - for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) { - for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) - if (CallInst *CI = dyn_cast<CallInst>(I)) { - // Ignore calls to the EH builtins (eh.selector, eh.exception) - Constant *Callee = CI->getCalledFunction(); - if (Callee != SelectorFn && Callee != ExceptionFn - && !CI->doesNotThrow()) - insertCallSiteStore(CI, -1, CallSite); - } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) { - insertCallSiteStore(RI, -1, CallSite); - } - } - - // Replace all unwinds with a branch to the unwind handler. - // ??? Should this ever happen with sjlj exceptions? - for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { - BranchInst::Create(TrapBlock, Unwinds[i]); - Unwinds[i]->eraseFromParent(); - } - - // Following any allocas not in the entry block, update the saved SP in the - // jmpbuf to the new value. - for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) { - Instruction *AI = JmpbufUpdatePoints[i]; - Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); - StackAddr->insertAfter(AI); - Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); - StoreStackAddr->insertAfter(StackAddr); - } - - // Finally, for any returns from this function, if this function contains an - // invoke, add a call to unregister the function context. - for (unsigned i = 0, e = Returns.size(); i != e; ++i) - CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]); - - return true; + LPI->replaceAllUsesWith(LPadVal); } /// setupFunctionContext - Allocate the function context on the stack and fill /// it with all of the data that we know at this point. -Value *SjLjEHPass:: +Value *SjLjEHPrepare:: setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { BasicBlock *EntryBB = F.begin(); @@ -721,56 +192,42 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { // because the value needs to be added to the global context list. 
unsigned Align = TLI->getTargetData()->getPrefTypeAlignment(FunctionContextTy); - AllocaInst *FuncCtx = + FuncCtx = new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin()); // Fill in the function context structure. - Value *Idxs[2]; Type *Int32Ty = Type::getInt32Ty(F.getContext()); Value *Zero = ConstantInt::get(Int32Ty, 0); Value *One = ConstantInt::get(Int32Ty, 1); + Value *Two = ConstantInt::get(Int32Ty, 2); + Value *Three = ConstantInt::get(Int32Ty, 3); + Value *Four = ConstantInt::get(Int32Ty, 4); - // Keep around a reference to the call_site field. - Idxs[0] = Zero; - Idxs[1] = One; - CallSite = GetElementPtrInst::Create(FuncCtx, Idxs, "call_site", - EntryBB->getTerminator()); - - // Reference the __data field. - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *FCData = GetElementPtrInst::Create(FuncCtx, Idxs, "__data", - EntryBB->getTerminator()); - - // The exception value comes back in context->__data[0]. - Idxs[1] = Zero; - Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, - "exception_gep", - EntryBB->getTerminator()); - - // The exception selector comes back in context->__data[1]. - Idxs[1] = One; - Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, - "exn_selector_gep", - EntryBB->getTerminator()); + Value *Idxs[2] = { Zero, 0 }; for (unsigned I = 0, E = LPads.size(); I != E; ++I) { LandingPadInst *LPI = LPads[I]; IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); + // Reference the __data field. + Idxs[1] = Two; + Value *FCData = Builder.CreateGEP(FuncCtx, Idxs, "__data"); + + // The exception values come back in context->__data[0]. + Idxs[1] = Zero; + Value *ExceptionAddr = Builder.CreateGEP(FCData, Idxs, "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext())); - Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); - Type *LPadType = LPI->getType(); - Value *LPadVal = UndefValue::get(LPadType); - LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); - LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); + Idxs[1] = One; + Value *SelectorAddr = Builder.CreateGEP(FCData, Idxs, "exn_selector_gep"); + Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); - LPI->replaceAllUsesWith(LPadVal); + substituteLPadValues(LPI, ExnVal, SelVal); } // Personality function - Idxs[1] = ConstantInt::get(Int32Ty, 3); + Idxs[1] = Three; if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); Value *PersonalityFieldPtr = @@ -780,11 +237,11 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { EntryBB->getTerminator()); // LSDA address - Idxs[1] = ConstantInt::get(Int32Ty, 4); - Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep", - EntryBB->getTerminator()); Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", EntryBB->getTerminator()); + Idxs[1] = Four; + Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep", + EntryBB->getTerminator()); new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); return FuncCtx; @@ -794,7 +251,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { /// specially, we lower each arg to a copy instruction in the entry block. This /// ensures that the argument value itself cannot be live out of the entry /// block. 
-void SjLjEHPass::lowerIncomingArguments(Function &F) { +void SjLjEHPrepare::lowerIncomingArguments(Function &F) { BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin(); while (isa<AllocaInst>(AfterAllocaInsPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) @@ -838,8 +295,8 @@ void SjLjEHPass::lowerIncomingArguments(Function &F) { /// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind /// edge and spill them. -void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, - ArrayRef<InvokeInst*> Invokes) { +void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, + ArrayRef<InvokeInst*> Invokes) { // Finally, scan the code looking for instructions with bad live ranges. for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { @@ -870,7 +327,7 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, } // Find all of the blocks that this value is live in. - std::set<BasicBlock*> LiveBBs; + SmallPtrSet<BasicBlock*, 64> LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); @@ -893,7 +350,10 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around " + << UnwindBlock->getName() << "\n"); NeedsSpill = true; + break; } } @@ -902,36 +362,60 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, // the value to be reloaded from the stack slot, even those that aren't // in the unwind blocks. We should be more selective. if (NeedsSpill) { - ++NumSpilled; DemoteRegToStack(*Inst, true); + ++NumSpilled; } } } + + // Go through the landing pads and remove any PHIs there. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); + + // Place PHIs into a set to avoid invalidating the iterator. + SmallPtrSet<PHINode*, 8> PHIsToDemote; + for (BasicBlock::iterator + PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN) + PHIsToDemote.insert(cast<PHINode>(PN)); + if (PHIsToDemote.empty()) continue; + + // Demote the PHIs to the stack. + for (SmallPtrSet<PHINode*, 8>::iterator + I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I) + DemotePHIToStack(*I); + + // Move the landingpad instruction back to the top of the landing pad block. + LPI->moveBefore(UnwindBlock->begin()); + } } /// setupEntryBlockAndCallSites - Setup the entry block by creating and filling /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. -bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { +bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { SmallVector<ReturnInst*, 16> Returns; SmallVector<InvokeInst*, 16> Invokes; - SmallVector<LandingPadInst*, 16> LPads; + SmallSetVector<LandingPadInst*, 16> LPads; // Look through the terminators of the basic blocks to find invokes. 
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { Invokes.push_back(II); - LPads.push_back(II->getUnwindDest()->getLandingPadInst()); + LPads.insert(II->getUnwindDest()->getLandingPadInst()); } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { Returns.push_back(RI); } if (Invokes.empty()) return false; + NumInvokes += Invokes.size(); + lowerIncomingArguments(F); lowerAcrossUnwindEdges(F, Invokes); - Value *FuncCtx = setupFunctionContext(F, LPads); + Value *FuncCtx = + setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); Type *Int32Ty = Type::getInt32Ty(F.getContext()); @@ -979,7 +463,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { // At this point, we are all set up, update the invoke instructions to mark // their call_site values. for (unsigned I = 0, E = Invokes.size(); I != E; ++I) { - insertCallSiteStore(Invokes[I], I + 1, CallSite); + insertCallSiteStore(Invokes[I], I + 1); ConstantInt *CallSiteNum = ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); @@ -998,9 +482,9 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (!CI->doesNotThrow()) - insertCallSiteStore(CI, -1, CallSite); + insertCallSiteStore(CI, -1); } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) { - insertCallSiteStore(RI, -1, CallSite); + insertCallSiteStore(RI, -1); } // Register the function context and make sure it's known to not throw @@ -1008,6 +492,25 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { EntryBB->getTerminator()); Register->setDoesNotThrow(); + // Following any allocas not in the entry block, update the saved SP in the + // jmpbuf to the new value. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (BB == F.begin()) + continue; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (CI->getCalledFunction() != StackRestoreFn) + continue; + } else if (!isa<AllocaInst>(I)) { + continue; + } + Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); + StackAddr->insertAfter(I); + Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); + StoreStackAddr->insertAfter(StackAddr); + } + } + // Finally, for any returns from this function, if this function contains an // invoke, add a call to unregister the function context. for (unsigned I = 0, E = Returns.size(); I != E; ++I) @@ -1016,11 +519,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { return true; } -bool SjLjEHPass::runOnFunction(Function &F) { - bool Res = false; - if (!DisableOldSjLjEH) - Res = insertSjLjEHSupport(F); - else - Res = setupEntryBlockAndCallSites(F); +bool SjLjEHPrepare::runOnFunction(Function &F) { + bool Res = setupEntryBlockAndCallSites(F); return Res; } diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index ca79cafcf4be..c5bd3a3cae63 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -76,7 +76,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { MachineBasicBlock *mbb = &*mbbItr; // Insert an index for the MBB start. 
- SlotIndex blockStartIndex(back(), SlotIndex::LOAD); + SlotIndex blockStartIndex(back(), SlotIndex::Slot_Block); for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); miItr != miEnd; ++miItr) { @@ -88,7 +88,8 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(mi, index += SlotIndex::InstrDist)); // Save this base index in the maps. - mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD))); + mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), + SlotIndex::Slot_Block))); ++functionSize; } @@ -97,14 +98,15 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(0, index += SlotIndex::InstrDist)); MBBRanges[mbb->getNumber()].first = blockStartIndex; - MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD); + MBBRanges[mbb->getNumber()].second = SlotIndex(back(), + SlotIndex::Slot_Block); idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb)); } // Sort the Idx2MBBMap std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); - DEBUG(dump()); + DEBUG(mf->print(dbgs(), this)); // And we're done! return false; @@ -166,7 +168,7 @@ void SlotIndexes::dump() const { // Print a SlotIndex to a raw_ostream. void SlotIndex::print(raw_ostream &os) const { if (isValid()) - os << entry().getIndex() << "LudS"[getSlot()]; + os << entry().getIndex() << "Berd"[getSlot()]; else os << "invalid"; } diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index b6bbcd7176dd..4cd22eb60f55 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -11,8 +11,8 @@ #include "Spiller.h" #include "VirtRegMap.h" -#include "LiveRangeEdit.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -29,7 +29,7 @@ using namespace llvm; namespace { - enum SpillerName { trivial, standard, inline_ }; + enum SpillerName { trivial, inline_ }; } static cl::opt<SpillerName> @@ -37,10 +37,9 @@ spillerOpt("spiller", cl::desc("Spiller to use: (default: standard)"), cl::Prefix, cl::values(clEnumVal(trivial, "trivial spiller"), - clEnumVal(standard, "default spiller"), clEnumValN(inline_, "inline", "inline spiller"), clEnumValEnd), - cl::init(standard)); + cl::init(trivial)); // Spiller virtual destructor implementation. Spiller::~Spiller() {} @@ -73,8 +72,9 @@ protected: /// Add spill ranges for every use/def of the live interval, inserting loads /// immediately before each use, and stores after each def. No folding or /// remat is attempted. - void trivialSpillEverywhere(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &newIntervals) { + void trivialSpillEverywhere(LiveRangeEdit& LRE) { + LiveInterval* li = &LRE.getParent(); + DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); assert(li->weight != HUGE_VALF && @@ -116,17 +116,14 @@ protected: } // Create a new vreg & interval for this instr. - unsigned newVReg = mri->createVirtualRegister(trc); - vrm->grow(); - vrm->assignVirt2StackSlot(newVReg, ss); - LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); + LiveInterval *newLI = &LRE.create(); newLI->weight = HUGE_VALF; // Update the reg operands & kill flags. 
for (unsigned i = 0; i < indices.size(); ++i) { unsigned mopIdx = indices[i]; MachineOperand &mop = mi->getOperand(mopIdx); - mop.setReg(newVReg); + mop.setReg(newLI->reg); if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) { mop.setIsKill(true); } @@ -136,33 +133,29 @@ protected: // Insert reload if necessary. MachineBasicBlock::iterator miItr(mi); if (hasUse) { - tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc, + tii->loadRegFromStackSlot(*mi->getParent(), miItr, newLI->reg, ss, trc, tri); MachineInstr *loadInstr(prior(miItr)); SlotIndex loadIndex = - lis->InsertMachineInstrInMaps(loadInstr).getDefIndex(); - vrm->addSpillSlotUse(ss, loadInstr); + lis->InsertMachineInstrInMaps(loadInstr).getRegSlot(); SlotIndex endIndex = loadIndex.getNextIndex(); VNInfo *loadVNI = - newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator()); + newLI->getNextValue(loadIndex, lis->getVNInfoAllocator()); newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI)); } // Insert store if necessary. if (hasDef) { - tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg, + tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr),newLI->reg, true, ss, trc, tri); MachineInstr *storeInstr(llvm::next(miItr)); SlotIndex storeIndex = - lis->InsertMachineInstrInMaps(storeInstr).getDefIndex(); - vrm->addSpillSlotUse(ss, storeInstr); + lis->InsertMachineInstrInMaps(storeInstr).getRegSlot(); SlotIndex beginIndex = storeIndex.getPrevIndex(); VNInfo *storeVNI = - newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator()); + newLI->getNextValue(beginIndex, lis->getVNInfoAllocator()); newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI)); } - - newIntervals.push_back(newLI); } } }; @@ -182,60 +175,20 @@ public: void spill(LiveRangeEdit &LRE) { // Ignore spillIs - we don't use it. - trivialSpillEverywhere(&LRE.getParent(), *LRE.getNewVRegs()); + trivialSpillEverywhere(LRE); } }; } // end anonymous namespace -namespace { - -/// Falls back on LiveIntervals::addIntervalsForSpills. -class StandardSpiller : public Spiller { -protected: - MachineFunction *mf; - LiveIntervals *lis; - LiveStacks *lss; - MachineLoopInfo *loopInfo; - VirtRegMap *vrm; -public: - StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf, - VirtRegMap &vrm) - : mf(&mf), - lis(&pass.getAnalysis<LiveIntervals>()), - lss(&pass.getAnalysis<LiveStacks>()), - loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()), - vrm(&vrm) {} - - /// Falls back on LiveIntervals::addIntervalsForSpills. - void spill(LiveRangeEdit &LRE) { - std::vector<LiveInterval*> added = - lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(), - loopInfo, *vrm); - LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(), - added.begin(), added.end()); - - // Update LiveStacks. 
- int SS = vrm->getStackSlot(LRE.getReg()); - if (SS == VirtRegMap::NO_STACK_SLOT) - return; - const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg()); - LiveInterval &SI = lss->getOrCreateInterval(SS, RC); - if (!SI.hasAtLeastOneValue()) - SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator()); - SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0)); - } -}; - -} // end anonymous namespace +void Spiller::anchor() { } llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) { switch (spillerOpt) { - default: assert(0 && "unknown spiller"); case trivial: return new TrivialSpiller(pass, mf, vrm); - case standard: return new StandardSpiller(pass, mf, vrm); case inline_: return createInlineSpiller(pass, mf, vrm); } + llvm_unreachable("Invalid spiller optimization"); } diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index 41f1727da439..b7d5beaab1b2 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -22,6 +22,7 @@ namespace llvm { /// Implementations are utility classes which insert spill or remat code on /// demand. class Spiller { + virtual void anchor(); public: virtual ~Spiller() = 0; diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 63627800af69..9959f74d5f27 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -14,10 +14,10 @@ #define DEBUG_TYPE "regalloc" #include "SplitKit.h" -#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -62,13 +62,14 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { const MachineBasicBlock *MBB = MF.getBlockNumbered(Num); const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor(); std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num]; + SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB); // Compute split points on the first call. The pair is independent of the // current live interval. if (!LSP.first.isValid()) { MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator(); if (FirstTerm == MBB->end()) - LSP.first = LIS.getMBBEndIdx(MBB); + LSP.first = MBBEnd; else LSP.first = LIS.getInstructionIndex(FirstTerm); @@ -80,7 +81,7 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin(); I != E;) { --I; - if (I->getDesc().isCall()) { + if (I->isCall()) { LSP.second = LIS.getInstructionIndex(I); break; } @@ -89,10 +90,32 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { // If CurLI is live into a landing pad successor, move the last split point // back to the call that may throw. - if (LPad && LSP.second.isValid() && LIS.isLiveInToMBB(*CurLI, LPad)) - return LSP.second; - else + if (!LPad || !LSP.second || !LIS.isLiveInToMBB(*CurLI, LPad)) + return LSP.first; + + // Find the value leaving MBB. + const VNInfo *VNI = CurLI->getVNInfoBefore(MBBEnd); + if (!VNI) + return LSP.first; + + // If the value leaving MBB was defined after the call in MBB, it can't + // really be live-in to the landing pad. This can happen if the landing pad + // has a PHI, and this register is undef on the exceptional edge. 
+ // <rdar://problem/10664933> + if (!SlotIndex::isEarlierInstr(VNI->def, LSP.second) && VNI->def < MBBEnd) return LSP.first; + + // Value is properly live-in to the landing pad. + // Only allow splits before the call. + return LSP.second; +} + +MachineBasicBlock::iterator +SplitAnalysis::getLastSplitPointIter(MachineBasicBlock *MBB) { + SlotIndex LSP = getLastSplitPoint(MBB->getNumber()); + if (LSP == LIS.getMBBEndIdx(MBB)) + return MBB->end(); + return LIS.getInstructionFromIndex(LSP); } /// analyzeUses - Count instructions, basic blocks, and loops using CurLI. @@ -112,7 +135,7 @@ void SplitAnalysis::analyzeUses() { I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E; ++I) if (!I.getOperand().isUndef()) - UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex()); + UseSlots.push_back(LIS.getInstructionIndex(&*I).getRegSlot()); array_pod_sort(UseSlots.begin(), UseSlots.end()); @@ -328,7 +351,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { // We don't need an AliasAnalysis since we will only be performing // cheap-as-a-copy remats anyway. - Edit->anyRematerializable(LIS, TII, 0); + Edit->anyRematerializable(0); } void SplitEditor::dump() const { @@ -351,7 +374,7 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, LiveInterval *LI = Edit->get(RegIdx); // Create a new value. - VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator()); + VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator()); // Use insert for lookup, so we can add missing values with a second lookup. std::pair<ValueMap::iterator, bool> InsP = @@ -366,14 +389,14 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, // If the previous value was a simple mapping, add liveness for it now. if (VNInfo *OldVNI = InsP.first->second.getPointer()) { SlotIndex Def = OldVNI->def; - LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI)); + LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI)); // No longer a simple mapping. Switch to a complex, non-forced mapping. InsP.first->second = ValueForcePair(); } // This is a complex mapping, add liveness for VNI SlotIndex Def = VNI->def; - LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); return VNI; } @@ -393,7 +416,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { // This was previously a single mapping. Make sure the old def is represented // by a trivial live range. SlotIndex Def = VNI->def; - Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); // Mark as complex mapped, forced. VFP = ValueForcePair(0, true); } @@ -413,33 +436,31 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, // Attempt cheap-as-a-copy rematerialization. LiveRangeEdit::Remat RM(ParentVNI); - if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) { - Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late); + if (Edit->canRematerializeAt(RM, UseIdx, true)) { + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late); ++NumRemats; } else { // Can't remat, just insert a copy from parent. CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) .addReg(Edit->getReg()); Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late) - .getDefIndex(); + .getRegSlot(); ++NumCopies; } // Define the value in Reg. 
- VNInfo *VNI = defValue(RegIdx, ParentVNI, Def); - VNI->setCopy(CopyMI); - return VNI; + return defValue(RegIdx, ParentVNI, Def); } /// Create a new virtual register and live interval. unsigned SplitEditor::openIntv() { // Create the complement as index 0. if (Edit->empty()) - Edit->create(LIS, VRM); + Edit->create(); // Create the open interval. OpenIdx = Edit->size(); - Edit->create(LIS, VRM); + Edit->create(); return OpenIdx; } @@ -497,7 +518,7 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { } DEBUG(dbgs() << ": valno " << ParentVNI->id); VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB, - LIS.getLastSplitPoint(Edit->getParent(), &MBB)); + SA.getLastSplitPointIter(&MBB)); RegAssign.insert(VNI->def, End, OpenIdx); DEBUG(dump()); return VNI->def; @@ -586,7 +607,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { assert(OpenIdx && "openIntv not called before overlapIntv"); const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); - assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) && + assert(ParentVNI == Edit->getParent().getVNInfoBefore(End) && "Parent changes value in extended range"); assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && "Range cannot span basic blocks"); @@ -640,7 +661,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n'); forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def)); } else { - SlotIndex Kill = LIS.getInstructionIndex(MBBI).getDefIndex(); + SlotIndex Kill = LIS.getInstructionIndex(MBBI).getRegSlot(); DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI); AssignI.setStop(Kill); } @@ -780,7 +801,7 @@ void SplitEditor::hoistCopiesForSize() { SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot(); Dom.second = defFromParent(0, ParentVNI, Last, *Dom.first, - LIS.getLastSplitPoint(Edit->getParent(), Dom.first))->def; + SA.getLastSplitPointIter(Dom.first))->def; } // Remove redundant back-copies that are now known to be dominated by another @@ -958,7 +979,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { // use the same register as the def, so just do that always. SlotIndex Idx = LIS.getInstructionIndex(MI); if (MO.isDef() || MO.isUndef()) - Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex(); + Idx = Idx.getRegSlot(MO.isEarlyClobber()); // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); @@ -981,7 +1002,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { if (!Edit->getParent().liveAt(Idx)) continue; } else - Idx = Idx.getUseIndex(); + Idx = Idx.getRegSlot(true); getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); @@ -994,8 +1015,8 @@ void SplitEditor::deleteRematVictims() { LiveInterval *LI = *I; for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end(); LII != LIE; ++LII) { - // Dead defs end at the store slot. - if (LII->end != LII->valno->def.getNextSlot()) + // Dead defs end at the dead slot. 
+ if (LII->end != LII->valno->def.getDeadSlot()) continue; MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def); assert(MI && "Missing instruction for dead def"); @@ -1012,7 +1033,7 @@ void SplitEditor::deleteRematVictims() { if (Dead.empty()) return; - Edit->eliminateDeadDefs(Dead, LIS, VRM, TII); + Edit->eliminateDeadDefs(Dead); } void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { @@ -1030,7 +1051,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { unsigned RegIdx = RegAssign.lookup(ParentVNI->def); VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def); VNI->setIsPHIDef(ParentVNI->isPHIDef()); - VNI->setCopy(ParentVNI->getCopy()); // Force rematted values to be recomputed everywhere. // The new live ranges may be truncated. @@ -1049,7 +1069,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { break; case SM_Speed: llvm_unreachable("Spill mode 'speed' not implemented yet"); - break; } // Transfer the simply mapped values, check if any are skipped. @@ -1089,7 +1108,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { SmallVector<LiveInterval*, 8> dups; dups.push_back(li); for (unsigned j = 1; j != NumComp; ++j) - dups.push_back(&Edit->create(LIS, VRM)); + dups.push_back(&Edit->create()); ConEQ.Distribute(&dups[0], MRI); // The new intervals all map back to i. if (LRMap) @@ -1097,7 +1116,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { } // Calculate spill weight and allocation hints for new intervals. - Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops); + Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops); assert(!LRMap || LRMap->size() == Edit->size()); } diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index d8fc2122a3c7..4005a3d5cbbf 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -46,9 +46,6 @@ public: const MachineLoopInfo &Loops; const TargetInstrInfo &TII; - // Sorted slot indexes of using instructions. - SmallVector<SlotIndex, 8> UseSlots; - /// Additional information about basic blocks where the current variable is /// live. Such a block will look like one of these templates: /// @@ -85,6 +82,9 @@ private: // Current live interval. const LiveInterval *CurLI; + // Sorted slot indexes of using instructions. + SmallVector<SlotIndex, 8> UseSlots; + /// LastSplitPoint - Last legal split point in each basic block in the current /// function. The first entry is the first terminator, the second entry is the /// last valid split point for a variable that is live in to a landing pad @@ -135,7 +135,7 @@ public: /// getParent - Return the last analyzed interval. const LiveInterval &getParent() const { return *CurLI; } - /// getLastSplitPoint - Return that base index of the last valid split point + /// getLastSplitPoint - Return the base index of the last valid split point /// in the basic block numbered Num. SlotIndex getLastSplitPoint(unsigned Num) { // Inline the common simple case. @@ -145,6 +145,9 @@ public: return computeLastSplitPoint(Num); } + /// getLastSplitPointIter - Returns the last split point as an iterator. + MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock*); + /// isOriginalEndpoint - Return true if the original live range was killed or /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def, /// and 'use' for an early-clobber def. @@ -152,6 +155,10 @@ public: /// splitting. 
bool isOriginalEndpoint(SlotIndex Idx) const; + /// getUseSlots - Return an array of SlotIndexes of instructions using CurLI. + /// This include both use and def operands, at most one entry per instruction. + ArrayRef<SlotIndex> getUseSlots() const { return UseSlots; } + /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks /// where CurLI has uses. ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; } diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp deleted file mode 100644 index 77973b72bbc8..000000000000 --- a/lib/CodeGen/Splitter.cpp +++ /dev/null @@ -1,827 +0,0 @@ -//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "loopsplitter" - -#include "Splitter.h" - -#include "llvm/Module.h" -#include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" - -using namespace llvm; - -char LoopSplitter::ID = 0; -INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting", - "Split virtual regists across loop boundaries.", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(LoopSplitter, "loop-splitting", - "Split virtual regists across loop boundaries.", false, false) - -namespace llvm { - - class StartSlotComparator { - public: - StartSlotComparator(LiveIntervals &lis) : lis(lis) {} - bool operator()(const MachineBasicBlock *mbb1, - const MachineBasicBlock *mbb2) const { - return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2); - } - private: - LiveIntervals &lis; - }; - - class LoopSplit { - public: - LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop) - : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) { - assert(TargetRegisterInfo::isVirtualRegister(li.reg) && - "Cannot split physical registers."); - } - - LiveInterval& getLI() const { return li; } - - MachineLoop& getLoop() const { return loop; } - - bool isValid() const { return valid; } - - bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); } - - void invalidate() { valid = false; } - - void splitIncoming() { inSplit = true; } - - void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); } - - void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); } - - void apply() { - assert(valid && "Attempt to apply invalid split."); - applyIncoming(); - applyOutgoing(); - copyRanges(); - renameInside(); - } - - private: - LoopSplitter &ls; - LiveInterval &li; - MachineLoop &loop; - bool valid, inSplit; - std::set<MachineLoop::Edge> outSplits; - std::vector<MachineInstr*> loopInstrs; - - LiveInterval *newLI; - std::map<VNInfo*, VNInfo*> vniMap; - - LiveInterval* getNewLI() { - if (newLI == 0) { - const 
TargetRegisterClass *trc = ls.mri->getRegClass(li.reg); - unsigned vreg = ls.mri->createVirtualRegister(trc); - newLI = &ls.lis->getOrCreateInterval(vreg); - } - return newLI; - } - - VNInfo* getNewVNI(VNInfo *oldVNI) { - VNInfo *newVNI = vniMap[oldVNI]; - - if (newVNI == 0) { - newVNI = getNewLI()->createValueCopy(oldVNI, - ls.lis->getVNInfoAllocator()); - vniMap[oldVNI] = newVNI; - } - - return newVNI; - } - - void applyIncoming() { - if (!inSplit) { - return; - } - - MachineBasicBlock *preHeader = loop.getLoopPreheader(); - if (preHeader == 0) { - assert(ls.canInsertPreHeader(loop) && - "Can't insert required preheader."); - preHeader = &ls.insertPreHeader(loop); - } - - LiveRange *preHeaderRange = - ls.lis->findExitingRange(li, preHeader); - assert(preHeaderRange != 0 && "Range not live into preheader."); - - // Insert the new copy. - MachineInstr *copy = BuildMI(*preHeader, - preHeader->getFirstTerminator(), - DebugLoc(), - ls.tii->get(TargetOpcode::COPY)) - .addReg(getNewLI()->reg, RegState::Define) - .addReg(li.reg, RegState::Kill); - - ls.lis->InsertMachineInstrInMaps(copy); - - SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); - - VNInfo *newVal = getNewVNI(preHeaderRange->valno); - newVal->def = copyDefIdx; - newVal->setCopy(copy); - li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true); - - getNewLI()->addRange(LiveRange(copyDefIdx, - ls.lis->getMBBEndIdx(preHeader), - newVal)); - } - - void applyOutgoing() { - - for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(), - osEnd = outSplits.end(); - osItr != osEnd; ++osItr) { - MachineLoop::Edge edge = *osItr; - MachineBasicBlock *outBlock = edge.second; - if (ls.isCriticalEdge(edge)) { - assert(ls.canSplitEdge(edge) && "Unsplitable critical edge."); - outBlock = &ls.splitEdge(edge, loop); - } - LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock); - assert(outRange != 0 && "No exiting range?"); - - MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(), - DebugLoc(), - ls.tii->get(TargetOpcode::COPY)) - .addReg(li.reg, RegState::Define) - .addReg(getNewLI()->reg, RegState::Kill); - - ls.lis->InsertMachineInstrInMaps(copy); - - SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); - - // Blow away output range definition. - outRange->valno->def = ls.lis->getInvalidIndex(); - li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx); - - SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock); - assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *newVal = - getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator()); - - getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock), - copyDefIdx, newVal)); - - } - } - - void copyRange(LiveRange &lr) { - std::pair<bool, LoopSplitter::SlotPair> lsr = - ls.getLoopSubRange(lr, loop); - - if (!lsr.first) - return; - - LiveRange loopRange(lsr.second.first, lsr.second.second, - getNewVNI(lr.valno)); - - li.removeRange(loopRange.start, loopRange.end, true); - - getNewLI()->addRange(loopRange); - } - - void copyRanges() { - for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(), - iEnd = loopInstrs.end(); - iItr != iEnd; ++iItr) { - MachineInstr &instr = **iItr; - SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr); - if (instr.modifiesRegister(li.reg, 0)) { - LiveRange *defRange = - li.getLiveRangeContaining(instrIdx.getDefIndex()); - if (defRange != 0) // May have caught this already. 
- copyRange(*defRange); - } - if (instr.readsRegister(li.reg, 0)) { - LiveRange *useRange = - li.getLiveRangeContaining(instrIdx.getUseIndex()); - if (useRange != 0) { // May have caught this already. - copyRange(*useRange); - } - } - } - - for (MachineLoop::block_iterator bbItr = loop.block_begin(), - bbEnd = loop.block_end(); - bbItr != bbEnd; ++bbItr) { - MachineBasicBlock &loopBlock = **bbItr; - LiveRange *enteringRange = - ls.lis->findEnteringRange(li, &loopBlock); - if (enteringRange != 0) { - copyRange(*enteringRange); - } - } - } - - void renameInside() { - for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(), - iEnd = loopInstrs.end(); - iItr != iEnd; ++iItr) { - MachineInstr &instr = **iItr; - for (unsigned i = 0; i < instr.getNumOperands(); ++i) { - MachineOperand &mop = instr.getOperand(i); - if (mop.isReg() && mop.getReg() == li.reg) { - mop.setReg(getNewLI()->reg); - } - } - } - } - - }; - - void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const { - au.addRequired<MachineDominatorTree>(); - au.addPreserved<MachineDominatorTree>(); - au.addRequired<MachineLoopInfo>(); - au.addPreserved<MachineLoopInfo>(); - au.addPreservedID(RegisterCoalescerPassID); - au.addPreserved<CalculateSpillWeights>(); - au.addPreserved<LiveStacks>(); - au.addRequired<SlotIndexes>(); - au.addPreserved<SlotIndexes>(); - au.addRequired<LiveIntervals>(); - au.addPreserved<LiveIntervals>(); - MachineFunctionPass::getAnalysisUsage(au); - } - - bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) { - - mf = &fn; - mri = &mf->getRegInfo(); - tii = mf->getTarget().getInstrInfo(); - tri = mf->getTarget().getRegisterInfo(); - sis = &getAnalysis<SlotIndexes>(); - lis = &getAnalysis<LiveIntervals>(); - mli = &getAnalysis<MachineLoopInfo>(); - mdt = &getAnalysis<MachineDominatorTree>(); - - fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." + - mf->getFunction()->getName().str(); - - dbgs() << "Splitting " << mf->getFunction()->getName() << "."; - - dumpOddTerminators(); - -// dbgs() << "----------------------------------------\n"; -// lis->dump(); -// dbgs() << "----------------------------------------\n"; - -// std::deque<MachineLoop*> loops; -// std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); -// dbgs() << "Loops:\n"; -// while (!loops.empty()) { -// MachineLoop &loop = *loops.front(); -// loops.pop_front(); -// std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); - -// dumpLoopInfo(loop); -// } - - //lis->dump(); - //exit(0); - - // Setup initial intervals. 
- for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval *li = liItr->second; - - if (TargetRegisterInfo::isVirtualRegister(li->reg) && - !lis->intervalIsInOneMBB(*li)) { - intervals.push_back(li); - } - } - - processIntervals(); - - intervals.clear(); - -// dbgs() << "----------------------------------------\n"; -// lis->dump(); -// dbgs() << "----------------------------------------\n"; - - dumpOddTerminators(); - - //exit(1); - - return false; - } - - void LoopSplitter::releaseMemory() { - fqn.clear(); - intervals.clear(); - loopRangeMap.clear(); - } - - void LoopSplitter::dumpOddTerminators() { - for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end(); - bbItr != bbEnd; ++bbItr) { - MachineBasicBlock *mbb = &*bbItr; - MachineBasicBlock *a = 0, *b = 0; - SmallVector<MachineOperand, 4> c; - if (tii->AnalyzeBranch(*mbb, a, b, c)) { - dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n"; - dbgs() << " Terminators:\n"; - for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end(); - iItr != iEnd; ++iItr) { - MachineInstr *instr= &*iItr; - dbgs() << " " << *instr << ""; - } - dbgs() << "\n Listed successors: [ "; - for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end(); - sItr != sEnd; ++sItr) { - MachineBasicBlock *succMBB = *sItr; - dbgs() << succMBB->getNumber() << " "; - } - dbgs() << "]\n\n"; - } - } - } - - void LoopSplitter::dumpLoopInfo(MachineLoop &loop) { - MachineBasicBlock &headerBlock = *loop.getHeader(); - typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList; - ExitEdgesList exitEdges; - loop.getExitEdges(exitEdges); - - dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ "; - for (std::vector<MachineBasicBlock*>::const_iterator - subBlockItr = loop.getBlocks().begin(), - subBlockEnd = loop.getBlocks().end(); - subBlockItr != subBlockEnd; ++subBlockItr) { - MachineBasicBlock &subBlock = **subBlockItr; - dbgs() << "BB#" << subBlock.getNumber() << " "; - } - dbgs() << "], Exit edges: [ "; - for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), - exitEdgeEnd = exitEdges.end(); - exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { - MachineLoop::Edge &exitEdge = *exitEdgeItr; - dbgs() << "(MBB#" << exitEdge.first->getNumber() - << ", MBB#" << exitEdge.second->getNumber() << ") "; - } - dbgs() << "], Sub-Loop Headers: [ "; - for (MachineLoop::iterator subLoopItr = loop.begin(), - subLoopEnd = loop.end(); - subLoopItr != subLoopEnd; ++subLoopItr) { - MachineLoop &subLoop = **subLoopItr; - MachineBasicBlock &subLoopBlock = *subLoop.getHeader(); - dbgs() << "BB#" << subLoopBlock.getNumber() << " "; - } - dbgs() << "]\n"; - } - - void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) { - mbb.updateTerminator(); - - for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end(); - miItr != miEnd; ++miItr) { - if (lis->isNotInMIMap(miItr)) { - lis->InsertMachineInstrInMaps(miItr); - } - } - } - - bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) { - MachineBasicBlock *header = loop.getHeader(); - MachineBasicBlock *a = 0, *b = 0; - SmallVector<MachineOperand, 4> c; - - for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(), - pbEnd = header->pred_end(); - pbItr != pbEnd; ++pbItr) { - MachineBasicBlock *predBlock = *pbItr; - if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) { - return false; - } - } - - MachineFunction::iterator headerItr(header); - if (headerItr == mf->begin()) - return true; - 
MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr); - assert(headerLayoutPred != 0 && "Header should have layout pred."); - - return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c)); - } - - MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) { - assert(loop.getLoopPreheader() == 0 && "Loop already has preheader."); - - MachineBasicBlock &header = *loop.getHeader(); - - // Save the preds - we'll need to update them once we insert the preheader. - typedef std::set<MachineBasicBlock*> HeaderPreds; - HeaderPreds headerPreds; - - for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), - predEnd = header.pred_end(); - predItr != predEnd; ++predItr) { - if (!loop.contains(*predItr)) - headerPreds.insert(*predItr); - } - - assert(!headerPreds.empty() && "No predecessors for header?"); - - //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader..."; - - MachineBasicBlock *preHeader = - mf->CreateMachineBasicBlock(header.getBasicBlock()); - - assert(preHeader != 0 && "Failed to create pre-header."); - - mf->insert(header, preHeader); - - for (HeaderPreds::iterator hpItr = headerPreds.begin(), - hpEnd = headerPreds.end(); - hpItr != hpEnd; ++hpItr) { - assert(*hpItr != 0 && "How'd a null predecessor get into this set?"); - MachineBasicBlock &hp = **hpItr; - hp.ReplaceUsesOfBlockWith(&header, preHeader); - } - preHeader->addSuccessor(&header); - - MachineBasicBlock *oldLayoutPred = - llvm::prior(MachineFunction::iterator(preHeader)); - if (oldLayoutPred != 0) { - updateTerminators(*oldLayoutPred); - } - - lis->InsertMBBInMaps(preHeader); - - if (MachineLoop *parentLoop = loop.getParentLoop()) { - assert(parentLoop->getHeader() != loop.getHeader() && - "Parent loop has same header?"); - parentLoop->addBasicBlockToLoop(preHeader, mli->getBase()); - - // Invalidate all parent loop ranges. - while (parentLoop != 0) { - loopRangeMap.erase(parentLoop); - parentLoop = parentLoop->getParentLoop(); - } - } - - for (LiveIntervals::iterator liItr = lis->begin(), - liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval &li = *liItr->second; - - // Is this safe for physregs? - // TargetRegisterInfo::isPhysicalRegister(li.reg) || - if (!lis->isLiveInToMBB(li, &header)) - continue; - - if (lis->isLiveInToMBB(li, preHeader)) { - assert(lis->isLiveOutOfMBB(li, preHeader) && - "Range terminates in newly added preheader?"); - continue; - } - - bool insertRange = false; - - for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(), - predEnd = preHeader->pred_end(); - predItr != predEnd; ++predItr) { - MachineBasicBlock *predMBB = *predItr; - if (lis->isLiveOutOfMBB(li, predMBB)) { - insertRange = true; - break; - } - } - - if (!insertRange) - continue; - - SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader); - assert(lis->getInstructionFromIndex(newDefIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator()); - li.addRange(LiveRange(lis->getMBBStartIdx(preHeader), - lis->getMBBEndIdx(preHeader), - newVal)); - } - - - //dbgs() << "Dumping SlotIndexes:\n"; - //sis->dump(); - - //dbgs() << "done. 
(Added MBB#" << preHeader->getNumber() << ")\n"; - - return *preHeader; - } - - bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) { - assert(edge.first->succ_size() > 1 && "Non-sensical edge."); - if (edge.second->pred_size() > 1) - return true; - return false; - } - - bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) { - MachineFunction::iterator outBlockItr(edge.second); - if (outBlockItr == mf->begin()) - return true; - MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr); - assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin."); - MachineBasicBlock *a = 0, *b = 0; - SmallVector<MachineOperand, 4> c; - return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) && - !tii->AnalyzeBranch(*edge.first, a, b, c)); - } - - MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge, - MachineLoop &loop) { - - MachineBasicBlock &inBlock = *edge.first; - MachineBasicBlock &outBlock = *edge.second; - - assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) && - "Splitting non-critical edge?"); - - //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber() - // << " -> MBB#" << outBlock.getNumber() << ")..."; - - MachineBasicBlock *splitBlock = - mf->CreateMachineBasicBlock(); - - assert(splitBlock != 0 && "Failed to create split block."); - - mf->insert(&outBlock, splitBlock); - - inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock); - splitBlock->addSuccessor(&outBlock); - - MachineBasicBlock *oldLayoutPred = - llvm::prior(MachineFunction::iterator(splitBlock)); - if (oldLayoutPred != 0) { - updateTerminators(*oldLayoutPred); - } - - lis->InsertMBBInMaps(splitBlock); - - loopRangeMap.erase(&loop); - - MachineLoop *splitParentLoop = loop.getParentLoop(); - while (splitParentLoop != 0 && - !splitParentLoop->contains(&outBlock)) { - splitParentLoop = splitParentLoop->getParentLoop(); - } - - if (splitParentLoop != 0) { - assert(splitParentLoop->contains(&loop) && - "Split-block parent doesn't contain original loop?"); - splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase()); - - // Invalidate all parent loop ranges. - while (splitParentLoop != 0) { - loopRangeMap.erase(splitParentLoop); - splitParentLoop = splitParentLoop->getParentLoop(); - } - } - - - for (LiveIntervals::iterator liItr = lis->begin(), - liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval &li = *liItr->second; - bool intersects = lis->isLiveOutOfMBB(li, &inBlock) && - lis->isLiveInToMBB(li, &outBlock); - if (lis->isLiveInToMBB(li, splitBlock)) { - if (!intersects) { - li.removeRange(lis->getMBBStartIdx(splitBlock), - lis->getMBBEndIdx(splitBlock), true); - } - } else if (intersects) { - SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock); - assert(lis->getInstructionFromIndex(newDefIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *newVal = li.getNextValue(newDefIdx, 0, - lis->getVNInfoAllocator()); - li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock), - lis->getMBBEndIdx(splitBlock), - newVal)); - } - } - - //dbgs() << "done. 
(Added MBB#" << splitBlock->getNumber() << ")\n"; - - return *splitBlock; - } - - LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) { - typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet; - LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop); - if (lrItr == loopRangeMap.end()) { - LoopMBBSet loopMBBs((StartSlotComparator(*lis))); - std::copy(loop.block_begin(), loop.block_end(), - std::inserter(loopMBBs, loopMBBs.begin())); - - assert(!loopMBBs.empty() && "No blocks in loop?"); - - LoopRanges &loopRanges = loopRangeMap[&loop]; - assert(loopRanges.empty() && "Loop encountered but not processed?"); - SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin()); - loopRanges.push_back( - std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()), - lis->getInvalidIndex())); - for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()), - curBlockEnd = loopMBBs.end(); - curBlockItr != curBlockEnd; ++curBlockItr) { - SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr); - if (newStart != oldEnd) { - loopRanges.back().second = oldEnd; - loopRanges.push_back(std::make_pair(newStart, - lis->getInvalidIndex())); - } - oldEnd = lis->getMBBEndIdx(*curBlockItr); - } - - loopRanges.back().second = - lis->getMBBEndIdx(*llvm::prior(loopMBBs.end())); - - return loopRanges; - } - return lrItr->second; - } - - std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange( - const LiveRange &lr, - MachineLoop &loop) { - LoopRanges &loopRanges = getLoopRanges(loop); - LoopRanges::iterator lrItr = loopRanges.begin(), - lrEnd = loopRanges.end(); - while (lrItr != lrEnd && lr.start >= lrItr->second) { - ++lrItr; - } - - if (lrItr == lrEnd) { - SlotIndex invalid = lis->getInvalidIndex(); - return std::make_pair(false, SlotPair(invalid, invalid)); - } - - SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start); - SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end); - - return std::make_pair(true, SlotPair(srStart, srEnd)); - } - - void LoopSplitter::dumpLoopRanges(MachineLoop &loop) { - LoopRanges &loopRanges = getLoopRanges(loop); - dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ "; - for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end(); - lrItr != lrEnd; ++lrItr) { - dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") "; - } - dbgs() << "]\n"; - } - - void LoopSplitter::processHeader(LoopSplit &split) { - MachineBasicBlock &header = *split.getLoop().getHeader(); - //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n"; - - if (!lis->isLiveInToMBB(split.getLI(), &header)) - return; // Not live in, but nothing wrong so far. - - MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader(); - if (!preHeader) { - - if (!canInsertPreHeader(split.getLoop())) { - split.invalidate(); - return; // Couldn't insert a pre-header. Bail on this interval. 
- } - - for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), - predEnd = header.pred_end(); - predItr != predEnd; ++predItr) { - if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) { - split.splitIncoming(); - break; - } - } - } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) { - split.splitIncoming(); - } - } - - void LoopSplitter::processLoopExits(LoopSplit &split) { - typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList; - ExitEdgesList exitEdges; - split.getLoop().getExitEdges(exitEdges); - - //dbgs() << " Processing loop exits:\n"; - - for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), - exitEdgeEnd = exitEdges.end(); - exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { - MachineLoop::Edge exitEdge = *exitEdgeItr; - - LiveRange *outRange = - split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second)); - - if (outRange != 0) { - if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) { - split.invalidate(); - return; - } - - split.splitOutgoing(exitEdge); - } - } - } - - void LoopSplitter::processLoopUses(LoopSplit &split) { - std::set<MachineInstr*> processed; - - for (MachineRegisterInfo::reg_iterator - rItr = mri->reg_begin(split.getLI().reg), - rEnd = mri->reg_end(); - rItr != rEnd; ++rItr) { - MachineInstr &instr = *rItr; - if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) { - split.addLoopInstr(&instr); - processed.insert(&instr); - } - } - - //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg - // << " in blocks [ "; - //dbgs() << "]\n"; - } - - bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) { - assert(TargetRegisterInfo::isVirtualRegister(li.reg) && - "Attempt to split physical register."); - - LoopSplit split(*this, li, loop); - processHeader(split); - if (split.isValid()) - processLoopExits(split); - if (split.isValid()) - processLoopUses(split); - if (split.isValid() /* && split.isWorthwhile() */) { - split.apply(); - DEBUG(dbgs() << "Success.\n"); - return true; - } - DEBUG(dbgs() << "Failed.\n"); - return false; - } - - void LoopSplitter::processInterval(LiveInterval &li) { - std::deque<MachineLoop*> loops; - std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); - - while (!loops.empty()) { - MachineLoop &loop = *loops.front(); - loops.pop_front(); - DEBUG( - dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#" - << loop.getHeader()->getNumber() << " "; - ); - if (!splitOverLoop(li, loop)) { - // Couldn't split over outer loop, schedule sub-loops to be checked. - std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); - } - } - } - - void LoopSplitter::processIntervals() { - while (!intervals.empty()) { - LiveInterval &li = *intervals.front(); - intervals.pop_front(); - - assert(!lis->intervalIsInOneMBB(li) && - "Single interval in process worklist."); - - processInterval(li); - } - } - -} diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h deleted file mode 100644 index 9fb1b8b30139..000000000000 --- a/lib/CodeGen/Splitter.h +++ /dev/null @@ -1,101 +0,0 @@ -//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_SPLITTER_H -#define LLVM_CODEGEN_SPLITTER_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/SlotIndexes.h" - -#include <deque> -#include <map> -#include <string> -#include <vector> - -namespace llvm { - - class LiveInterval; - class LiveIntervals; - struct LiveRange; - class LoopSplit; - class MachineDominatorTree; - class MachineRegisterInfo; - class SlotIndexes; - class TargetInstrInfo; - class VNInfo; - - class LoopSplitter : public MachineFunctionPass { - friend class LoopSplit; - public: - static char ID; - - LoopSplitter() : MachineFunctionPass(ID) { - initializeLoopSplitterPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage &au) const; - - virtual bool runOnMachineFunction(MachineFunction &fn); - - virtual void releaseMemory(); - - - private: - - MachineFunction *mf; - LiveIntervals *lis; - MachineLoopInfo *mli; - MachineRegisterInfo *mri; - MachineDominatorTree *mdt; - SlotIndexes *sis; - const TargetInstrInfo *tii; - const TargetRegisterInfo *tri; - - std::string fqn; - std::deque<LiveInterval*> intervals; - - typedef std::pair<SlotIndex, SlotIndex> SlotPair; - typedef std::vector<SlotPair> LoopRanges; - typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap; - LoopRangeMap loopRangeMap; - - void dumpLoopInfo(MachineLoop &loop); - - void dumpOddTerminators(); - - void updateTerminators(MachineBasicBlock &mbb); - - bool canInsertPreHeader(MachineLoop &loop); - MachineBasicBlock& insertPreHeader(MachineLoop &loop); - - bool isCriticalEdge(MachineLoop::Edge &edge); - bool canSplitEdge(MachineLoop::Edge &edge); - MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop); - - LoopRanges& getLoopRanges(MachineLoop &loop); - std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr, - MachineLoop &loop); - - void dumpLoopRanges(MachineLoop &loop); - - void processHeader(LoopSplit &split); - void processLoopExits(LoopSplit &split); - void processLoopUses(LoopSplit &split); - - bool splitOverLoop(LiveInterval &li, MachineLoop &loop); - - void processInterval(LiveInterval &li); - - void processIntervals(); - }; - -} - -#endif diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 1f0e5a2711ae..43a6ad8c97a4 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -123,16 +123,11 @@ bool StackProtector::RequiresStackProtector() const { // protectors. return true; - if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) { - // We apparently only care about character arrays. - if (!AT->getElementType()->isIntegerTy(8)) - continue; - + if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) // If an array has more than SSPBufferSize bytes of allocated space, // then we emit stack protectors. 
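The StackProtector.cpp hunk above removes the old restriction to i8 (character) arrays, so any alloca whose allocated type is an array of at least SSPBufferSize bytes now requests a protector. A minimal restatement of the resulting check, with a toy AllocaInfo standing in for the IR queries (the names are illustrative, not LLVM API):

// Toy summary of what RequiresStackProtector() asks about one alloca.
struct AllocaInfo {
  bool IsArrayType;                // allocated type is an ArrayType
  unsigned long long SizeInBytes;  // TD->getTypeAllocSize(AT)
};

// After this change the element type no longer matters; only the total
// allocated size of the array is compared against SSPBufferSize.
bool needsProtector(const AllocaInfo &AI, unsigned SSPBufferSize) {
  return AI.IsArrayType && AI.SizeInBytes >= SSPBufferSize;
}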
if (SSPBufferSize <= TD->getTypeAllocSize(AT)) return true; - } } } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 57cbe1ba5960..1e940b1d0711 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "stackcoloring" -#include "VirtRegMap.h" #include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/CodeGen/Passes.h" @@ -40,29 +39,17 @@ DisableSharing("no-stack-slot-sharing", cl::init(false), cl::Hidden, cl::desc("Suppress slot sharing during stack coloring")); -static cl::opt<bool> -ColorWithRegsOpt("color-ss-with-regs", - cl::init(false), cl::Hidden, - cl::desc("Color stack slots with free registers")); - - static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden); STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring"); -STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs"); -STATISTIC(NumLoadElim, "Number of loads eliminated"); -STATISTIC(NumStoreElim, "Number of stores eliminated"); STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated"); namespace { class StackSlotColoring : public MachineFunctionPass { bool ColorWithRegs; LiveStacks* LS; - VirtRegMap* VRM; MachineFrameInfo *MFI; - MachineRegisterInfo *MRI; const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; const MachineLoopInfo *loopInfo; // SSIntervals - Spill slot intervals. @@ -98,18 +85,12 @@ namespace { MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) { initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); } - StackSlotColoring(bool RegColor) : - MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) { - initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); - } - + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveStacks>(); - AU.addRequired<VirtRegMap>(); - AU.addPreserved<VirtRegMap>(); AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); AU.addPreservedID(MachineDominatorsID); @@ -117,9 +98,6 @@ namespace { } virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char* getPassName() const { - return "Stack Slot Coloring"; - } private: void InitializeSlots(); @@ -127,41 +105,23 @@ namespace { bool OverlapWithAssignments(LiveInterval *li, int Color) const; int ColorSlot(LiveInterval *li); bool ColorSlots(MachineFunction &MF); - bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, - SmallVector<SmallVector<int, 4>, 16> &RevMap, - BitVector &SlotIsReg); void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI, MachineFunction &MF); - bool PropagateBackward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg); - bool PropagateForward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg); - void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, - unsigned Reg, const TargetRegisterClass *RC, - SmallSet<unsigned, 4> &Defs, - MachineFunction &MF); - bool AllMemRefsCanBeUnfolded(int SS); bool RemoveDeadStores(MachineBasicBlock* MBB); }; } // end anonymous namespace char StackSlotColoring::ID = 0; +char &llvm::StackSlotColoringID = StackSlotColoring::ID; INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring", "Stack Slot Coloring", false, false) 
INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveStacks) -INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring", "Stack Slot Coloring", false, false) -FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) { - return new StackSlotColoring(RegColor); -} - namespace { // IntervalSorter - Comparison predicate that sort live intervals by // their weight. @@ -248,79 +208,6 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { return false; } -/// ColorSlotsWithFreeRegs - If there are any free registers available, try -/// replacing spill slots references with registers instead. -bool -StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, - SmallVector<SmallVector<int, 4>, 16> &RevMap, - BitVector &SlotIsReg) { - if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters()) - return false; - - bool Changed = false; - DEBUG(dbgs() << "Assigning unused registers to spill slots:\n"); - for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { - LiveInterval *li = SSIntervals[i]; - int SS = TargetRegisterInfo::stackSlot2Index(li->reg); - if (!UsedColors[SS] || li->weight < 20) - // If the weight is < 20, i.e. two references in a loop with depth 1, - // don't bother with it. - continue; - - // These slots allow to share the same registers. - bool AllColored = true; - SmallVector<unsigned, 4> ColoredRegs; - for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) { - int RSS = RevMap[SS][j]; - const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS); - // If it's not colored to another stack slot, try coloring it - // to a "free" register. - if (!RC) { - AllColored = false; - continue; - } - unsigned Reg = VRM->getFirstUnusedRegister(RC); - if (!Reg) { - AllColored = false; - continue; - } - if (!AllMemRefsCanBeUnfolded(RSS)) { - AllColored = false; - continue; - } else { - DEBUG(dbgs() << "Assigning fi#" << RSS << " to " - << TRI->getName(Reg) << '\n'); - ColoredRegs.push_back(Reg); - SlotMapping[RSS] = Reg; - SlotIsReg.set(RSS); - Changed = true; - } - } - - // Register and its sub-registers are no longer free. - while (!ColoredRegs.empty()) { - unsigned Reg = ColoredRegs.back(); - ColoredRegs.pop_back(); - VRM->setRegisterUsed(Reg); - // If reg is a callee-saved register, it will have to be spilled in - // the prologue. - MRI->setPhysRegUsed(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - VRM->setRegisterUsed(*AS); - MRI->setPhysRegUsed(*AS); - } - } - // This spill slot is dead after the rewrites - if (AllColored) { - MFI->RemoveStackObject(SS); - ++NumEliminated; - } - } - DEBUG(dbgs() << '\n'); - - return Changed; -} - /// ColorSlot - Assign a "color" (stack slot) to the specified stack slot. /// int StackSlotColoring::ColorSlot(LiveInterval *li) { @@ -372,7 +259,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { SmallVector<int, 16> SlotMapping(NumObjs, -1); SmallVector<float, 16> SlotWeights(NumObjs, 0.0); SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs); - BitVector SlotIsReg(NumObjs); BitVector UsedColors(NumObjs); DEBUG(dbgs() << "Color spill slot intervals:\n"); @@ -404,31 +290,19 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { DEBUG(dbgs() << '\n'); #endif - // Can we "color" a stack slot with a unused register? - Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg); - if (!Changed) return false; // Rewrite all MO_FrameIndex operands. 
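ColorSlots() above walks the spill-slot live intervals in decreasing weight order and, via ColorSlot() and OverlapWithAssignments(), gives each interval the lowest color (slot) whose already-assigned intervals it does not overlap. A compressed standalone sketch of that greedy scheme, using integer endpoints in place of LiveIntervals (names invented for illustration):

#include <vector>

struct Interval { unsigned Start, End; };            // half-open [Start, End)

static bool overlaps(const Interval &A, const Interval &B) {
  return A.Start < B.End && B.Start < A.End;
}

// Intervals are assumed already sorted by decreasing weight, as
// ColorSlots() does with IntervalSorter. Returns one color per interval;
// intervals sharing a color can share a stack slot.
std::vector<unsigned> colorSlots(const std::vector<Interval> &Ivs) {
  std::vector<unsigned> Color(Ivs.size(), 0);
  std::vector<std::vector<unsigned> > Assigned;      // interval indices per color
  for (unsigned i = 0, e = Ivs.size(); i != e; ++i) {
    unsigned C = 0;
    for (; C != Assigned.size(); ++C) {
      bool Free = true;
      for (unsigned j = 0, je = Assigned[C].size(); j != je; ++j)
        if (overlaps(Ivs[i], Ivs[Assigned[C][j]])) { Free = false; break; }
      if (Free) break;
    }
    if (C == Assigned.size())
      Assigned.push_back(std::vector<unsigned>());   // need a fresh slot
    Assigned[C].push_back(i);
    Color[i] = C;
  }
  return Color;
}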
SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs()); for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) { - bool isReg = SlotIsReg[SS]; int NewFI = SlotMapping[SS]; - if (NewFI == -1 || (NewFI == (int)SS && !isReg)) + if (NewFI == -1 || (NewFI == (int)SS)) continue; - const TargetRegisterClass *RC = LS->getIntervalRegClass(SS); SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS]; for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) - if (!isReg) - RewriteInstruction(RefMIs[i], SS, NewFI, MF); - else { - // Rewrite to use a register instead. - unsigned MBBId = RefMIs[i]->getParent()->getNumber(); - SmallSet<unsigned, 4> &Defs = NewDefs[MBBId]; - UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF); - } + RewriteInstruction(RefMIs[i], SS, NewFI, MF); } // Delete unused stack slots. @@ -441,28 +315,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { return true; } -/// AllMemRefsCanBeUnfolded - Return true if all references of the specified -/// spill slot index can be unfolded. -bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) { - SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS]; - for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) { - MachineInstr *MI = RefMIs[i]; - if (TII->isLoadFromStackSlot(MI, SS) || - TII->isStoreToStackSlot(MI, SS)) - // Restore and spill will become copies. - return true; - if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false)) - return false; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (MO.isFI() && MO.getIndex() != SS) - // If it uses another frameindex, we can, currently* unfold it. - return false; - } - } - return true; -} - /// RewriteInstruction - Rewrite specified instruction by replacing references /// to old frame index with new one. void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, @@ -489,179 +341,6 @@ void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, (*I)->setValue(NewSV); } -/// PropagateBackward - Traverse backward and look for the definition of -/// OldReg. If it can successfully update all of the references with NewReg, -/// do so and return true. -bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg) { - if (MII == MBB->begin()) - return false; - - SmallVector<MachineOperand*, 4> Uses; - SmallVector<MachineOperand*, 4> Refs; - while (--MII != MBB->begin()) { - bool FoundDef = false; // Not counting 2address def. - - Uses.clear(); - const MCInstrDesc &MCID = MII->getDesc(); - for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MII->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - if (Reg == OldReg) { - if (MO.isImplicit()) - return false; - - // Abort the use is actually a sub-register def. We don't have enough - // information to figure out if it is really legal. - if (MO.getSubReg() || MII->isSubregToReg()) - return false; - - const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); - if (RC && !RC->contains(NewReg)) - return false; - - if (MO.isUse()) { - Uses.push_back(&MO); - } else { - Refs.push_back(&MO); - if (!MII->isRegTiedToUseOperand(i)) - FoundDef = true; - } - } else if (TRI->regsOverlap(Reg, NewReg)) { - return false; - } else if (TRI->regsOverlap(Reg, OldReg)) { - if (!MO.isUse() || !MO.isKill()) - return false; - } - } - - if (FoundDef) { - // Found non-two-address def. 
Stop here. - for (unsigned i = 0, e = Refs.size(); i != e; ++i) - Refs[i]->setReg(NewReg); - return true; - } - - // Two-address uses must be updated as well. - for (unsigned i = 0, e = Uses.size(); i != e; ++i) - Refs.push_back(Uses[i]); - } - return false; -} - -/// PropagateForward - Traverse forward and look for the kill of OldReg. If -/// it can successfully update all of the uses with NewReg, do so and -/// return true. -bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg) { - if (MII == MBB->end()) - return false; - - SmallVector<MachineOperand*, 4> Uses; - while (++MII != MBB->end()) { - bool FoundKill = false; - const MCInstrDesc &MCID = MII->getDesc(); - for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MII->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - if (Reg == OldReg) { - if (MO.isDef() || MO.isImplicit()) - return false; - - // Abort the use is actually a sub-register use. We don't have enough - // information to figure out if it is really legal. - if (MO.getSubReg()) - return false; - - const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); - if (RC && !RC->contains(NewReg)) - return false; - if (MO.isKill()) - FoundKill = true; - - Uses.push_back(&MO); - } else if (TRI->regsOverlap(Reg, NewReg) || - TRI->regsOverlap(Reg, OldReg)) - return false; - } - if (FoundKill) { - for (unsigned i = 0, e = Uses.size(); i != e; ++i) - Uses[i]->setReg(NewReg); - return true; - } - } - return false; -} - -/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding -/// folded memory references and replacing those references with register -/// references instead. -void -StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, - unsigned Reg, - const TargetRegisterClass *RC, - SmallSet<unsigned, 4> &Defs, - MachineFunction &MF) { - MachineBasicBlock *MBB = MI->getParent(); - if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) { - if (PropagateForward(MI, MBB, DstReg, Reg)) { - DEBUG(dbgs() << "Eliminated load: "); - DEBUG(MI->dump()); - ++NumLoadElim; - } else { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), - DstReg).addReg(Reg); - ++NumRegRepl; - } - - if (!Defs.count(Reg)) { - // If this is the first use of Reg in this MBB and it wasn't previously - // defined in MBB, add it to livein. - MBB->addLiveIn(Reg); - Defs.insert(Reg); - } - } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) { - if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) { - DEBUG(dbgs() << "Eliminated store: "); - DEBUG(MI->dump()); - ++NumStoreElim; - } else { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg) - .addReg(SrcReg); - ++NumRegRepl; - } - - // Remember reg has been defined in MBB. - Defs.insert(Reg); - } else { - SmallVector<MachineInstr*, 4> NewMIs; - bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs); - (void)Success; // Silence compiler warning. - assert(Success && "Failed to unfold!"); - MachineInstr *NewMI = NewMIs[0]; - MBB->insert(MI, NewMI); - ++NumRegRepl; - - if (NewMI->readsRegister(Reg)) { - if (!Defs.count(Reg)) - // If this is the first use of Reg in this MBB and it wasn't previously - // defined in MBB, add it to livein. 
- MBB->addLiveIn(Reg); - Defs.insert(Reg); - } - } - MBB->erase(MI); -} /// RemoveDeadStores - Scan through a basic block and look for loads followed /// by stores. If they're both using the same stack slot, then the store is @@ -679,33 +358,33 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { I != E; ++I) { if (DCELimit != -1 && (int)NumDead >= DCELimit) break; - + MachineBasicBlock::iterator NextMI = llvm::next(I); if (NextMI == MBB->end()) continue; - + int FirstSS, SecondSS; unsigned LoadReg = 0; unsigned StoreReg = 0; if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue; if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue; if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue; - + ++NumDead; changed = true; - + if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) { ++NumDead; toErase.push_back(I); } - + toErase.push_back(NextMI); ++I; } - + for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(), E = toErase.end(); I != E; ++I) (*I)->eraseFromParent(); - + return changed; } @@ -713,32 +392,27 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { DEBUG({ dbgs() << "********** Stack Slot Coloring **********\n" - << "********** Function: " + << "********** Function: " << MF.getFunction()->getName() << '\n'; }); MFI = MF.getFrameInfo(); - MRI = &MF.getRegInfo(); TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); LS = &getAnalysis<LiveStacks>(); - VRM = &getAnalysis<VirtRegMap>(); loopInfo = &getAnalysis<MachineLoopInfo>(); bool Changed = false; unsigned NumSlots = LS->getNumIntervals(); - if (NumSlots < 2) { - if (NumSlots == 0 || !VRM->HasUnusedRegisters()) - // Nothing to do! - return false; - } + if (NumSlots == 0) + // Nothing to do! + return false; // If there are calls to setjmp or sigsetjmp, don't perform stack slot // coloring. The stack could be modified before the longjmp is executed, // resulting in the wrong value being used afterwards. (See // <rdar://problem/8007500>.) - if (MF.callsSetJmp()) + if (MF.exposesReturnsTwice()) return false; // Gather spill slot references diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 260cc0ee50a5..c6fdc7382435 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -228,7 +228,6 @@ static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) { return &MO; } } - return NULL; } bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { @@ -390,12 +389,10 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { MachineOperand *LastUse = findLastUse(MBB, SrcReg); assert(LastUse); SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); - SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB)); + SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB)); LastUse->setIsKill(true); } - LI->renumber(); - Allocator.Reset(); RegNodeMap.clear(); PHISrcDefs.clear(); @@ -745,7 +742,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, // Set the phi-def flag for the VN at this PHI. 
SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex()); + VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); assert(DestVNI); DestVNI->setIsPHIDef(true); @@ -756,7 +753,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); DestVNI->def = MBBStartIndex; DestLI.addRange(LiveRange(MBBStartIndex, - PHIIndex.getDefIndex(), + PHIIndex.getRegSlot(), DestVNI)); return; } @@ -779,22 +776,21 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, - CopyInstr, LI->getVNInfoAllocator()); CopyVNI->setIsPHIDef(true); CopyLI.addRange(LiveRange(MBBStartIndex, - DestCopyIndex.getDefIndex(), + DestCopyIndex.getRegSlot(), CopyVNI)); // Adjust DestReg's live interval to adjust for its new definition at // CopyInstr. LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex()); + DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot()); - VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex()); + VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); assert(DestVNI); - DestVNI->def = DestCopyIndex.getDefIndex(); + DestVNI->def = DestCopyIndex.getRegSlot(); InsertedDestCopies[CopyReg] = CopyInstr; } diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 3a6211a0f3e6..8ebfbcae785b 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -56,10 +56,10 @@ typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy; namespace { /// TailDuplicatePass - Perform tail duplication. class TailDuplicatePass : public MachineFunctionPass { - bool PreRegAlloc; const TargetInstrInfo *TII; MachineModuleInfo *MMI; MachineRegisterInfo *MRI; + bool PreRegAlloc; // SSAUpdateVRs - A list of virtual registers for which to update SSA form. 
SmallVector<unsigned, 16> SSAUpdateVRs; @@ -70,11 +70,10 @@ namespace { public: static char ID; - explicit TailDuplicatePass(bool PreRA) : - MachineFunctionPass(ID), PreRegAlloc(PreRA) {} + explicit TailDuplicatePass() : + MachineFunctionPass(ID), PreRegAlloc(false) {} virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "Tail Duplication"; } private: void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, @@ -118,14 +117,16 @@ namespace { char TailDuplicatePass::ID = 0; } -FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) { - return new TailDuplicatePass(PreRegAlloc); -} +char &llvm::TailDuplicateID = TailDuplicatePass::ID; + +INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", + false, false) bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); MRI = &MF.getRegInfo(); MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + PreRegAlloc = MRI->isSSA(); bool MadeChange = false; while (TailDuplicateBlocks(MF)) @@ -432,7 +433,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, MO.setReg(VI->second); } } - PredBB->insert(PredBB->end(), NewMI); + PredBB->insert(PredBB->instr_end(), NewMI); } /// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor @@ -553,7 +554,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, bool HasIndirectbr = false; if (!TailBB.empty()) - HasIndirectbr = TailBB.back().getDesc().isIndirectBranch(); + HasIndirectbr = TailBB.back().isIndirectBranch(); if (HasIndirectbr && PreRegAlloc) MaxDuplicateCount = 20; @@ -561,22 +562,21 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; - for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end(); - ++I) { + for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) { // Non-duplicable things shouldn't be tail-duplicated. - if (I->getDesc().isNotDuplicable()) + if (I->isNotDuplicable()) return false; // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. - if (PreRegAlloc && I->getDesc().isReturn()) + if (PreRegAlloc && I->isReturn()) return false; // Avoid duplicating calls before register allocation. Calls presents a // barrier to register allocation so duplicating them may end up increasing // spills. - if (PreRegAlloc && I->getDesc().isCall()) + if (PreRegAlloc && I->isCall()) return false; if (!I->isPHI() && !I->isDebugValue()) @@ -611,7 +611,7 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { ++I; if (I == E) return true; - return I->getDesc().isUnconditionalBranch(); + return I->isUnconditionalBranch(); } static bool @@ -778,8 +778,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // Clone the contents of TailBB into PredBB. DenseMap<unsigned, unsigned> LocalVRMap; SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; - MachineBasicBlock::iterator I = TailBB->begin(); - while (I != TailBB->end()) { + // Use instr_iterator here to properly handle bundles, e.g. + // ARM Thumb2 IT block. 
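shouldTailDuplicate() above screens a candidate tail block: it counts only real (non-PHI, non-debug) instructions against MaxDuplicateCount, raises that cap to 20 for indirect-branch tails before register allocation, and rejects blocks containing non-duplicable instructions or, pre-RA, returns and calls. A rough standalone sketch of the same screening over a toy instruction descriptor (the struct and names are invented, not the LLVM classes):

#include <vector>

struct ToyInstr {
  bool IsPHI, IsDebugValue, IsNotDuplicable, IsReturn, IsCall;
};

bool shouldTailDuplicate(const std::vector<ToyInstr> &Tail,
                         bool PreRegAlloc, bool HasIndirectBr,
                         unsigned MaxDuplicateCount) {
  if (HasIndirectBr && PreRegAlloc)
    MaxDuplicateCount = 20;            // indirect-branch tails are worth more
  unsigned InstrCount = 0;
  for (unsigned i = 0, e = Tail.size(); i != e; ++i) {
    const ToyInstr &I = Tail[i];
    if (I.IsNotDuplicable)
      return false;                    // cannot legally copy this instruction
    if (PreRegAlloc && (I.IsReturn || I.IsCall))
      return false;                    // likely to expand or hurt regalloc
    if (!I.IsPHI && !I.IsDebugValue)
      ++InstrCount;
    if (InstrCount > MaxDuplicateCount)
      return false;                    // duplication would be too large
  }
  return true;
}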
+ MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); + while (I != TailBB->instr_end()) { MachineInstr *MI = &*I; ++I; if (MI->isPHI()) { @@ -824,7 +826,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch. - if (PrevBB->succ_size() == 1 && + if (PrevBB->succ_size() == 1 && !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { @@ -849,6 +851,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; + assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp new file mode 100644 index 000000000000..cadb87815dbe --- /dev/null +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -0,0 +1,45 @@ +//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the layout of a stack frame on the target machine. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <cstdlib> +using namespace llvm; + +TargetFrameLowering::~TargetFrameLowering() { +} + +/// getFrameIndexOffset - Returns the displacement from the frame register to +/// the stack frame of the specified index. This is the default implementation +/// which is overridden for some targets. +int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->getObjectOffset(FI) + MFI->getStackSize() - + getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); +} + +int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg) const { + const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo(); + + // By default, assume all frame indices are referenced via whatever + // getFrameRegister() says. The target can override this if it's doing + // something different. + FrameReg = RI->getFrameRegister(MF); + return getFrameIndexOffset(MF, FI); +} diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index f32678f12b0a..2beb9281e35b 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -77,6 +78,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, unsigned Reg0 = HasDef ? 
MI->getOperand(0).getReg() : 0; unsigned Reg1 = MI->getOperand(Idx1).getReg(); unsigned Reg2 = MI->getOperand(Idx2).getReg(); + unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0; + unsigned SubReg1 = MI->getOperand(Idx1).getSubReg(); + unsigned SubReg2 = MI->getOperand(Idx2).getSubReg(); bool Reg1IsKill = MI->getOperand(Idx1).isKill(); bool Reg2IsKill = MI->getOperand(Idx2).isKill(); // If destination is tied to either of the commuted source register, then @@ -85,10 +89,12 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) { Reg2IsKill = false; Reg0 = Reg2; + SubReg0 = SubReg2; } else if (HasDef && Reg0 == Reg2 && MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) { Reg1IsKill = false; Reg0 = Reg1; + SubReg0 = SubReg1; } if (NewMI) { @@ -97,19 +103,23 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, MachineFunction &MF = *MI->getParent()->getParent(); if (HasDef) return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead)) - .addReg(Reg2, getKillRegState(Reg2IsKill)) - .addReg(Reg1, getKillRegState(Reg2IsKill)); + .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead), SubReg0) + .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) + .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); else return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg2, getKillRegState(Reg2IsKill)) - .addReg(Reg1, getKillRegState(Reg2IsKill)); + .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) + .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); } - if (HasDef) + if (HasDef) { MI->getOperand(0).setReg(Reg0); + MI->getOperand(0).setSubReg(SubReg0); + } MI->getOperand(Idx2).setReg(Reg1); MI->getOperand(Idx1).setReg(Reg2); + MI->getOperand(Idx2).setSubReg(SubReg1); + MI->getOperand(Idx1).setSubReg(SubReg2); MI->getOperand(Idx2).setIsKill(Reg1IsKill); MI->getOperand(Idx1).setIsKill(Reg2IsKill); return MI; @@ -121,6 +131,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { + assert(!MI->isBundle() && + "TargetInstrInfoImpl::findCommutedOpIndices() can't handle bundles"); + const MCInstrDesc &MCID = MI->getDesc(); if (!MCID.isCommutable()) return false; @@ -136,11 +149,28 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, } +bool +TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const { + if (!MI->isTerminator()) return false; + + // Conditional branch is a special case. 
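The commuteInstruction() hunk above extends operand commutation to carry sub-register indices alongside the registers and kill flags, and to keep a tied definition naming whichever source ends up in the tied slot. A simplified toy sketch of that bookkeeping over a plain operand struct (it assumes the def was tied to, and equal to, one of the sources; ToyOperand is not MachineOperand):

#include <utility>

struct ToyOperand { unsigned Reg; unsigned SubReg; bool IsKill; };

// Swap the two commutable sources and, if the def is tied to the first
// source slot, rewrite the def (register and sub-register, as the new
// SubReg0/SubReg1/SubReg2 code does) and drop the kill on that source.
void commuteOperands(ToyOperand &Def, ToyOperand &Src1, ToyOperand &Src2,
                     bool DefTiedToSrc1) {
  std::swap(Src1.Reg, Src2.Reg);
  std::swap(Src1.SubReg, Src2.SubReg);
  std::swap(Src1.IsKill, Src2.IsKill);
  ToyOperand &Tied = DefTiedToSrc1 ? Src1 : Src2;
  Def.Reg = Tied.Reg;
  Def.SubReg = Tied.SubReg;
  Tied.IsKill = false;   // the register now written by the def is not a kill
}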
+ if (MI->isBranch() && !MI->isBarrier()) + return true; + if (!MI->isPredicable()) + return true; + return !isPredicated(MI); +} + + bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, const SmallVectorImpl<MachineOperand> &Pred) const { bool MadeChange = false; + + assert(!MI->isBundle() && + "TargetInstrInfoImpl::PredicateInstruction() can't handle bundles"); + const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isPredicable()) + if (!MI->isPredicable()) return false; for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -218,7 +248,7 @@ TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0, MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig, MachineFunction &MF) const { - assert(!Orig->getDesc().isNotDuplicable() && + assert(!Orig->isNotDuplicable() && "Instruction cannot be duplicated"); return MF.CloneMachineInstr(Orig); } @@ -288,16 +318,15 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { // Add a memory operand, foldMemoryOperandImpl doesn't do that. assert((!(Flags & MachineMemOperand::MOStore) || - NewMI->getDesc().mayStore()) && + NewMI->mayStore()) && "Folded a def to a non-store!"); assert((!(Flags & MachineMemOperand::MOLoad) || - NewMI->getDesc().mayLoad()) && + NewMI->mayLoad()) && "Folded a use to a non-load!"); const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), Flags, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); NewMI->addMemOperand(MF, MMO); @@ -332,7 +361,7 @@ MachineInstr* TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr* LoadMI) const { - assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!"); + assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!"); #ifndef NDEBUG for (unsigned i = 0, e = Ops.size(); i != e; ++i) assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!"); @@ -360,7 +389,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, const MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetMachine &TM = MF.getTarget(); const TargetInstrInfo &TII = *TM.getInstrInfo(); - const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); // Remat clients assume operand 0 is the defined register. if (!MI->getNumOperands() || !MI->getOperand(0).isReg()) @@ -383,10 +411,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) return true; - const MCInstrDesc &MCID = MI->getDesc(); - // Avoid instructions obviously unsafe for remat. - if (MCID.isNotDuplicable() || MCID.mayStore() || + if (MI->isNotDuplicable() || MI->mayStore() || MI->hasUnmodeledSideEffects()) return false; @@ -396,7 +422,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, return false; // Avoid instructions which load from potentially varying memory. - if (MCID.mayLoad() && !MI->isInvariantLoad(AA)) + if (MI->mayLoad() && !MI->isInvariantLoad(AA)) return false; // If any of the registers accessed are non-constant, conservatively assume @@ -414,19 +440,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. 
Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI.def_empty(Reg)) - return false; - BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0); - if (AllocatableRegs.test(Reg)) + if (!MRI.isConstantPhysReg(Reg, MF)) return false; - // Check for a def among the register's aliases too. - for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - if (!MRI.def_empty(AliasReg)) - return false; - if (AllocatableRegs.test(AliasReg)) - return false; - } } else { // A physreg def. We can't remat it. return false; @@ -457,7 +472,7 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const{ // Terminators and labels can't be scheduled around. - if (MI->getDesc().isTerminator() || MI->isLabel()) + if (MI->isTerminator() || MI->isLabel()) return true; // Don't attempt to schedule around any instruction that defines @@ -493,3 +508,32 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, return (ScheduleHazardRecognizer *) new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); } + +int +TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const { + if (!ItinData || ItinData->isEmpty()) + return -1; + + if (!DefNode->isMachineOpcode()) + return -1; + + unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass(); + if (!UseNode->isMachineOpcode()) + return ItinData->getOperandCycle(DefClass, DefIdx); + unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass(); + return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); +} + +int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData, + SDNode *N) const { + if (!ItinData || ItinData->isEmpty()) + return 1; + + if (!N->isMachineOpcode()) + return 1; + + return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass()); +} + diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 3848f4d4d4c4..9925185be120 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -17,6 +17,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" +#include "llvm/Module.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -53,11 +54,9 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, report_fatal_error("We do not support this DWARF encoding yet!"); case dwarf::DW_EH_PE_absptr: return Mang->getSymbol(GV); - break; case dwarf::DW_EH_PE_pcrel: { return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + Mang->getSymbol(GV)->getName()); - break; } } } @@ -78,14 +77,14 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, Flags, SectionKind::getDataRel(), 0, Label->getName()); + unsigned Size = TM.getTargetData()->getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(8); + Streamer.EmitValueToAlignment(TM.getTargetData()->getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); - const MCExpr *E = MCConstantExpr::Create(8, getContext()); + const MCExpr *E = MCConstantExpr::Create(Size, getContext()); Streamer.EmitELFSize(Label, E); Streamer.EmitLabel(Label); - unsigned Size = TM.getTargetData()->getPointerSize(); 
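The getOperandLatency()/getInstrLatency() overloads added above answer latency queries from the instruction itineraries, falling back to -1 (unknown) or 1 when there is no itinerary or the node is not a machine opcode. The toy table below illustrates the kind of per-scheduling-class operand-cycle data such a query reads and the usual def-cycle minus use-cycle arithmetic; the table values and names are invented for illustration.

// OperandCycle[SchedClass][OperandIdx]: cycle at which the operand is
// produced (operand 0, the def) or read (later operands); -1 = unknown.
static const int OperandCycle[2][2] = {
  /* class 0: ALU  */ { 1, 1 },
  /* class 1: LOAD */ { 3, 1 },
};

int operandLatency(int DefClass, int DefIdx, int UseClass, int UseIdx) {
  int DefCycle = OperandCycle[DefClass][DefIdx];
  int UseCycle = OperandCycle[UseClass][UseIdx];
  if (DefCycle < 0 || UseCycle < 0)
    return -1;                         // no data; caller picks a default
  int Latency = DefCycle - UseCycle + 1;
  return Latency < 0 ? 0 : Latency;
}
// e.g. a LOAD result (ready in cycle 3) feeding an ALU source read in
// cycle 1 gives latency 3; ALU feeding ALU gives latency 1.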
Streamer.EmitSymbolValue(Sym, Size); } @@ -189,6 +188,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, static const char *getSectionPrefixForGlobal(SectionKind Kind) { if (Kind.isText()) return ".text."; if (Kind.isReadOnly()) return ".rodata."; + if (Kind.isBSS()) return ".bss."; if (Kind.isThreadData()) return ".tdata."; if (Kind.isThreadBSS()) return ".tbss."; @@ -217,7 +217,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // If this global is linkonce/weak and the target handles this by emitting it // into a 'uniqued' section name, create and return the section now. if ((GV->isWeakForLinker() || EmitUniquedSection) && - !Kind.isCommon() && !Kind.isBSS()) { + !Kind.isCommon()) { const char *Prefix; Prefix = getSectionPrefixForGlobal(Kind); @@ -342,10 +342,92 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); } +const MCSection * +TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const { + // The default scheme is .ctor / .dtor, so we have to invert the priority + // numbering. + if (Priority == 65535) + return StaticCtorSection; + + std::string Name = std::string(".ctors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); +} + +const MCSection * +TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const { + // The default scheme is .ctor / .dtor, so we have to invert the priority + // numbering. + if (Priority == 65535) + return StaticDtorSection; + + std::string Name = std::string(".dtors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); +} + //===----------------------------------------------------------------------===// // MachO //===----------------------------------------------------------------------===// +/// emitModuleFlags - Emit the module flags that specify the garbage collection +/// information. +void TargetLoweringObjectFileMachO:: +emitModuleFlags(MCStreamer &Streamer, + ArrayRef<Module::ModuleFlagEntry> ModuleFlags, + Mangler *Mang, const TargetMachine &TM) const { + unsigned VersionVal = 0; + unsigned GCFlags = 0; + StringRef SectionVal; + + for (ArrayRef<Module::ModuleFlagEntry>::iterator + i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) { + const Module::ModuleFlagEntry &MFE = *i; + + // Ignore flags with 'Require' behavior. + if (MFE.Behavior == Module::Require) + continue; + + StringRef Key = MFE.Key->getString(); + Value *Val = MFE.Val; + + if (Key == "Objective-C Image Info Version") + VersionVal = cast<ConstantInt>(Val)->getZExtValue(); + else if (Key == "Objective-C Garbage Collection" || + Key == "Objective-C GC Only") + GCFlags |= cast<ConstantInt>(Val)->getZExtValue(); + else if (Key == "Objective-C Image Info Section") + SectionVal = cast<MDString>(Val)->getString(); + } + + // The section is mandatory. If we don't have it, then we don't have GC info. + if (SectionVal.empty()) return; + + StringRef Segment, Section; + unsigned TAA = 0, StubSize = 0; + bool TAAParsed; + std::string ErrorCode = + MCSectionMachO::ParseSectionSpecifier(SectionVal, Segment, Section, + TAA, TAAParsed, StubSize); + if (!ErrorCode.empty()) + // If invalid, report the error with report_fatal_error. 
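getStaticCtorSection()/getStaticDtorSection() above keep the default priority (65535) in the plain .ctors/.dtors sections and put everything else in a numbered section whose suffix is the inverted priority. A one-function sketch of the naming rule (std::string based, illustrative only):

#include <sstream>
#include <string>

// Section name for a static constructor of the given priority under the
// .ctors scheme used above; the suffix is 65535 - Priority.
std::string staticCtorSectionName(unsigned Priority) {
  if (Priority == 65535)
    return ".ctors";
  std::ostringstream OS;
  OS << ".ctors." << (65535 - Priority);
  return OS.str();
}
// staticCtorSectionName(65535) == ".ctors"
// staticCtorSectionName(101)   == ".ctors.65434"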
+ report_fatal_error("Invalid section specifier '" + Section + "': " + + ErrorCode + "."); + + // Get the section. + const MCSectionMachO *S = + getContext().getMachOSection(Segment, Section, TAA, StubSize, + SectionKind::getDataNoRel()); + Streamer.SwitchSection(S); + Streamer.EmitLabel(getContext(). + GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); + Streamer.EmitIntValue(VersionVal, 4); + Streamer.EmitIntValue(GCFlags, 4); + Streamer.AddBlankLine(); +} + const MCSection *TargetLoweringObjectFileMachO:: getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { @@ -358,11 +440,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, TAA, TAAParsed, StubSize); if (!ErrorCode.empty()) { // If invalid, report the error with report_fatal_error. - report_fatal_error("Global variable '" + GV->getNameStr() + - "' has an invalid section specifier '" + GV->getSection()+ - "': " + ErrorCode + "."); - // Fall back to dropping it into the data section. - return DataSection; + report_fatal_error("Global variable '" + GV->getName() + + "' has an invalid section specifier '" + + GV->getSection() + "': " + ErrorCode + "."); } // Get the section. @@ -379,9 +459,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, // to reject it here. if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { // If invalid, report the error with report_fatal_error. - report_fatal_error("Global variable '" + GV->getNameStr() + - "' section type or attributes does not match previous" - " section specifier"); + report_fatal_error("Global variable '" + GV->getName() + + "' section type or attributes does not match previous" + " section specifier"); } return S; @@ -536,9 +616,7 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); - MachineModuleInfoImpl::StubValueTy &StubSym = - GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : - MachOMMI.getGVStubEntry(SSym); + MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { MCSymbol *Sym = Mang->getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); @@ -568,6 +646,11 @@ getCOFFSectionFlags(SectionKind K) { COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE; + else if (K.isThreadLocal()) + Flags |= + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; else if (K.isReadOnly()) Flags |= COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | @@ -594,6 +677,8 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { return ".text$"; if (Kind.isBSS ()) return ".bss$"; + if (Kind.isThreadLocal()) + return ".tls$"; if (Kind.isWriteable()) return ".data$"; return ".rdata$"; @@ -603,7 +688,6 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { const MCSection *TargetLoweringObjectFileCOFF:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { - assert(!Kind.isThreadLocal() && "Doesn't support TLS"); // If this global is linkonce/weak and the target handles this by emitting it // into a 'uniqued' section name, create and return the section now. 
@@ -624,6 +708,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (Kind.isText()) return getTextSection(); + if (Kind.isThreadLocal()) + return getTLSDataSection(); + return getDataSection(); } diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp new file mode 100644 index 000000000000..0f59d0169e18 --- /dev/null +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -0,0 +1,52 @@ +//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the methods in the TargetOptions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +/// DisableFramePointerElim - This returns true if frame pointer elimination +/// optimization should be disabled for the given machine function. +bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { + // Check to see if we should eliminate non-leaf frame pointers and then + // check to see if we should eliminate all frame pointers. + if (NoFramePointerElimNonLeaf && !NoFramePointerElim) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->hasCalls(); + } + + return NoFramePointerElim; +} + +/// LessPreciseFPMAD - This flag return true when -enable-fp-mad option +/// is specified on the command line. When this flag is off(default), the +/// code generator is not allowed to generate mad (multiply add) if the +/// result is "less precise" than doing those operations individually. +bool TargetOptions::LessPreciseFPMAD() const { + return UnsafeFPMath || LessPreciseFPMADOption; +} + +/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume +/// that the rounding mode of the FPU can change from its default. +bool TargetOptions::HonorSignDependentRoundingFPMath() const { + return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption; +} + +/// getTrapFunctionName - If this returns a non-empty string, this means isel +/// should lower Intrinsic::trap to a call to the specified function name +/// instead of an ISD::TRAP node. 
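TargetOptionsImpl.cpp above moves a few option queries out of line; DisableFramePointerElim() keeps the frame pointer either everywhere or only in functions that make calls, depending on the NoFramePointerElim and NoFramePointerElimNonLeaf fields. A standalone restatement of that decision with plain booleans (illustrative only):

// Mirrors TargetOptions::DisableFramePointerElim() above: when only the
// non-leaf flag is set, the answer depends on whether this particular
// function contains calls; otherwise the global flag decides.
bool disableFPElim(bool NoFramePointerElim, bool NoFramePointerElimNonLeaf,
                   bool FunctionHasCalls) {
  if (NoFramePointerElimNonLeaf && !NoFramePointerElim)
    return FunctionHasCalls;
  return NoFramePointerElim;
}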
+StringRef TargetOptions::getTrapFunctionName() const { + return TrapFuncName; +} + diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index d87937822280..c30b1333bb2a 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -56,14 +57,18 @@ STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); STATISTIC(NumReMats, "Number of instructions re-materialized"); STATISTIC(NumDeletes, "Number of dead instructions deleted"); +STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); +STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); namespace { class TwoAddressInstructionPass : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const InstrItineraryData *InstrItins; MachineRegisterInfo *MRI; LiveVariables *LV; AliasAnalysis *AA; + CodeGenOpt::Level OptLevel; // DistanceMap - Keep track the distance of a MI from the start of the // current basic block. @@ -120,6 +125,18 @@ namespace { MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, unsigned Dist); + bool isDefTooClose(unsigned Reg, unsigned Dist, + MachineInstr *MI, MachineBasicBlock *MBB); + + bool RescheduleMIBelowKill(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool RescheduleKillAboveMI(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, @@ -152,7 +169,6 @@ namespace { AU.addPreserved<LiveVariables>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); - AU.addPreservedID(PHIEliminationID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -225,12 +241,12 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, // appropriate location, we can try to sink the current instruction // past it. if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || - KillMI->getDesc().isTerminator()) + KillMI->isTerminator()) return false; // If any of the definitions are used by another instruction between the // position and the kill use, then it's not safe to sink it. - // + // // FIXME: This can be sped up if there is an easy way to query whether an // instruction is before or after another instruction. Then we can use // MachineRegisterInfo def / use instead. @@ -273,7 +289,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, KillMO->setIsKill(false); KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI); KillMO->setIsKill(true); - + if (LV) LV->replaceKillInstruction(SavedReg, KillMI, MI); @@ -319,7 +335,7 @@ TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg, continue; // Current use. OtherUse = true; // There is at least one other use in the MBB that will clobber the - // register. + // register. 
if (isTwoAddrUse(UseMI, Reg)) return true; } @@ -467,6 +483,32 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { return false; } +/// findLocalKill - Look for an instruction below MI in the MBB that kills the +/// specified register. Returns null if there are any other Reg use between the +/// instructions. +static +MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB, + MachineInstr *MI, MachineRegisterInfo *MRI, + DenseMap<MachineInstr*, unsigned> &DistanceMap) { + MachineInstr *KillMI = 0; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(Reg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI == MI || UseMI->getParent() != MBB) + continue; + if (DistanceMap.count(UseMI)) + continue; + if (!UI.getOperand().isKill()) + return 0; + if (KillMI) + return 0; // -O0 kill markers cannot be trusted? + KillMI = UseMI; + } + + return KillMI; +} + /// findOnlyInterestingUse - Given a register, if has a single in-basic block /// use, return the use instruction if it's a copy or a two-address use. static @@ -528,6 +570,9 @@ bool TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, MachineInstr *MI, MachineBasicBlock *MBB, unsigned Dist) { + if (OptLevel == CodeGenOpt::None) + return false; + // Determine if it's profitable to commute this two address instruction. In // general, we want no uses between this instruction and the definition of // the two-address register. @@ -544,7 +589,7 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, // %reg1029<def> = MOV8rr %reg1028 // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead> // insert => %reg1030<def> = MOV8rr %reg1029 - // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead> + // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead> if (!MI->killsRegister(regC)) return false; @@ -770,10 +815,9 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, static bool isSafeToDelete(MachineInstr *MI, const TargetInstrInfo *TII, SmallVector<unsigned, 4> &Kills) { - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.mayStore() || MCID.isCall()) + if (MI->mayStore() || MI->isCall()) return false; - if (MCID.isTerminator() || MI->hasUnmodeledSideEffects()) + if (MI->isTerminator() || MI->hasUnmodeledSideEffects()) return false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -852,28 +896,316 @@ TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, return true; } +/// RescheduleMIBelowKill - If there is one more local instruction that reads +/// 'Reg' and it kills 'Reg, consider moving the instruction below the kill +/// instruction in order to eliminate the need for the copy. +bool +TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { + MachineInstr *MI = &*mi; + DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + // Must be created from unfolded load. Don't waste time trying this. + return false; + + MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap); + if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + // Don't mess with copies, they may be coalesced later. 
+ return false; + + if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() || + KillMI->isBranch() || KillMI->isTerminator()) + // Don't move pass calls, etc. + return false; + + unsigned DstReg; + if (isTwoAddrUse(*KillMI, Reg, DstReg)) + return false; + + bool SeenStore = true; + if (!MI->isSafeToMove(TII, AA, SeenStore)) + return false; + + if (TII->getInstrLatency(InstrItins, MI) > 1) + // FIXME: Needs more sophisticated heuristics. + return false; + + SmallSet<unsigned, 2> Uses; + SmallSet<unsigned, 2> Kills; + SmallSet<unsigned, 2> Defs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isDef()) + Defs.insert(MOReg); + else { + Uses.insert(MOReg); + if (MO.isKill() && MOReg != Reg) + Kills.insert(MOReg); + } + } + + // Move the copies connected to MI down as well. + MachineBasicBlock::iterator From = MI; + MachineBasicBlock::iterator To = llvm::next(From); + while (To->isCopy() && Defs.count(To->getOperand(1).getReg())) { + Defs.insert(To->getOperand(0).getReg()); + ++To; + } + + // Check if the reschedule will not break depedencies. + unsigned NumVisited = 0; + MachineBasicBlock::iterator KillPos = KillMI; + ++KillPos; + for (MachineBasicBlock::iterator I = To; I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. + return false; + ++NumVisited; + if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() || + OtherMI->isBranch() || OtherMI->isTerminator()) + // Don't move pass calls, etc. + return false; + for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OtherMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isDef()) { + if (Uses.count(MOReg)) + // Physical register use would be clobbered. + return false; + if (!MO.isDead() && Defs.count(MOReg)) + // May clobber a physical register def. + // FIXME: This may be too conservative. It's ok if the instruction + // is sunken completely below the use. + return false; + } else { + if (Defs.count(MOReg)) + return false; + if (MOReg != Reg && + ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg))) + // Don't want to extend other live ranges and update kills. + return false; + } + } + } + + // Move debug info as well. + while (From != MBB->begin() && llvm::prior(From)->isDebugValue()) + --From; + + // Copies following MI may have been moved as well. + nmi = To; + MBB->splice(KillPos, MBB, From, To); + DistanceMap.erase(DI); + + if (LV) { + // Update live variables + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } else { + for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = KillMI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + MO.setIsKill(false); + } + MI->addRegisterKilled(Reg, 0); + } + + return true; +} + +/// isDefTooClose - Return true if the re-scheduling will put the given +/// instruction too close to the defs of its register dependencies. 
+bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, + MachineInstr *MI, + MachineBasicBlock *MBB) { + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), + DE = MRI->def_end(); DI != DE; ++DI) { + MachineInstr *DefMI = &*DI; + if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike()) + continue; + if (DefMI == MI) + return true; // MI is defining something KillMI uses + DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(DefMI); + if (DDI == DistanceMap.end()) + return true; // Below MI + unsigned DefDist = DDI->second; + assert(Dist > DefDist && "Visited def already?"); + if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist)) + return true; + } + return false; +} + +/// RescheduleKillAboveMI - If there is one more local instruction that reads +/// 'Reg' and it kills 'Reg, consider moving the kill instruction above the +/// current two-address instruction in order to eliminate the need for the +/// copy. +bool +TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { + MachineInstr *MI = &*mi; + DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + // Must be created from unfolded load. Don't waste time trying this. + return false; + + MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap); + if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + // Don't mess with copies, they may be coalesced later. + return false; + + unsigned DstReg; + if (isTwoAddrUse(*KillMI, Reg, DstReg)) + return false; + + bool SeenStore = true; + if (!KillMI->isSafeToMove(TII, AA, SeenStore)) + return false; + + SmallSet<unsigned, 2> Uses; + SmallSet<unsigned, 2> Kills; + SmallSet<unsigned, 2> Defs; + SmallSet<unsigned, 2> LiveDefs; + for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = KillMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (MO.isUse()) { + if (!MOReg) + continue; + if (isDefTooClose(MOReg, DI->second, MI, MBB)) + return false; + Uses.insert(MOReg); + if (MO.isKill() && MOReg != Reg) + Kills.insert(MOReg); + } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + Defs.insert(MOReg); + if (!MO.isDead()) + LiveDefs.insert(MOReg); + } + } + + // Check if the reschedule will not break depedencies. + unsigned NumVisited = 0; + MachineBasicBlock::iterator KillPos = KillMI; + for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. + return false; + ++NumVisited; + if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() || + OtherMI->isBranch() || OtherMI->isTerminator()) + // Don't move pass calls, etc. + return false; + SmallVector<unsigned, 2> OtherDefs; + for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OtherMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isUse()) { + if (Defs.count(MOReg)) + // Moving KillMI can clobber the physical register if the def has + // not been seen. + return false; + if (Kills.count(MOReg)) + // Don't want to extend other live ranges and update kills. 
+ return false; + } else { + OtherDefs.push_back(MOReg); + } + } + + for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) { + unsigned MOReg = OtherDefs[i]; + if (Uses.count(MOReg)) + return false; + if (TargetRegisterInfo::isPhysicalRegister(MOReg) && + LiveDefs.count(MOReg)) + return false; + // Physical register def is seen. + Defs.erase(MOReg); + } + } + + // Move the old kill above MI, don't forget to move debug info as well. + MachineBasicBlock::iterator InsertPos = mi; + while (InsertPos != MBB->begin() && llvm::prior(InsertPos)->isDebugValue()) + --InsertPos; + MachineBasicBlock::iterator From = KillMI; + MachineBasicBlock::iterator To = llvm::next(From); + while (llvm::prior(From)->isDebugValue()) + --From; + MBB->splice(InsertPos, MBB, From, To); + + nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr. + DistanceMap.erase(DI); + + if (LV) { + // Update live variables + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } else { + for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = KillMI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + MO.setIsKill(false); + } + MI->addRegisterKilled(Reg, 0); + } + return true; +} + /// TryInstructionTransform - For the case where an instruction has a single /// pair of tied register operands, attempt some transformations that may /// either eliminate the tied operands or improve the opportunities for -/// coalescing away the register copy. Returns true if the tied operands -/// are eliminated altogether. +/// coalescing away the register copy. Returns true if no copy needs to be +/// inserted to untie mi's operands (either because they were untied, or +/// because mi was rescheduled, and will be visited again later). bool TwoAddressInstructionPass:: TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, unsigned SrcIdx, unsigned DstIdx, unsigned Dist, SmallPtrSet<MachineInstr*, 8> &Processed) { - const MCInstrDesc &MCID = mi->getDesc(); - unsigned regA = mi->getOperand(DstIdx).getReg(); - unsigned regB = mi->getOperand(SrcIdx).getReg(); + if (OptLevel == CodeGenOpt::None) + return false; + + MachineInstr &MI = *mi; + unsigned regA = MI.getOperand(DstIdx).getReg(); + unsigned regB = MI.getOperand(SrcIdx).getReg(); assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot make instruction into two-address form"); // If regA is dead and the instruction can be deleted, just delete // it so it doesn't clobber regB. - bool regBKilled = isKilled(*mi, regB, MRI, TII); - if (!regBKilled && mi->getOperand(DstIdx).isDead() && + bool regBKilled = isKilled(MI, regB, MRI, TII); + if (!regBKilled && MI.getOperand(DstIdx).isDead() && DeleteUnusedInstr(mi, nmi, mbbi, Dist)) { ++NumDeletes; return true; // Done with this instruction. 
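After this patch TryInstructionTransform tries several alternatives before falling back to inserting the tying copy. The sketch below only illustrates the order in which those alternatives are attempted, as visible in the hunks that follow; the boolean parameters are stand-ins for the pass's legality and profitability queries, and in the pass itself some of these steps fall through rather than returning.

enum class TwoAddrAction {
  DeleteDeadDef,        // destination is dead and the instruction can go away
  Commute,              // swap sources so the tied pair becomes joinable
  RescheduleBelowKill,  // sink this instruction below the kill of its source
  ConvertTo3Addr,       // rewrite to a true three-address instruction
  RescheduleAboveMI,    // hoist the killing instruction above this one
  UnfoldLoad,           // split a folded load off the instruction
  InsertCopy            // fall back to tying the operands with a COPY
};

TwoAddrAction pickAction(bool DestDeadAndDeletable, bool CommuteProfitable,
                         bool CanSinkBelowKill, bool Conv3AddrProfitable,
                         bool CanHoistKill, bool CanUnfoldLoad) {
  if (DestDeadAndDeletable) return TwoAddrAction::DeleteDeadDef;
  if (CommuteProfitable)    return TwoAddrAction::Commute;
  if (CanSinkBelowKill)     return TwoAddrAction::RescheduleBelowKill;
  if (Conv3AddrProfitable)  return TwoAddrAction::ConvertTo3Addr;
  if (CanHoistKill)         return TwoAddrAction::RescheduleAboveMI;
  if (CanUnfoldLoad)        return TwoAddrAction::UnfoldLoad;
  return TwoAddrAction::InsertCopy;
}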
@@ -885,20 +1217,20 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned regCIdx = ~0U; bool TryCommute = false; bool AggressiveCommute = false; - if (MCID.isCommutable() && mi->getNumOperands() >= 3 && - TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) { + if (MI.isCommutable() && MI.getNumOperands() >= 3 && + TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) { if (SrcIdx == SrcOp1) regCIdx = SrcOp2; else if (SrcIdx == SrcOp2) regCIdx = SrcOp1; if (regCIdx != ~0U) { - regC = mi->getOperand(regCIdx).getReg(); - if (!regBKilled && isKilled(*mi, regC, MRI, TII)) + regC = MI.getOperand(regCIdx).getReg(); + if (!regBKilled && isKilled(MI, regC, MRI, TII)) // If C dies but B does not, swap the B and C operands. // This makes the live ranges of A and C joinable. TryCommute = true; - else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) { + else if (isProfitableToCommute(regB, regC, &MI, mbbi, Dist)) { TryCommute = true; AggressiveCommute = true; } @@ -913,10 +1245,17 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } + // If there is one more use of regB later in the same MBB, consider + // re-schedule this MI below it. + if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) { + ++NumReSchedDowns; + return true; + } + if (TargetRegisterInfo::isVirtualRegister(regA)) ScanUses(regA, &*mbbi, Processed); - if (MCID.isConvertibleTo3Addr()) { + if (MI.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { @@ -928,6 +1267,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } } + // If there is one more use of regB later in the same MBB, consider + // re-schedule it before this MI if it's legal. + if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) { + ++NumReSchedUps; + return true; + } + // If this is an instruction with a load folded into it, try unfolding // the load, e.g. avoid this: // movq %rdx, %rcx @@ -936,11 +1282,11 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // movq (%rax), %rcx // addq %rdx, %rcx // because it's preferable to schedule a load than a register copy. - if (MCID.mayLoad() && !regBKilled) { + if (MI.mayLoad() && !regBKilled) { // Determine if a load can be unfolded. unsigned LoadRegIndex; unsigned NewOpc = - TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(), + TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), /*UnfoldLoad=*/true, /*UnfoldStore=*/false, &LoadRegIndex); @@ -950,12 +1296,12 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineFunction &MF = *mbbi->getParent(); // Unfold the load. - DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi); + DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); const TargetRegisterClass *RC = TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; - if (!TII->unfoldMemoryOperand(MF, mi, Reg, + if (!TII->unfoldMemoryOperand(MF, &MI, Reg, /*UnfoldLoad=*/true,/*UnfoldStore=*/false, NewMIs)) { DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); @@ -986,21 +1332,21 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // Success, or at least we made an improvement. Keep the unfolded // instructions and discard the original. 
if (LV) { - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { if (MO.isUse()) { if (MO.isKill()) { if (NewMIs[0]->killsRegister(MO.getReg())) - LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]); + LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]); else { assert(NewMIs[1]->killsRegister(MO.getReg()) && "Kill missing after load unfold!"); - LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]); + LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]); } } - } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) { + } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) { if (NewMIs[1]->registerDefIsDead(MO.getReg())) LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]); else { @@ -1013,7 +1359,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } LV->addVirtualRegisterKilled(Reg, NewMIs[1]); } - mi->eraseFromParent(); + MI.eraseFromParent(); mi = NewMIs[1]; if (TransformSuccess) return true; @@ -1035,18 +1381,19 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, /// runOnMachineFunction - Reduce two-address instructions to two operands. /// bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "Machine Function\n"); const TargetMachine &TM = MF.getTarget(); MRI = &MF.getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); + InstrItins = TM.getInstrItineraryData(); LV = getAnalysisIfAvailable<LiveVariables>(); AA = &getAnalysis<AliasAnalysis>(); + OptLevel = TM.getOptLevel(); bool MadeChange = false; DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); - DEBUG(dbgs() << "********** Function: " + DEBUG(dbgs() << "********** Function: " << MF.getFunction()->getName() << '\n'); // This pass takes the function out of SSA form. @@ -1177,7 +1524,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // If it's safe and profitable, remat the definition instead of // copying it. if (DefMI && - DefMI->getDesc().isAsCheapAsAMove() && + DefMI->isAsCheapAsAMove() && DefMI->isSafeToReMat(TII, AA, regB) && isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); @@ -1248,19 +1595,19 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { MadeChange = true; DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); - } - // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. - if (mi->isInsertSubreg()) { - // From %reg = INSERT_SUBREG %reg, %subreg, subidx - // To %reg:subidx = COPY %subreg - unsigned SubIdx = mi->getOperand(3).getImm(); - mi->RemoveOperand(3); - assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); - mi->getOperand(0).setSubReg(SubIdx); - mi->RemoveOperand(1); - mi->setDesc(TII->get(TargetOpcode::COPY)); - DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. 
+ if (mi->isInsertSubreg()) { + // From %reg = INSERT_SUBREG %reg, %subreg, subidx + // To %reg:subidx = COPY %subreg + unsigned SubIdx = mi->getOperand(3).getImm(); + mi->RemoveOperand(3); + assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); + mi->getOperand(0).setSubReg(SubIdx); + mi->RemoveOperand(1); + mi->setDesc(TII->get(TargetOpcode::COPY)); + DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + } } // Clear TiedOperands here instead of at the top of the loop @@ -1298,6 +1645,36 @@ static void UpdateRegSequenceSrcs(unsigned SrcReg, } } +// Find the first def of Reg, assuming they are all in the same basic block. +static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) { + SmallPtrSet<MachineInstr*, 8> Defs; + MachineInstr *First = 0; + for (MachineRegisterInfo::def_iterator RI = MRI->def_begin(Reg); + MachineInstr *MI = RI.skipInstruction(); Defs.insert(MI)) + First = MI; + if (!First) + return 0; + + MachineBasicBlock *MBB = First->getParent(); + MachineBasicBlock::iterator A = First, B = First; + bool Moving; + do { + Moving = false; + if (A != MBB->begin()) { + Moving = true; + --A; + if (Defs.erase(A)) First = A; + } + if (B != MBB->end()) { + Defs.erase(B); + ++B; + Moving = true; + } + } while (Moving && !Defs.empty()); + assert(Defs.empty() && "Instructions outside basic block!"); + return First; +} + /// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are /// EXTRACT_SUBREG from the same register and to the same virtual register /// with different sub-register indices, attempt to combine the @@ -1380,8 +1757,10 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, CanCoalesce = false; break; } - // Keep track of one of the uses. - SomeMI = UseMI; + // Keep track of one of the uses. Preferably the first one which has a + // <def,undef> flag. + if (!SomeMI || UseMI->getOperand(0).isUndef()) + SomeMI = UseMI; } if (!CanCoalesce) continue; @@ -1390,7 +1769,9 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, SomeMI->getDebugLoc(), TII->get(TargetOpcode::COPY)) - .addReg(DstReg, RegState::Define, NewDstSubIdx) + .addReg(DstReg, RegState::Define | + getUndefRegState(SomeMI->getOperand(0).isUndef()), + NewDstSubIdx) .addReg(SrcReg, 0, NewSrcSubIdx); // Remove all the old extract instructions. @@ -1452,26 +1833,30 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { SmallSet<unsigned, 4> Seen; for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { unsigned SrcReg = MI->getOperand(i).getReg(); + unsigned SrcSubIdx = MI->getOperand(i).getSubReg(); unsigned SubIdx = MI->getOperand(i+1).getImm(); - if (MI->getOperand(i).getSubReg() || - TargetRegisterInfo::isPhysicalRegister(SrcReg)) { - DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); - llvm_unreachable(0); + // DefMI of NULL means the value does not have a vreg in this block + // i.e., its a physical register or a subreg. + // In either case we force a copy to be generated. + MachineInstr *DefMI = NULL; + if (!MI->getOperand(i).getSubReg() && + !TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + DefMI = MRI->getVRegDef(SrcReg); } - MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (DefMI->isImplicitDef()) { + if (DefMI && DefMI->isImplicitDef()) { DefMI->eraseFromParent(); continue; } IsImpDef = false; // Remember COPY sources. These might be candidate for coalescing. 
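The findFirstDef helper introduced above walks outward from an arbitrary def until every def of the register in the block has been visited, and reports the earliest one. The sketch below reproduces that expanding-window scan over a vector of instruction ids; the containers and ids are illustrative stand-ins, not LLVM types.

#include <cstddef>
#include <set>
#include <vector>

// Block holds instruction ids in program order, DefIds holds the ids of all
// defs of the register (all inside Block), and StartIdx is the index of any
// one of those defs.
int findFirstDefSketch(const std::vector<int> &Block, std::set<int> DefIds,
                       std::size_t StartIdx) {
  int First = Block[StartIdx];
  std::size_t A = StartIdx, B = StartIdx;
  bool Moving;
  do {
    Moving = false;
    if (A != 0) {
      Moving = true;
      --A;
      if (DefIds.erase(Block[A]))
        First = Block[A];          // found a def earlier than any seen so far
    }
    if (B != Block.size()) {
      DefIds.erase(Block[B]);      // defs at or after the window are not first
      ++B;
      Moving = true;
    }
  } while (Moving && !DefIds.empty());
  return First;                    // id of the earliest def in the block
}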
- if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg()) + if (DefMI && DefMI->isCopy() && DefMI->getOperand(1).getSubReg()) RealSrcs.push_back(DefMI->getOperand(1).getReg()); bool isKill = MI->getOperand(i).isKill(); - if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() || + if (!DefMI || !Seen.insert(SrcReg) || + MI->getParent() != DefMI->getParent() || !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) || !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg), MRI->getRegClass(SrcReg), SubIdx)) { @@ -1504,9 +1889,9 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc, MI->getDebugLoc(), TII->get(TargetOpcode::COPY)) .addReg(DstReg, RegState::Define, SubIdx) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill), SrcSubIdx); MI->getOperand(i).setReg(0); - if (LV && isKill) + if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) LV->replaceKillInstruction(SrcReg, MI, CopyMI); DEBUG(dbgs() << "Inserted: " << *CopyMI); } @@ -1519,11 +1904,27 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI); } + // Set <def,undef> flags on the first DstReg def in the basic block. + // It marks the beginning of the live range. All the other defs are + // read-modify-write. + if (MachineInstr *Def = findFirstDef(DstReg, MRI)) { + for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) { + MachineOperand &MO = Def->getOperand(i); + if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg) + MO.setIsUndef(); + } + // Make sure there is a full non-subreg imp-def operand on the + // instruction. This shouldn't be necessary, but it seems that at least + // RAFast requires it. 
+ Def->addRegisterDefined(DstReg, TRI); + DEBUG(dbgs() << "First def: " << *Def); + } + if (IsImpDef) { DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) - MI->RemoveOperand(j); + MI->RemoveOperand(j); } else { DEBUG(dbgs() << "Eliminated: " << *MI); MI->eraseFromParent(); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 8a1cdc01c494..3bab93bdc098 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -16,10 +16,9 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "virtregmap" +#define DEBUG_TYPE "regalloc" #include "VirtRegMap.h" #include "llvm/Function.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -32,12 +31,8 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" #include <algorithm> using namespace llvm; @@ -58,34 +53,11 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { TRI = mf.getTarget().getRegisterInfo(); MF = &mf; - ReMatId = MAX_STACK_SLOT+1; - LowSpillSlot = HighSpillSlot = NO_STACK_SLOT; - Virt2PhysMap.clear(); Virt2StackSlotMap.clear(); - Virt2ReMatIdMap.clear(); Virt2SplitMap.clear(); - Virt2SplitKillMap.clear(); - ReMatMap.clear(); - ImplicitDefed.clear(); - SpillSlotToUsesMap.clear(); - MI2VirtMap.clear(); - SpillPt2VirtMap.clear(); - RestorePt2VirtMap.clear(); - EmergencySpillMap.clear(); - EmergencySpillSlots.clear(); - - SpillSlotToUsesMap.resize(8); - ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs()); - - allocatableRCRegs.clear(); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - allocatableRCRegs.insert(std::make_pair(*I, - TRI->getAllocatableSet(mf, *I))); grow(); - return false; } @@ -93,24 +65,12 @@ void VirtRegMap::grow() { unsigned NumRegs = MF->getRegInfo().getNumVirtRegs(); Virt2PhysMap.resize(NumRegs); Virt2StackSlotMap.resize(NumRegs); - Virt2ReMatIdMap.resize(NumRegs); Virt2SplitMap.resize(NumRegs); - Virt2SplitKillMap.resize(NumRegs); - ReMatMap.resize(NumRegs); - ImplicitDefed.resize(NumRegs); } unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); - if (LowSpillSlot == NO_STACK_SLOT) - LowSpillSlot = SS; - if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) - HighSpillSlot = SS; - assert(SS >= LowSpillSlot && "Unexpected low spill slot"); - unsigned Idx = SS-LowSpillSlot; - while (Idx >= SpillSlotToUsesMap.size()) - SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2); ++NumSpillSlots; return SS; } @@ -144,118 +104,6 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { Virt2StackSlotMap[virtReg] = SS; } -int VirtRegMap::assignVirtReMatId(unsigned virtReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && - "attempt to assign re-mat id to already spilled register"); - Virt2ReMatIdMap[virtReg] = ReMatId; - return ReMatId++; -} - -void VirtRegMap::assignVirtReMatId(unsigned 
virtReg, int id) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && - "attempt to assign re-mat id to already spilled register"); - Virt2ReMatIdMap[virtReg] = id; -} - -int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) { - std::map<const TargetRegisterClass*, int>::iterator I = - EmergencySpillSlots.find(RC); - if (I != EmergencySpillSlots.end()) - return I->second; - return EmergencySpillSlots[RC] = createSpillSlot(RC); -} - -void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) { - if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) { - // If FI < LowSpillSlot, this stack reference was produced by - // instruction selection and is not a spill - if (FI >= LowSpillSlot) { - assert(FI >= 0 && "Spill slot index should not be negative!"); - assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() - && "Invalid spill slot"); - SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI); - } - } -} - -void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI, - MachineInstr *NewMI, ModRef MRInfo) { - // Move previous memory references folded to new instruction. - MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI); - for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI), - E = MI2VirtMap.end(); I != E && I->first == OldMI; ) { - MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second)); - MI2VirtMap.erase(I++); - } - - // add new memory reference - MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo))); -} - -void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) { - MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI); - MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo))); -} - -void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isFI()) - continue; - int FI = MO.getIndex(); - if (MF->getFrameInfo()->isFixedObjectIndex(FI)) - continue; - // This stack reference was produced by instruction selection and - // is not a spill - if (FI < LowSpillSlot) - continue; - assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() - && "Invalid spill slot"); - SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI); - } - MI2VirtMap.erase(MI); - SpillPt2VirtMap.erase(MI); - RestorePt2VirtMap.erase(MI); - EmergencySpillMap.erase(MI); -} - -/// FindUnusedRegisters - Gather a list of allocatable registers that -/// have not been allocated to any virtual register. 
-bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { - unsigned NumRegs = TRI->getNumRegs(); - UnusedRegs.reset(); - UnusedRegs.resize(NumRegs); - - BitVector Used(NumRegs); - for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) - Used.set(Virt2PhysMap[Reg]); - } - - BitVector Allocatable = TRI->getAllocatableSet(*MF); - bool AnyUnused = false; - for (unsigned Reg = 1; Reg < NumRegs; ++Reg) { - if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) { - bool ReallyUnused = true; - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - if (Used[*AS] || LIs->hasInterval(*AS)) { - ReallyUnused = false; - break; - } - } - if (ReallyUnused) { - AnyUnused = true; - UnusedRegs.set(Reg); - } - } - } - - return AnyUnused; -} - void VirtRegMap::rewrite(SlotIndexes *Indexes) { DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " @@ -264,23 +112,32 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { SmallVector<unsigned, 8> SuperDeads; SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; +#ifndef NDEBUG + BitVector Reserved = TRI->getReservedRegs(*MF); +#endif for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { DEBUG(MBBI->print(dbgs(), Indexes)); - for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end(); - MII != MIE;) { + for (MachineBasicBlock::instr_iterator + MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) { MachineInstr *MI = MII; ++MII; for (MachineInstr::mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { MachineOperand &MO = *MOI; + + // Make sure MRI knows about registers clobbered by regmasks. + if (MO.isRegMask()) + MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; unsigned VirtReg = MO.getReg(); unsigned PhysReg = getPhys(VirtReg); assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg"); + assert(!Reserved.test(PhysReg) && "Reserved register assignment"); // Preserve semantics of sub-register operands. if (MO.getSubReg()) { @@ -332,7 +189,6 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { ++NumIdCopies; if (MI->getNumOperands() == 2) { DEBUG(dbgs() << "Deleting identity copy.\n"); - RemoveMachineInstrFromMaps(MI); if (Indexes) Indexes->removeMachineInstrFromMaps(MI); // It's safe to erase MI because MII has already been incremented. 
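The rewrite loop above replaces every virtual register operand with its assigned physical register, folding any sub-register index into the physical register itself. A minimal sketch of that per-operand step is below; Operand, the map and substSubReg are assumed stand-ins for MachineOperand, Virt2PhysMap and TRI->getSubReg, not the actual LLVM APIs.

#include <cassert>
#include <unordered_map>

struct Operand {
  unsigned Reg = 0;
  unsigned SubReg = 0;        // sub-register index, 0 if none
  bool IsVirtual = false;
};

// Hypothetical helper: fold a sub-register index into a physical register.
unsigned substSubReg(unsigned PhysReg, unsigned SubIdx) {
  return PhysReg + SubIdx;    // placeholder for TRI->getSubReg(PhysReg, SubIdx)
}

void rewriteOperand(Operand &MO,
                    const std::unordered_map<unsigned, unsigned> &Virt2Phys) {
  if (!MO.IsVirtual)
    return;
  auto It = Virt2Phys.find(MO.Reg);
  assert(It != Virt2Phys.end() && "Instruction uses unmapped VirtReg");
  unsigned PhysReg = It->second;
  if (MO.SubReg) {
    // Preserve semantics of sub-register operands by mapping to the
    // corresponding physical sub-register.
    PhysReg = substSubReg(PhysReg, MO.SubReg);
    MO.SubReg = 0;
  }
  MO.Reg = PhysReg;
  MO.IsVirtual = false;
}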
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index 03abff356934..8cac31137e3d 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -18,22 +18,14 @@ #define LLVM_CODEGEN_VIRTREGMAP_H #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/LiveInterval.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include <map> namespace llvm { - class LiveIntervals; class MachineInstr; class MachineFunction; class MachineRegisterInfo; class TargetInstrInfo; - class TargetRegisterInfo; class raw_ostream; class SlotIndexes; @@ -45,18 +37,12 @@ namespace llvm { MAX_STACK_SLOT = (1L << 18)-1 }; - enum ModRef { isRef = 1, isMod = 2, isModRef = 3 }; - typedef std::multimap<MachineInstr*, - std::pair<unsigned, ModRef> > MI2VirtMapTy; - private: MachineRegisterInfo *MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineFunction *MF; - DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs; - /// Virt2PhysMap - This is a virtual to physical register /// mapping. Each virtual register is required to have an entry in /// it; even spilled virtual registers (the register mapped to a @@ -70,71 +56,10 @@ namespace llvm { /// at. IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap; - /// Virt2ReMatIdMap - This is virtual register to rematerialization id - /// mapping. Each spilled virtual register that should be remat'd has an - /// entry in it which corresponds to the remat id. - IndexedMap<int, VirtReg2IndexFunctor> Virt2ReMatIdMap; - /// Virt2SplitMap - This is virtual register to splitted virtual register /// mapping. IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap; - /// Virt2SplitKillMap - This is splitted virtual register to its last use - /// (kill) index mapping. - IndexedMap<SlotIndex, VirtReg2IndexFunctor> Virt2SplitKillMap; - - /// ReMatMap - This is virtual register to re-materialized instruction - /// mapping. Each virtual register whose definition is going to be - /// re-materialized has an entry in it. - IndexedMap<MachineInstr*, VirtReg2IndexFunctor> ReMatMap; - - /// MI2VirtMap - This is MachineInstr to virtual register - /// mapping. In the case of memory spill code being folded into - /// instructions, we need to know which virtual register was - /// read/written by this instruction. - MI2VirtMapTy MI2VirtMap; - - /// SpillPt2VirtMap - This records the virtual registers which should - /// be spilled right after the MachineInstr due to live interval - /// splitting. - std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > > - SpillPt2VirtMap; - - /// RestorePt2VirtMap - This records the virtual registers which should - /// be restored right before the MachineInstr due to live interval - /// splitting. - std::map<MachineInstr*, std::vector<unsigned> > RestorePt2VirtMap; - - /// EmergencySpillMap - This records the physical registers that should - /// be spilled / restored around the MachineInstr since the register - /// allocator has run out of registers. - std::map<MachineInstr*, std::vector<unsigned> > EmergencySpillMap; - - /// EmergencySpillSlots - This records emergency spill slots used to - /// spill physical registers when the register allocator runs out of - /// registers. Ideally only one stack slot is used per function per - /// register class. 
- std::map<const TargetRegisterClass*, int> EmergencySpillSlots; - - /// ReMatId - Instead of assigning a stack slot to a to be rematerialized - /// virtual register, an unique id is being assigned. This keeps track of - /// the highest id used so far. Note, this starts at (1<<18) to avoid - /// conflicts with stack slot numbers. - int ReMatId; - - /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes. - int LowSpillSlot, HighSpillSlot; - - /// SpillSlotToUsesMap - Records uses for each register spill slot. - SmallVector<SmallPtrSet<MachineInstr*, 4>, 8> SpillSlotToUsesMap; - - /// ImplicitDefed - One bit for each virtual register. If set it indicates - /// the register is implicitly defined. - BitVector ImplicitDefed; - - /// UnusedRegs - A list of physical registers that have not been used. - BitVector UnusedRegs; - /// createSpillSlot - Allocate a spill slot for RC from MFI. unsigned createSpillSlot(const TargetRegisterClass *RC); @@ -144,11 +69,7 @@ namespace llvm { public: static char ID; VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG), - Virt2StackSlotMap(NO_STACK_SLOT), - Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0), - Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL), - ReMatId(MAX_STACK_SLOT+1), - LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { } + Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) { } virtual bool runOnMachineFunction(MachineFunction &MF); virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -235,8 +156,7 @@ namespace llvm { /// @brief returns true if the specified virtual register is not /// mapped to a stack slot or rematerialized. bool isAssignedReg(unsigned virtReg) const { - if (getStackSlot(virtReg) == NO_STACK_SLOT && - getReMatId(virtReg) == NO_STACK_SLOT) + if (getStackSlot(virtReg) == NO_STACK_SLOT) return true; // Split register can be assigned a physical register as well as a // stack slot or remat id. @@ -250,13 +170,6 @@ namespace llvm { return Virt2StackSlotMap[virtReg]; } - /// @brief returns the rematerialization id mapped to the specified virtual - /// register - int getReMatId(unsigned virtReg) const { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - return Virt2ReMatIdMap[virtReg]; - } - /// @brief create a mapping for the specifed virtual register to /// the next available stack slot int assignVirt2StackSlot(unsigned virtReg); @@ -264,250 +177,6 @@ namespace llvm { /// the specified stack slot void assignVirt2StackSlot(unsigned virtReg, int frameIndex); - /// @brief assign an unique re-materialization id to the specified - /// virtual register. - int assignVirtReMatId(unsigned virtReg); - /// @brief assign an unique re-materialization id to the specified - /// virtual register. - void assignVirtReMatId(unsigned virtReg, int id); - - /// @brief returns true if the specified virtual register is being - /// re-materialized. - bool isReMaterialized(unsigned virtReg) const { - return ReMatMap[virtReg] != NULL; - } - - /// @brief returns the original machine instruction being re-issued - /// to re-materialize the specified virtual register. - MachineInstr *getReMaterializedMI(unsigned virtReg) const { - return ReMatMap[virtReg]; - } - - /// @brief records the specified virtual register will be - /// re-materialized and the original instruction which will be re-issed - /// for this purpose. If parameter all is true, then all uses of the - /// registers are rematerialized and it's safe to delete the definition. 
- void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) { - ReMatMap[virtReg] = def; - } - - /// @brief record the last use (kill) of a split virtual register. - void addKillPoint(unsigned virtReg, SlotIndex index) { - Virt2SplitKillMap[virtReg] = index; - } - - SlotIndex getKillPoint(unsigned virtReg) const { - return Virt2SplitKillMap[virtReg]; - } - - /// @brief remove the last use (kill) of a split virtual register. - void removeKillPoint(unsigned virtReg) { - Virt2SplitKillMap[virtReg] = SlotIndex(); - } - - /// @brief returns true if the specified MachineInstr is a spill point. - bool isSpillPt(MachineInstr *Pt) const { - return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end(); - } - - /// @brief returns the virtual registers that should be spilled due to - /// splitting right after the specified MachineInstr. - std::vector<std::pair<unsigned,bool> > &getSpillPtSpills(MachineInstr *Pt) { - return SpillPt2VirtMap[Pt]; - } - - /// @brief records the specified MachineInstr as a spill point for virtReg. - void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) { - std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator - I = SpillPt2VirtMap.find(Pt); - if (I != SpillPt2VirtMap.end()) - I->second.push_back(std::make_pair(virtReg, isKill)); - else { - std::vector<std::pair<unsigned,bool> > Virts; - Virts.push_back(std::make_pair(virtReg, isKill)); - SpillPt2VirtMap.insert(std::make_pair(Pt, Virts)); - } - } - - /// @brief - transfer spill point information from one instruction to - /// another. - void transferSpillPts(MachineInstr *Old, MachineInstr *New) { - std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator - I = SpillPt2VirtMap.find(Old); - if (I == SpillPt2VirtMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back().first; - bool isKill = I->second.back().second; - I->second.pop_back(); - addSpillPoint(virtReg, isKill, New); - } - SpillPt2VirtMap.erase(I); - } - - /// @brief returns true if the specified MachineInstr is a restore point. - bool isRestorePt(MachineInstr *Pt) const { - return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end(); - } - - /// @brief returns the virtual registers that should be restoreed due to - /// splitting right after the specified MachineInstr. - std::vector<unsigned> &getRestorePtRestores(MachineInstr *Pt) { - return RestorePt2VirtMap[Pt]; - } - - /// @brief records the specified MachineInstr as a restore point for virtReg. - void addRestorePoint(unsigned virtReg, MachineInstr *Pt) { - std::map<MachineInstr*, std::vector<unsigned> >::iterator I = - RestorePt2VirtMap.find(Pt); - if (I != RestorePt2VirtMap.end()) - I->second.push_back(virtReg); - else { - std::vector<unsigned> Virts; - Virts.push_back(virtReg); - RestorePt2VirtMap.insert(std::make_pair(Pt, Virts)); - } - } - - /// @brief - transfer restore point information from one instruction to - /// another. - void transferRestorePts(MachineInstr *Old, MachineInstr *New) { - std::map<MachineInstr*, std::vector<unsigned> >::iterator I = - RestorePt2VirtMap.find(Old); - if (I == RestorePt2VirtMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back(); - I->second.pop_back(); - addRestorePoint(virtReg, New); - } - RestorePt2VirtMap.erase(I); - } - - /// @brief records that the specified physical register must be spilled - /// around the specified machine instr. 
- void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) { - if (EmergencySpillMap.find(MI) != EmergencySpillMap.end()) - EmergencySpillMap[MI].push_back(PhysReg); - else { - std::vector<unsigned> PhysRegs; - PhysRegs.push_back(PhysReg); - EmergencySpillMap.insert(std::make_pair(MI, PhysRegs)); - } - } - - /// @brief returns true if one or more physical registers must be spilled - /// around the specified instruction. - bool hasEmergencySpills(MachineInstr *MI) const { - return EmergencySpillMap.find(MI) != EmergencySpillMap.end(); - } - - /// @brief returns the physical registers to be spilled and restored around - /// the instruction. - std::vector<unsigned> &getEmergencySpills(MachineInstr *MI) { - return EmergencySpillMap[MI]; - } - - /// @brief - transfer emergency spill information from one instruction to - /// another. - void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) { - std::map<MachineInstr*,std::vector<unsigned> >::iterator I = - EmergencySpillMap.find(Old); - if (I == EmergencySpillMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back(); - I->second.pop_back(); - addEmergencySpill(virtReg, New); - } - EmergencySpillMap.erase(I); - } - - /// @brief return or get a emergency spill slot for the register class. - int getEmergencySpillSlot(const TargetRegisterClass *RC); - - /// @brief Return lowest spill slot index. - int getLowSpillSlot() const { - return LowSpillSlot; - } - - /// @brief Return highest spill slot index. - int getHighSpillSlot() const { - return HighSpillSlot; - } - - /// @brief Records a spill slot use. - void addSpillSlotUse(int FrameIndex, MachineInstr *MI); - - /// @brief Returns true if spill slot has been used. - bool isSpillSlotUsed(int FrameIndex) const { - assert(FrameIndex >= 0 && "Spill slot index should not be negative!"); - return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty(); - } - - /// @brief Mark the specified register as being implicitly defined. - void setIsImplicitlyDefined(unsigned VirtReg) { - ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg)); - } - - /// @brief Returns true if the virtual register is implicitly defined. - bool isImplicitlyDefined(unsigned VirtReg) const { - return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)]; - } - - /// @brief Updates information about the specified virtual register's value - /// folded into newMI machine instruction. - void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI, - ModRef MRInfo); - - /// @brief Updates information about the specified virtual register's value - /// folded into the specified machine instruction. - void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo); - - /// @brief returns the virtual registers' values folded in memory - /// operands of this instruction - std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator> - getFoldedVirts(MachineInstr* MI) const { - return MI2VirtMap.equal_range(MI); - } - - /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the - /// the folded instruction map and spill point map. - void RemoveMachineInstrFromMaps(MachineInstr *MI); - - /// FindUnusedRegisters - Gather a list of allocatable registers that - /// have not been allocated to any virtual register. - bool FindUnusedRegisters(LiveIntervals* LIs); - - /// HasUnusedRegisters - Return true if there are any allocatable registers - /// that have not been allocated to any virtual register. 
- bool HasUnusedRegisters() const { - return !UnusedRegs.none(); - } - - /// setRegisterUsed - Remember the physical register is now used. - void setRegisterUsed(unsigned Reg) { - UnusedRegs.reset(Reg); - } - - /// isRegisterUnused - Return true if the physical register has not been - /// used. - bool isRegisterUnused(unsigned Reg) const { - return UnusedRegs[Reg]; - } - - /// getFirstUnusedRegister - Return the first physical register that has not - /// been used. - unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) { - int Reg = UnusedRegs.find_first(); - while (Reg != -1) { - if (allocatableRCRegs[RC][Reg]) - return (unsigned)Reg; - Reg = UnusedRegs.find_next(Reg); - } - return 0; - } - /// rewrite - Rewrite all instructions in MF to use only physical registers /// by mapping all virtual register operands to their assigned physical /// registers. diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp deleted file mode 100644 index a5ec797b27db..000000000000 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ /dev/null @@ -1,2633 +0,0 @@ -//===-- llvm/CodeGen/Rewriter.cpp - Rewriter -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "virtregrewriter" -#include "VirtRegRewriter.h" -#include "VirtRegMap.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -STATISTIC(NumDSE , "Number of dead stores elided"); -STATISTIC(NumDSS , "Number of dead spill slots removed"); -STATISTIC(NumCommutes, "Number of instructions commuted"); -STATISTIC(NumDRM , "Number of re-materializable defs elided"); -STATISTIC(NumStores , "Number of stores added"); -STATISTIC(NumPSpills , "Number of physical register spills"); -STATISTIC(NumOmitted , "Number of reloads omitted"); -STATISTIC(NumAvoided , "Number of reloads deemed unnecessary"); -STATISTIC(NumCopified, "Number of available reloads turned into copies"); -STATISTIC(NumReMats , "Number of re-materialization"); -STATISTIC(NumLoads , "Number of loads added"); -STATISTIC(NumReused , "Number of values reused"); -STATISTIC(NumDCE , "Number of copies elided"); -STATISTIC(NumSUnfold , "Number of stores unfolded"); -STATISTIC(NumModRefUnfold, "Number of modref unfolded"); - -namespace { - enum RewriterName { local, trivial }; -} - -static cl::opt<RewriterName> -RewriterOpt("rewriter", - cl::desc("Rewriter to use (default=local)"), - cl::Prefix, - cl::values(clEnumVal(local, "local rewriter"), - clEnumVal(trivial, "trivial rewriter"), - clEnumValEnd), - cl::init(local)); - -static cl::opt<bool> -ScheduleSpills("schedule-spills", - cl::desc("Schedule spill code"), - cl::init(false)); - -VirtRegRewriter::~VirtRegRewriter() {} - -/// substitutePhysReg - Replace virtual register in MachineOperand with a -/// physical register. Do the right thing with the sub-register index. 
-/// Note that operands may be added, so the MO reference is no longer valid. -static void substitutePhysReg(MachineOperand &MO, unsigned Reg, - const TargetRegisterInfo &TRI) { - if (MO.getSubReg()) { - MO.substPhysReg(Reg, TRI); - - // Any kill flags apply to the full virtual register, so they also apply to - // the full physical register. - // We assume that partial defs have already been decorated with a super-reg - // <imp-def> operand by LiveIntervals. - MachineInstr &MI = *MO.getParent(); - if (MO.isUse() && !MO.isUndef() && - (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0)))) - MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true); - } else { - MO.setReg(Reg); - } -} - -namespace { - -/// This class is intended for use with the new spilling framework only. It -/// rewrites vreg def/uses to use the assigned preg, but does not insert any -/// spill code. -struct TrivialRewriter : public VirtRegRewriter { - - bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs) { - DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n"); - DEBUG(dbgs() << "********** Function: " - << MF.getFunction()->getName() << '\n'); - DEBUG(dbgs() << "**** Machine Instrs" - << "(NOTE! Does not include spills and reloads!) ****\n"); - DEBUG(MF.dump()); - - MachineRegisterInfo *mri = &MF.getRegInfo(); - const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo(); - - bool changed = false; - - for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end(); - liItr != liEnd; ++liItr) { - - const LiveInterval *li = liItr->second; - unsigned reg = li->reg; - - if (TargetRegisterInfo::isPhysicalRegister(reg)) { - if (!li->empty()) - mri->setPhysRegUsed(reg); - } - else { - if (!VRM.hasPhys(reg)) - continue; - unsigned pReg = VRM.getPhys(reg); - mri->setPhysRegUsed(pReg); - // Copy the register use-list before traversing it. - SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist; - for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg), - E = mri->reg_end(); I != E; ++I) - reglist.push_back(std::make_pair(&*I, I.getOperandNo())); - for (unsigned N=0; N != reglist.size(); ++N) - substitutePhysReg(reglist[N].first->getOperand(reglist[N].second), - pReg, *tri); - changed |= !reglist.empty(); - } - } - - DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); - DEBUG(MF.dump()); - - return changed; - } - -}; - -} - -// ************************************************************************ // - -namespace { - -/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB -/// from top down, keep track of which spill slots or remat are available in -/// each register. -/// -/// Note that not all physregs are created equal here. In particular, some -/// physregs are reloads that we are allowed to clobber or ignore at any time. -/// Other physregs are values that the register allocated program is using -/// that we cannot CHANGE, but we can read if we like. We keep track of this -/// on a per-stack-slot / remat id basis as the low bit in the value of the -/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks -/// this bit and addAvailable sets it if. -class AvailableSpills { - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - - // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled - // or remat'ed virtual register values that are still available, due to - // being loaded or stored to, but not invalidated yet. 
- std::map<int, unsigned> SpillSlotsOrReMatsAvailable; - - // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable, - // indicating which stack slot values are currently held by a physreg. This - // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a - // physreg is modified. - std::multimap<unsigned, int> PhysRegsAvailable; - - void disallowClobberPhysRegOnly(unsigned PhysReg); - - void ClobberPhysRegOnly(unsigned PhysReg); -public: - AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii) - : TRI(tri), TII(tii) { - } - - /// clear - Reset the state. - void clear() { - SpillSlotsOrReMatsAvailable.clear(); - PhysRegsAvailable.clear(); - } - - const TargetRegisterInfo *getRegInfo() const { return TRI; } - - /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is - /// available in a physical register, return that PhysReg, otherwise - /// return 0. - unsigned getSpillSlotOrReMatPhysReg(int Slot) const { - std::map<int, unsigned>::const_iterator I = - SpillSlotsOrReMatsAvailable.find(Slot); - if (I != SpillSlotsOrReMatsAvailable.end()) { - return I->second >> 1; // Remove the CanClobber bit. - } - return 0; - } - - /// addAvailable - Mark that the specified stack slot / remat is available - /// in the specified physreg. If CanClobber is true, the physreg can be - /// modified at any time without changing the semantics of the program. - void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) { - // If this stack slot is thought to be available in some other physreg, - // remove its record. - ModifyStackSlotOrReMat(SlotOrReMat); - - PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat)); - SpillSlotsOrReMatsAvailable[SlotOrReMat]= (Reg << 1) | - (unsigned)CanClobber; - - if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Remembering RM#" - << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat); - DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) - << (CanClobber ? " canclobber" : "") << "\n"); - } - - /// canClobberPhysRegForSS - Return true if the spiller is allowed to change - /// the value of the specified stackslot register if it desires. The - /// specified stack slot must be available in a physreg for this query to - /// make sense. - bool canClobberPhysRegForSS(int SlotOrReMat) const { - assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) && - "Value not available!"); - return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1; - } - - /// canClobberPhysReg - Return true if the spiller is allowed to clobber the - /// physical register where values for some stack slot(s) might be - /// available. - bool canClobberPhysReg(unsigned PhysReg) const { - std::multimap<unsigned, int>::const_iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - I++; - if (!canClobberPhysRegForSS(SlotOrReMat)) - return false; - } - return true; - } - - /// disallowClobberPhysReg - Unset the CanClobber bit of the specified - /// stackslot register. The register is still available but is no longer - /// allowed to be modifed. - void disallowClobberPhysReg(unsigned PhysReg); - - /// ClobberPhysReg - This is called when the specified physreg changes - /// value. We use this to invalidate any info about stuff that lives in - /// it and any of its aliases. 
- void ClobberPhysReg(unsigned PhysReg); - - /// ModifyStackSlotOrReMat - This method is called when the value in a stack - /// slot changes. This removes information about which register the - /// previous value for this slot lives in (as the previous value is dead - /// now). - void ModifyStackSlotOrReMat(int SlotOrReMat); - - /// ClobberSharingStackSlots - When a register mapped to a stack slot changes, - /// other stack slots sharing the same register are no longer valid. - void ClobberSharingStackSlots(int StackSlot); - - /// AddAvailableRegsToLiveIn - Availability information is being kept coming - /// into the specified MBB. Add available physical registers as potential - /// live-in's. If they are reused in the MBB, they will be added to the - /// live-in set to make register scavenger and post-allocation scheduler. - void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); -}; - -} - -// ************************************************************************ // - -// Given a location where a reload of a spilled register or a remat of -// a constant is to be inserted, attempt to find a safe location to -// insert the load at an earlier point in the basic-block, to hide -// latency of the load and to avoid address-generation interlock -// issues. -static MachineBasicBlock::iterator -ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc, - MachineBasicBlock::iterator const Begin, - unsigned PhysReg, - const TargetRegisterInfo *TRI, - bool DoReMat, - int SSorRMId, - const TargetInstrInfo *TII, - const MachineFunction &MF) -{ - if (!ScheduleSpills) - return InsertLoc; - - // Spill backscheduling is of primary interest to addresses, so - // don't do anything if the register isn't in the register class - // used for pointers. - - const TargetLowering *TL = MF.getTarget().getTargetLowering(); - - if (!TL->isTypeLegal(TL->getPointerTy())) - // Believe it or not, this is true on 16-bit targets like PIC16. - return InsertLoc; - - const TargetRegisterClass *ptrRegClass = - TL->getRegClassFor(TL->getPointerTy()); - if (!ptrRegClass->contains(PhysReg)) - return InsertLoc; - - // Scan upwards through the preceding instructions. If an instruction doesn't - // reference the stack slot or the register we're loading, we can - // backschedule the reload up past it. - MachineBasicBlock::iterator NewInsertLoc = InsertLoc; - while (NewInsertLoc != Begin) { - MachineBasicBlock::iterator Prev = prior(NewInsertLoc); - for (unsigned i = 0; i < Prev->getNumOperands(); ++i) { - MachineOperand &Op = Prev->getOperand(i); - if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId) - goto stop; - } - if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 || - Prev->findRegisterDefOperand(PhysReg)) - goto stop; - for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias) - if (Prev->findRegisterUseOperandIdx(*Alias) != -1 || - Prev->findRegisterDefOperand(*Alias)) - goto stop; - NewInsertLoc = Prev; - } -stop:; - - // If we made it to the beginning of the block, turn around and move back - // down just past any existing reloads. They're likely to be reloads/remats - // for instructions earlier than what our current reload/remat is for, so - // they should be scheduled earlier. 
- if (NewInsertLoc == Begin) { - int FrameIdx; - while (InsertLoc != NewInsertLoc && - (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) || - TII->isTriviallyReMaterializable(NewInsertLoc))) - ++NewInsertLoc; - } - - return NewInsertLoc; -} - -namespace { - -// ReusedOp - For each reused operand, we keep track of a bit of information, -// in case we need to rollback upon processing a new operand. See comments -// below. -struct ReusedOp { - // The MachineInstr operand that reused an available value. - unsigned Operand; - - // StackSlotOrReMat - The spill slot or remat id of the value being reused. - unsigned StackSlotOrReMat; - - // PhysRegReused - The physical register the value was available in. - unsigned PhysRegReused; - - // AssignedPhysReg - The physreg that was assigned for use by the reload. - unsigned AssignedPhysReg; - - // VirtReg - The virtual register itself. - unsigned VirtReg; - - ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr, - unsigned vreg) - : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr), - AssignedPhysReg(apr), VirtReg(vreg) {} -}; - -/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that -/// is reused instead of reloaded. -class ReuseInfo { - MachineInstr &MI; - std::vector<ReusedOp> Reuses; - BitVector PhysRegsClobbered; -public: - ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) { - PhysRegsClobbered.resize(tri->getNumRegs()); - } - - bool hasReuses() const { - return !Reuses.empty(); - } - - /// addReuse - If we choose to reuse a virtual register that is already - /// available instead of reloading it, remember that we did so. - void addReuse(unsigned OpNo, unsigned StackSlotOrReMat, - unsigned PhysRegReused, unsigned AssignedPhysReg, - unsigned VirtReg) { - // If the reload is to the assigned register anyway, no undo will be - // required. - if (PhysRegReused == AssignedPhysReg) return; - - // Otherwise, remember this. - Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused, - AssignedPhysReg, VirtReg)); - } - - void markClobbered(unsigned PhysReg) { - PhysRegsClobbered.set(PhysReg); - } - - bool isClobbered(unsigned PhysReg) const { - return PhysRegsClobbered.test(PhysReg); - } - - /// GetRegForReload - We are about to emit a reload into PhysReg. If there - /// is some other operand that is using the specified register, either pick - /// a new register to use, or evict the previous reload and use this reg. - unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg, - MachineFunction &MF, MachineInstr *MI, - AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - SmallSet<unsigned, 8> &Rejected, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - VirtRegMap &VRM); - - /// GetRegForReload - Helper for the above GetRegForReload(). Add a - /// 'Rejected' set to remember which registers have been considered and - /// rejected for the reload. This avoids infinite looping in case like - /// this: - /// t1 := op t2, t3 - /// t2 <- assigned r0 for use by the reload but ended up reuse r1 - /// t3 <- assigned r1 for use by the reload but ended up reuse r0 - /// t1 <- desires r1 - /// sees r1 is taken by t2, tries t2's reload register r0 - /// sees r0 is taken by t3, tries t3's reload register r1 - /// sees r1 is taken by t2, tries t2's reload register r0 ... 
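The comment above explains why the second GetRegForReload overload threads a Rejected set through its recursion: without it, chasing each operand's alternative reload register can cycle forever (r1 -> r0 -> r1 -> ...). A rough, self-contained sketch of that idea, using hypothetical simplified types rather than the LLVM API in this file:

#include <map>
#include <set>

// ReuseOf[R] is the reload register originally assigned to the operand that is
// currently reusing R; no entry means R is not claimed by any reuse. Each
// register enters Rejected at most once, so the walk terminates even when the
// reuse chain forms a cycle. (The real code additionally undoes the earlier
// reuse when no alternative is left; that part is omitted here.)
static unsigned pickReloadReg(unsigned Desired,
                              const std::map<unsigned, unsigned> &ReuseOf,
                              std::set<unsigned> &Rejected) {
  std::map<unsigned, unsigned>::const_iterator It = ReuseOf.find(Desired);
  if (It == ReuseOf.end() || Rejected.count(It->second))
    return Desired;                 // free, or its alternative was already tried
  Rejected.insert(Desired);         // remember that Desired has been considered
  return pickReloadReg(It->second, ReuseOf, Rejected); // follow the chain
}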
- unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI, - AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - VirtRegMap &VRM) { - SmallSet<unsigned, 8> Rejected; - MachineFunction &MF = *MI->getParent()->getParent(); - const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg); - return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - } -}; - -} - -// ****************** // -// Utility Functions // -// ****************** // - -/// findSinglePredSuccessor - Return via reference a vector of machine basic -/// blocks each of which is a successor of the specified BB and has no other -/// predecessor. -static void findSinglePredSuccessor(MachineBasicBlock *MBB, - SmallVectorImpl<MachineBasicBlock *> &Succs){ - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SuccMBB = *SI; - if (SuccMBB->pred_size() == 1) - Succs.push_back(SuccMBB); - } -} - -/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed -/// but not re-defined and it's being reused. Remove the kill flag for the -/// register and unset the kill's marker and last kill operand. -static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n"); - - MachineOperand *KillOp = KillOps[Reg]; - KillOp->setIsKill(false); - // KillOps[Reg] might be a def of a super-register. - unsigned KReg = KillOp->getReg(); - if (!RegKills[KReg]) - return; - - assert(KillOps[KReg]->getParent() == KillOp->getParent() && - "invalid superreg kill flags"); - KillOps[KReg] = NULL; - RegKills.reset(KReg); - - // If it's a def of a super-register. Its other sub-regsters are no - // longer killed as well. - for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) { - DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n"); - - assert(KillOps[*SR]->getParent() == KillOp->getParent() && - "invalid subreg kill flags"); - KillOps[*SR] = NULL; - RegKills.reset(*SR); - } -} - -/// ResurrectKill - Invalidate kill info associated with a previous MI. An -/// optimization may have decided that it's safe to reuse a previously killed -/// register. If we fail to erase the invalid kill flags, then the register -/// scavenger may later clobber the register used by this MI. Note that this -/// must be done even if this MI is being deleted! Consider: -/// -/// USE $r1 (vreg1) <kill> -/// ... -/// $r1(vreg3) = COPY $r1 (vreg2) -/// -/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially -/// vreg1's only use is a kill. The rewriter doesn't know it should be live -/// until it rewrites vreg2. At that points it sees that the copy is dead and -/// deletes it. However, deleting the copy implicitly forwards liveness of $r1 -/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at -/// vreg1 before deleting the copy. -static void ResurrectKill(MachineInstr &MI, unsigned Reg, - const TargetRegisterInfo* TRI, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) { - ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps); - return; - } - // No previous kill for this reg. Check for subreg kills as well. - // d4 = - // store d4, fi#0 - // ... 
- // = s8<kill> - // ... - // = d4 <avoiding reload> - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - unsigned SReg = *SR; - if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) - ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps); - } -} - -/// InvalidateKills - MI is going to be deleted. If any of its operands are -/// marked kill, then invalidate the information. -static void InvalidateKills(MachineInstr &MI, - const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - SmallVector<unsigned, 2> *KillRegs = NULL) { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef()) - continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (KillRegs) - KillRegs->push_back(Reg); - assert(Reg < KillOps.size()); - if (KillOps[Reg] == &MO) { - // This operand was the kill, now no longer. - KillOps[Reg] = NULL; - RegKills.reset(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - if (RegKills[*SR]) { - assert(KillOps[*SR] == &MO && "bad subreg kill flags"); - KillOps[*SR] = NULL; - RegKills.reset(*SR); - } - } - } - else { - // This operand may have reused a previously killed reg. Keep it live in - // case it continues to be used after erasing this instruction. - ResurrectKill(MI, Reg, TRI, RegKills, KillOps); - } - } -} - -/// InvalidateRegDef - If the def operand of the specified def MI is now dead -/// (since its spill instruction is removed), mark it isDead. Also checks if -/// the def MI has other definition operands that are not dead. Returns it by -/// reference. -static bool InvalidateRegDef(MachineBasicBlock::iterator I, - MachineInstr &NewDef, unsigned Reg, - bool &HasLiveDef, - const TargetRegisterInfo *TRI) { - // Due to remat, it's possible this reg isn't being reused. That is, - // the def of this reg (by prev MI) is now dead. - MachineInstr *DefMI = I; - MachineOperand *DefOp = NULL; - for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = DefMI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef()) - continue; - if (MO.getReg() == Reg) - DefOp = &MO; - else if (!MO.isDead()) - HasLiveDef = true; - } - if (!DefOp) - return false; - - bool FoundUse = false, Done = false; - MachineBasicBlock::iterator E = &NewDef; - ++I; ++E; - for (; !Done && I != E; ++I) { - MachineInstr *NMI = I; - for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = NMI->getOperand(j); - if (!MO.isReg() || MO.getReg() == 0 || - (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg()))) - continue; - if (MO.isUse()) - FoundUse = true; - Done = true; // Stop after scanning all the operands of this MI. - } - } - if (!FoundUse) { - // Def is dead! - DefOp->setIsDead(); - return true; - } - return false; -} - -/// UpdateKills - Track and update kill info. If a MI reads a register that is -/// marked kill, then it must be due to register reuse. Transfer the kill info -/// over. -static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - // These do not affect kill info at all. 
- if (MI.isDebugValue()) - return; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - - // This operand may have reused a previously killed reg. Keep it live. - ResurrectKill(MI, Reg, TRI, RegKills, KillOps); - - if (MO.isKill()) { - RegKills.set(Reg); - KillOps[Reg] = &MO; - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - RegKills.set(*SR); - KillOps[*SR] = &MO; - } - } - } - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) - continue; - unsigned Reg = MO.getReg(); - RegKills.reset(Reg); - KillOps[Reg] = NULL; - // It also defines (or partially define) aliases. - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - RegKills.reset(*SR); - KillOps[*SR] = NULL; - } - for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) { - RegKills.reset(*SR); - KillOps[*SR] = NULL; - } - } -} - -/// ReMaterialize - Re-materialize definition for Reg targeting DestReg. -/// -static void ReMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MII, - unsigned DestReg, unsigned Reg, - const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI, - VirtRegMap &VRM) { - MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg); -#ifndef NDEBUG - const MCInstrDesc &MCID = ReMatDefMI->getDesc(); - assert(MCID.getNumDefs() == 1 && - "Don't know how to remat instructions that define > 1 values!"); -#endif - TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI); - MachineInstr *NewMI = prior(MII); - for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = NewMI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned VirtReg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) - continue; - assert(MO.isUse()); - unsigned Phys = VRM.getPhys(VirtReg); - assert(Phys && "Virtual register is not assigned a register?"); - substitutePhysReg(MO, Phys, *TRI); - } - ++NumReMats; -} - -/// findSuperReg - Find the SubReg's super-register of given register class -/// where its SubIdx sub-register is SubReg. -static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg, - unsigned SubIdx, const TargetRegisterInfo *TRI) { - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); - I != E; ++I) { - unsigned Reg = *I; - if (TRI->getSubReg(Reg, SubIdx) == SubReg) - return Reg; - } - return 0; -} - -// ******************************** // -// Available Spills Implementation // -// ******************************** // - -/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified -/// stackslot register. The register is still available but is no longer -/// allowed to be modifed. 
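The CanClobber bit that the function defined just below clears is the low bit of the value stored in SpillSlotsOrReMatsAvailable: addAvailable stores (Reg << 1) | CanClobber, getSpillSlotOrReMatPhysReg unpacks with >> 1, and canClobberPhysRegForSS tests & 1. A minimal stand-alone check of that packing, with illustrative values only:

#include <cassert>

int main() {
  unsigned Reg = 7;                                     // some physreg number
  bool CanClobber = true;
  unsigned Packed = (Reg << 1) | (unsigned)CanClobber;  // encoding used above
  assert((Packed >> 1) == Reg);                         // recovers the register
  assert((Packed & 1) != 0);                            // recovers CanClobber
  return 0;
}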
-void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) { - std::multimap<unsigned, int>::iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - I++; - assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && - "Bidirectional map mismatch!"); - SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1; - DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg) - << " copied, it is available for use but can no longer be modified\n"); - } -} - -/// disallowClobberPhysReg - Unset the CanClobber bit of the specified -/// stackslot register and its aliases. The register and its aliases may -/// still available but is no longer allowed to be modifed. -void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) { - for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS) - disallowClobberPhysRegOnly(*AS); - disallowClobberPhysRegOnly(PhysReg); -} - -/// ClobberPhysRegOnly - This is called when the specified physreg changes -/// value. We use this to invalidate any info about stuff we thing lives in it. -void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) { - std::multimap<unsigned, int>::iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - PhysRegsAvailable.erase(I++); - assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && - "Bidirectional map mismatch!"); - SpillSlotsOrReMatsAvailable.erase(SlotOrReMat); - DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg) - << " clobbered, invalidating "); - if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n"); - else - DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n"); - } -} - -/// ClobberPhysReg - This is called when the specified physreg changes -/// value. We use this to invalidate any info about stuff we thing lives in -/// it and any of its aliases. -void AvailableSpills::ClobberPhysReg(unsigned PhysReg) { - for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS) - ClobberPhysRegOnly(*AS); - ClobberPhysRegOnly(PhysReg); -} - -/// AddAvailableRegsToLiveIn - Availability information is being kept coming -/// into the specified MBB. Add available physical registers as potential -/// live-in's. If they are reused in the MBB, they will be added to the -/// live-in set to make register scavenger and post-allocation scheduler. -void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - std::set<unsigned> NotAvailable; - for (std::multimap<unsigned, int>::iterator - I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end(); - I != E; ++I) { - unsigned Reg = I->first; - const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg); - // FIXME: A temporary workaround. We can't reuse available value if it's - // not safe to move the def of the virtual register's class. e.g. - // X86::RFP* register classes. Do not add it as a live-in. - if (!TII->isSafeToMoveRegClassDefs(RC)) - // This is no longer available. - NotAvailable.insert(Reg); - else { - MBB.addLiveIn(Reg); - if (RegKills[Reg]) - ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps); - } - - // Skip over the same register. 
- std::multimap<unsigned, int>::iterator NI = llvm::next(I); - while (NI != E && NI->first == Reg) { - ++I; - ++NI; - } - } - - for (std::set<unsigned>::iterator I = NotAvailable.begin(), - E = NotAvailable.end(); I != E; ++I) { - ClobberPhysReg(*I); - for (const unsigned *SubRegs = TRI->getSubRegisters(*I); - *SubRegs; ++SubRegs) - ClobberPhysReg(*SubRegs); - } -} - -/// ModifyStackSlotOrReMat - This method is called when the value in a stack -/// slot changes. This removes information about which register the previous -/// value for this slot lives in (as the previous value is dead now). -void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { - std::map<int, unsigned>::iterator It = - SpillSlotsOrReMatsAvailable.find(SlotOrReMat); - if (It == SpillSlotsOrReMatsAvailable.end()) return; - unsigned Reg = It->second >> 1; - SpillSlotsOrReMatsAvailable.erase(It); - - // This register may hold the value of multiple stack slots, only remove this - // stack slot from the set of values the register contains. - std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg); - for (; ; ++I) { - assert(I != PhysRegsAvailable.end() && I->first == Reg && - "Map inverse broken!"); - if (I->second == SlotOrReMat) break; - } - PhysRegsAvailable.erase(I); -} - -void AvailableSpills::ClobberSharingStackSlots(int StackSlot) { - std::map<int, unsigned>::iterator It = - SpillSlotsOrReMatsAvailable.find(StackSlot); - if (It == SpillSlotsOrReMatsAvailable.end()) return; - unsigned Reg = It->second >> 1; - - // Erase entries in PhysRegsAvailable for other stack slots. - std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg); - while (I != PhysRegsAvailable.end() && I->first == Reg) { - std::multimap<unsigned, int>::iterator NextI = llvm::next(I); - if (I->second != StackSlot) { - DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in " - << PrintReg(Reg, TRI) << '\n'); - SpillSlotsOrReMatsAvailable.erase(I->second); - PhysRegsAvailable.erase(I); - } - I = NextI; - } -} - -// ************************** // -// Reuse Info Implementation // -// ************************** // - -/// GetRegForReload - We are about to emit a reload into PhysReg. If there -/// is some other operand that is using the specified register, either pick -/// a new register to use, or evict the previous reload and use this reg. -unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, - unsigned PhysReg, - MachineFunction &MF, - MachineInstr *MI, AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - SmallSet<unsigned, 8> &Rejected, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - VirtRegMap &VRM) { - const TargetInstrInfo* TII = MF.getTarget().getInstrInfo(); - const TargetRegisterInfo *TRI = Spills.getRegInfo(); - - if (Reuses.empty()) return PhysReg; // This is most often empty. - - for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) { - ReusedOp &Op = Reuses[ro]; - // If we find some other reuse that was supposed to use this register - // exactly for its reload, we can change this reload to use ITS reload - // register. That is, unless its reload register has already been - // considered and subsequently rejected because it has also been reused - // by another operand. - if (Op.PhysRegReused == PhysReg && - Rejected.count(Op.AssignedPhysReg) == 0 && - RC->contains(Op.AssignedPhysReg)) { - // Yup, use the reload register that we didn't use before. 
- unsigned NewReg = Op.AssignedPhysReg; - Rejected.insert(PhysReg); - return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - } else { - // Otherwise, we might also have a problem if a previously reused - // value aliases the new register. If so, codegen the previous reload - // and use this one. - unsigned PRRU = Op.PhysRegReused; - if (TRI->regsOverlap(PRRU, PhysReg)) { - // Okay, we found out that an alias of a reused register - // was used. This isn't good because it means we have - // to undo a previous reuse. - MachineBasicBlock *MBB = MI->getParent(); - const TargetRegisterClass *AliasRC = - MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg); - - // Copy Op out of the vector and remove it, we're going to insert an - // explicit load for it. - ReusedOp NewOp = Op; - Reuses.erase(Reuses.begin()+ro); - - // MI may be using only a sub-register of PhysRegUsed. - unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg(); - unsigned SubIdx = 0; - assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) && - "A reuse cannot be a virtual register"); - if (PRRU != RealPhysRegUsed) { - // What was the sub-register index? - SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed); - assert(SubIdx && - "Operand physreg is not a sub-register of PhysRegUsed"); - } - - // Ok, we're going to try to reload the assigned physreg into the - // slot that we were supposed to in the first place. However, that - // register could hold a reuse. Check to see if it conflicts or - // would prefer us to use a different register. - unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg, - MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - - bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT; - int SSorRMId = DoReMat - ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat; - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, - DoReMat, SSorRMId, TII, MF); - - if (DoReMat) { - ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII, - TRI, VRM); - } else { - TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg, - NewOp.StackSlotOrReMat, AliasRC, TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI); - // Any stores to this stack slot are not dead anymore. - MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL; - ++NumLoads; - } - Spills.ClobberPhysReg(NewPhysReg); - Spills.ClobberPhysReg(NewOp.PhysRegReused); - - unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) :NewPhysReg; - MI->getOperand(NewOp.Operand).setReg(RReg); - MI->getOperand(NewOp.Operand).setSubReg(0); - - Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg); - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - - DEBUG(dbgs() << "Reuse undone!\n"); - --NumReused; - - // Finally, PhysReg is now available, go ahead and use it. - return PhysReg; - } - } - } - return PhysReg; -} - -// ************************************************************************ // - -/// FoldsStackSlotModRef - Return true if the specified MI folds the specified -/// stack slot mod/ref. It also checks if it's possible to unfold the -/// instruction by having it define a specified physical register instead. 
-static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg, - const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI, - VirtRegMap &VRM) { - if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI)) - return false; - - bool Found = false; - VirtRegMap::MI2VirtMapTy::const_iterator I, End; - for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) { - unsigned VirtReg = I->second.first; - VirtRegMap::ModRef MR = I->second.second; - if (MR & VirtRegMap::isModRef) - if (VRM.getStackSlot(VirtReg) == SS) { - Found= TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0; - break; - } - } - if (!Found) - return false; - - // Does the instruction uses a register that overlaps the scratch register? - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - if (!VRM.hasPhys(Reg)) - continue; - Reg = VRM.getPhys(Reg); - } - if (TRI->regsOverlap(PhysReg, Reg)) - return false; - } - return true; -} - -/// FindFreeRegister - Find a free register of a given register class by looking -/// at (at most) the last two machine instructions. -static unsigned FindFreeRegister(MachineBasicBlock::iterator MII, - MachineBasicBlock &MBB, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, - BitVector &AllocatableRegs) { - BitVector Defs(TRI->getNumRegs()); - BitVector Uses(TRI->getNumRegs()); - SmallVector<unsigned, 4> LocalUses; - SmallVector<unsigned, 4> Kills; - - // Take a look at 2 instructions at most. - unsigned Count = 0; - while (Count < 2) { - if (MII == MBB.begin()) - break; - MachineInstr *PrevMI = prior(MII); - MII = PrevMI; - - if (PrevMI->isDebugValue()) - continue; // Skip over dbg_value instructions. 
- ++Count; - - for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = PrevMI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned Reg = MO.getReg(); - if (MO.isDef()) { - Defs.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Defs.set(*AS); - } else { - LocalUses.push_back(Reg); - if (MO.isKill() && AllocatableRegs[Reg]) - Kills.push_back(Reg); - } - } - - for (unsigned i = 0, e = Kills.size(); i != e; ++i) { - unsigned Kill = Kills[i]; - if (!Defs[Kill] && !Uses[Kill] && - RC->contains(Kill)) - return Kill; - } - for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { - unsigned Reg = LocalUses[i]; - Uses.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Uses.set(*AS); - } - } - - return 0; -} - -static -void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg, - const TargetRegisterInfo &TRI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == VirtReg) - substitutePhysReg(MO, PhysReg, TRI); - } -} - -namespace { - -struct RefSorter { - bool operator()(const std::pair<MachineInstr*, int> &A, - const std::pair<MachineInstr*, int> &B) { - return A.second < B.second; - } -}; - -// ***************************** // -// Local Spiller Implementation // -// ***************************** // - -class LocalRewriter : public VirtRegRewriter { - MachineRegisterInfo *MRI; - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - VirtRegMap *VRM; - LiveIntervals *LIs; - BitVector AllocatableRegs; - DenseMap<MachineInstr*, unsigned> DistanceMap; - DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues; - - MachineBasicBlock *MBB; // Basic block currently being processed. 
- -public: - - bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs); - -private: - void EraseInstr(MachineInstr *MI) { - VRM->RemoveMachineInstrFromMaps(MI); - LIs->RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - } - - bool OptimizeByUnfold2(unsigned VirtReg, int SS, - MachineBasicBlock::iterator &MII, - std::vector<MachineInstr*> &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - bool OptimizeByUnfold(MachineBasicBlock::iterator &MII, - std::vector<MachineInstr*> &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - bool CommuteToFoldReload(MachineBasicBlock::iterator &MII, - unsigned VirtReg, unsigned SrcReg, int SS, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - const TargetRegisterInfo *TRI); - - void SpillRegToStackSlot(MachineBasicBlock::iterator &MII, - int Idx, unsigned PhysReg, int StackSlot, - const TargetRegisterClass *RC, - bool isAvailable, MachineInstr *&LastStore, - AvailableSpills &Spills, - SmallSet<MachineInstr*, 4> &ReMatDefs, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - void TransferDeadness(unsigned Reg, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - bool InsertEmergencySpills(MachineInstr *MI); - - bool InsertRestores(MachineInstr *MI, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); - - bool InsertSpills(MachineInstr *MI); - - void ProcessUses(MachineInstr &MI, AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - BitVector &RegKills, - ReuseInfo &ReusedOperands, - std::vector<MachineOperand*> &KillOps); - - void RewriteMBB(LiveIntervals *LIs, - AvailableSpills &Spills, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps); -}; -} - -bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm, - LiveIntervals* lis) { - MRI = &MF.getRegInfo(); - TRI = MF.getTarget().getRegisterInfo(); - TII = MF.getTarget().getInstrInfo(); - VRM = &vrm; - LIs = lis; - AllocatableRegs = TRI->getAllocatableSet(MF); - DEBUG(dbgs() << "\n**** Local spiller rewriting function '" - << MF.getFunction()->getName() << "':\n"); - DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and" - " reloads!) ****\n"); - DEBUG(MF.print(dbgs(), LIs->getSlotIndexes())); - - // Spills - Keep track of which spilled values are available in physregs - // so that we can choose to reuse the physregs instead of emitting - // reloads. This is usually refreshed per basic block. - AvailableSpills Spills(TRI, TII); - - // Keep track of kill information. - BitVector RegKills(TRI->getNumRegs()); - std::vector<MachineOperand*> KillOps; - KillOps.resize(TRI->getNumRegs(), NULL); - - // SingleEntrySuccs - Successor blocks which have a single predecessor. - SmallVector<MachineBasicBlock*, 4> SinglePredSuccs; - SmallPtrSet<MachineBasicBlock*,16> EarlyVisited; - - // Traverse the basic blocks depth first. - MachineBasicBlock *Entry = MF.begin(); - SmallPtrSet<MachineBasicBlock*,16> Visited; - for (df_ext_iterator<MachineBasicBlock*, - SmallPtrSet<MachineBasicBlock*,16> > - DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); - DFI != E; ++DFI) { - MBB = *DFI; - if (!EarlyVisited.count(MBB)) - RewriteMBB(LIs, Spills, RegKills, KillOps); - - // If this MBB is the only predecessor of a successor. Keep the - // availability information and visit it next. 
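The traversal keeps Spills' availability information alive along a chain of successors that each have exactly one predecessor; the do-loop below implements that walk. A stripped-down model of the same walk, with plain ints standing in for basic blocks (hypothetical, for exposition only):

#include <set>
#include <vector>

// Succs[B] lists B's successors; PredCount[S] is S's number of predecessors.
// A successor is only followed while it is reachable solely through the block
// just visited, which is what makes it safe to keep the availability state.
static void visitChain(int BB, const std::vector<std::vector<int> > &Succs,
                       const std::vector<int> &PredCount,
                       std::set<int> &Visited) {
  while (BB != -1) {
    Visited.insert(BB);
    int Next = -1;
    for (unsigned i = 0, e = Succs[BB].size(); i != e; ++i) {
      int S = Succs[BB][i];
      if (PredCount[S] == 1 && !Visited.count(S)) {
        Next = S;                   // single-predecessor successor: keep going
        break;
      }
    }
    BB = Next;                      // no such successor: the chain ends
  }
}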
- do { - // Keep visiting single predecessor successor as long as possible. - SinglePredSuccs.clear(); - findSinglePredSuccessor(MBB, SinglePredSuccs); - if (SinglePredSuccs.empty()) - MBB = 0; - else { - // FIXME: More than one successors, each of which has MBB has - // the only predecessor. - MBB = SinglePredSuccs[0]; - if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) { - Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps); - RewriteMBB(LIs, Spills, RegKills, KillOps); - } - } - } while (MBB); - - // Clear the availability info. - Spills.clear(); - } - - DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); - DEBUG(MF.print(dbgs(), LIs->getSlotIndexes())); - - // Mark unused spill slots. - MachineFrameInfo *MFI = MF.getFrameInfo(); - int SS = VRM->getLowSpillSlot(); - if (SS != VirtRegMap::NO_STACK_SLOT) { - for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) { - SmallVector<MachineInstr*, 4> &DbgValues = Slot2DbgValues[SS]; - if (!VRM->isSpillSlotUsed(SS)) { - MFI->RemoveStackObject(SS); - for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) { - MachineInstr *DVMI = DbgValues[j]; - DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n'); - EraseInstr(DVMI); - } - ++NumDSS; - } - DbgValues.clear(); - } - } - Slot2DbgValues.clear(); - - return true; -} - -/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if -/// a scratch register is available. -/// xorq %r12<kill>, %r13 -/// addq %rax, -184(%rbp) -/// addq %r13, -184(%rbp) -/// ==> -/// xorq %r12<kill>, %r13 -/// movq -184(%rbp), %r12 -/// addq %rax, %r12 -/// addq %r13, %r12 -/// movq %r12, -184(%rbp) -bool LocalRewriter:: -OptimizeByUnfold2(unsigned VirtReg, int SS, - MachineBasicBlock::iterator &MII, - std::vector<MachineInstr*> &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - - MachineBasicBlock::iterator NextMII = llvm::next(MII); - // Skip over dbg_value instructions. - while (NextMII != MBB->end() && NextMII->isDebugValue()) - NextMII = llvm::next(NextMII); - if (NextMII == MBB->end()) - return false; - - if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0) - return false; - - // Now let's see if the last couple of instructions happens to have freed up - // a register. - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - unsigned PhysReg = FindFreeRegister(MII, *MBB, RC, TRI, AllocatableRegs); - if (!PhysReg) - return false; - - MachineFunction &MF = *MBB->getParent(); - TRI = MF.getTarget().getRegisterInfo(); - MachineInstr &MI = *MII; - if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, *VRM)) - return false; - - // If the next instruction also folds the same SS modref and can be unfoled, - // then it's worthwhile to issue a load from SS into the free register and - // then unfold these instructions. - if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM)) - return false; - - // Back-schedule reloads and remats. - ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF); - - // Load from SS to the spare physical register. - TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI); - // This invalidates Phys. - Spills.ClobberPhysReg(PhysReg); - // Remember it's available. - Spills.addAvailable(SS, PhysReg); - MaybeDeadStores[SS] = NULL; - - // Unfold current MI. 
- SmallVector<MachineInstr*, 4> NewMIs; - if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs)) - llvm_unreachable("Unable unfold the load / store folding instruction!"); - assert(NewMIs.size() == 1); - AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI); - VRM->transferRestorePts(&MI, NewMIs[0]); - MII = MBB->insert(MII, NewMIs[0]); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - ++NumModRefUnfold; - - // Unfold next instructions that fold the same SS. - do { - MachineInstr &NextMI = *NextMII; - NextMII = llvm::next(NextMII); - NewMIs.clear(); - if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs)) - llvm_unreachable("Unable unfold the load / store folding instruction!"); - assert(NewMIs.size() == 1); - AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI); - VRM->transferRestorePts(&NextMI, NewMIs[0]); - MBB->insert(NextMII, NewMIs[0]); - InvalidateKills(NextMI, TRI, RegKills, KillOps); - EraseInstr(&NextMI); - ++NumModRefUnfold; - // Skip over dbg_value instructions. - while (NextMII != MBB->end() && NextMII->isDebugValue()) - NextMII = llvm::next(NextMII); - if (NextMII == MBB->end()) - break; - } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM)); - - // Store the value back into SS. - TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI); - MachineInstr *StoreMI = prior(NextMII); - VRM->addSpillSlotUse(SS, StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - - return true; -} - -/// OptimizeByUnfold - Turn a store folding instruction into a load folding -/// instruction. e.g. -/// xorl %edi, %eax -/// movl %eax, -32(%ebp) -/// movl -36(%ebp), %eax -/// orl %eax, -32(%ebp) -/// ==> -/// xorl %edi, %eax -/// orl -36(%ebp), %eax -/// mov %eax, -32(%ebp) -/// This enables unfolding optimization for a subsequent instruction which will -/// also eliminate the newly introduced store instruction. -bool LocalRewriter:: -OptimizeByUnfold(MachineBasicBlock::iterator &MII, - std::vector<MachineInstr*> &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - MachineFunction &MF = *MBB->getParent(); - MachineInstr &MI = *MII; - unsigned UnfoldedOpc = 0; - unsigned UnfoldPR = 0; - unsigned UnfoldVR = 0; - int FoldedSS = VirtRegMap::NO_STACK_SLOT; - VirtRegMap::MI2VirtMapTy::const_iterator I, End; - for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) { - // Only transform a MI that folds a single register. - if (UnfoldedOpc) - return false; - UnfoldVR = I->second.first; - VirtRegMap::ModRef MR = I->second.second; - // MI2VirtMap be can updated which invalidate the iterator. - // Increment the iterator first. - ++I; - if (VRM->isAssignedReg(UnfoldVR)) - continue; - // If this reference is not a use, any previous store is now dead. - // Otherwise, the store to this stack slot is not dead anymore. - FoldedSS = VRM->getStackSlot(UnfoldVR); - MachineInstr* DeadStore = MaybeDeadStores[FoldedSS]; - if (DeadStore && (MR & VirtRegMap::isModRef)) { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS); - if (!PhysReg || !DeadStore->readsRegister(PhysReg)) - continue; - UnfoldPR = PhysReg; - UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), - false, true); - } - } - - if (!UnfoldedOpc) { - if (!UnfoldVR) - return false; - - // Look for other unfolding opportunities. 
- return OptimizeByUnfold2(UnfoldVR, FoldedSS, MII, MaybeDeadStores, Spills, - RegKills, KillOps); - } - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse()) - continue; - unsigned VirtReg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg()) - continue; - if (VRM->isAssignedReg(VirtReg)) { - unsigned PhysReg = VRM->getPhys(VirtReg); - if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR)) - return false; - } else if (VRM->isReMaterialized(VirtReg)) - continue; - int SS = VRM->getStackSlot(VirtReg); - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - if (PhysReg) { - if (TRI->regsOverlap(PhysReg, UnfoldPR)) - return false; - continue; - } - if (VRM->hasPhys(VirtReg)) { - PhysReg = VRM->getPhys(VirtReg); - if (!TRI->regsOverlap(PhysReg, UnfoldPR)) - continue; - } - - // Ok, we'll need to reload the value into a register which makes - // it impossible to perform the store unfolding optimization later. - // Let's see if it is possible to fold the load if the store is - // unfolded. This allows us to perform the store unfolding - // optimization. - SmallVector<MachineInstr*, 4> NewMIs; - if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) { - assert(NewMIs.size() == 1); - MachineInstr *NewMI = NewMIs.back(); - MBB->insert(MII, NewMI); - NewMIs.clear(); - int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false); - assert(Idx != -1); - SmallVector<unsigned, 1> Ops; - Ops.push_back(Idx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS); - NewMI->eraseFromParent(); - if (FoldedMI) { - VRM->addSpillSlotUse(SS, FoldedMI); - if (!VRM->hasPhys(UnfoldVR)) - VRM->assignVirt2Phys(UnfoldVR, UnfoldPR); - VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); - MII = FoldedMI; - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - return true; - } - } - } - - return false; -} - -/// CommuteChangesDestination - We are looking for r0 = op r1, r2 and -/// where SrcReg is r1 and it is tied to r0. Return true if after -/// commuting this instruction it will be r0 = op r2, r1. 
-static bool CommuteChangesDestination(MachineInstr *DefMI, - const MCInstrDesc &MCID, - unsigned SrcReg, - const TargetInstrInfo *TII, - unsigned &DstIdx) { - if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3) - return false; - if (!DefMI->getOperand(1).isReg() || - DefMI->getOperand(1).getReg() != SrcReg) - return false; - unsigned DefIdx; - if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0) - return false; - unsigned SrcIdx1, SrcIdx2; - if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2)) - return false; - if (SrcIdx1 == 1 && SrcIdx2 == 2) { - DstIdx = 2; - return true; - } - return false; -} - -/// CommuteToFoldReload - -/// Look for -/// r1 = load fi#1 -/// r1 = op r1, r2<kill> -/// store r1, fi#1 -/// -/// If op is commutable and r2 is killed, then we can xform these to -/// r2 = op r2, fi#1 -/// store r2, fi#1 -bool LocalRewriter:: -CommuteToFoldReload(MachineBasicBlock::iterator &MII, - unsigned VirtReg, unsigned SrcReg, int SS, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps, - const TargetRegisterInfo *TRI) { - if (MII == MBB->begin() || !MII->killsRegister(SrcReg)) - return false; - - MachineInstr &MI = *MII; - MachineBasicBlock::iterator DefMII = prior(MII); - MachineInstr *DefMI = DefMII; - const MCInstrDesc &MCID = DefMI->getDesc(); - unsigned NewDstIdx; - if (DefMII != MBB->begin() && - MCID.isCommutable() && - CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) { - MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); - unsigned NewReg = NewDstMO.getReg(); - if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg)) - return false; - MachineInstr *ReloadMI = prior(DefMII); - int FrameIdx; - unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx); - if (DestReg != SrcReg || FrameIdx != SS) - return false; - int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false); - if (UseIdx == -1) - return false; - unsigned DefIdx; - if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx)) - return false; - assert(DefMI->getOperand(DefIdx).isReg() && - DefMI->getOperand(DefIdx).getReg() == SrcReg); - - // Now commute def instruction. - MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true); - if (!CommutedMI) - return false; - MBB->insert(MII, CommutedMI); - SmallVector<unsigned, 1> Ops; - Ops.push_back(NewDstIdx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS); - // Not needed since foldMemoryOperand returns new MI. - CommutedMI->eraseFromParent(); - if (!FoldedMI) - return false; - - VRM->addSpillSlotUse(SS, FoldedMI); - VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); - // Insert new def MI and spill MI. - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI); - MII = prior(MII); - MachineInstr *StoreMI = MII; - VRM->addSpillSlotUse(SS, StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - MII = FoldedMI; // Update MII to backtrack. - - // Delete all 3 old instructions. - InvalidateKills(*ReloadMI, TRI, RegKills, KillOps); - EraseInstr(ReloadMI); - InvalidateKills(*DefMI, TRI, RegKills, KillOps); - EraseInstr(DefMI); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - - // If NewReg was previously holding value of some SS, it's now clobbered. - // This has to be done now because it's a physical register. When this - // instruction is re-visited, it's ignored. 
- Spills.ClobberPhysReg(NewReg); - - ++NumCommutes; - return true; - } - - return false; -} - -/// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if -/// the last store to the same slot is now dead. If so, remove the last store. -void LocalRewriter:: -SpillRegToStackSlot(MachineBasicBlock::iterator &MII, - int Idx, unsigned PhysReg, int StackSlot, - const TargetRegisterClass *RC, - bool isAvailable, MachineInstr *&LastStore, - AvailableSpills &Spills, - SmallSet<MachineInstr*, 4> &ReMatDefs, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - - MachineBasicBlock::iterator oldNextMII = llvm::next(MII); - TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC, - TRI); - MachineInstr *StoreMI = prior(oldNextMII); - VRM->addSpillSlotUse(StackSlot, StoreMI); - DEBUG(dbgs() << "Store:\t" << *StoreMI); - - // If there is a dead store to this stack slot, nuke it now. - if (LastStore) { - DEBUG(dbgs() << "Removed dead store:\t" << *LastStore); - ++NumDSE; - SmallVector<unsigned, 2> KillRegs; - InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs); - MachineBasicBlock::iterator PrevMII = LastStore; - bool CheckDef = PrevMII != MBB->begin(); - if (CheckDef) - --PrevMII; - EraseInstr(LastStore); - if (CheckDef) { - // Look at defs of killed registers on the store. Mark the defs - // as dead since the store has been deleted and they aren't - // being reused. - for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) { - bool HasOtherDef = false; - if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) { - MachineInstr *DeadDef = PrevMII; - if (ReMatDefs.count(DeadDef) && !HasOtherDef) { - // FIXME: This assumes a remat def does not have side effects. - EraseInstr(DeadDef); - ++NumDRM; - } - } - } - } - } - - // Allow for multi-instruction spill sequences, as on PPC Altivec. Presume - // the last of multiple instructions is the actual store. - LastStore = prior(oldNextMII); - - // If the stack slot value was previously available in some other - // register, change it now. Otherwise, make the register available, - // in PhysReg. - Spills.ModifyStackSlotOrReMat(StackSlot); - Spills.ClobberPhysReg(PhysReg); - Spills.addAvailable(StackSlot, PhysReg, isAvailable); - ++NumStores; -} - -/// isSafeToDelete - Return true if this instruction doesn't produce any side -/// effect and all of its defs are dead. -static bool isSafeToDelete(MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() || - MCID.isCall() || MCID.isBarrier() || MCID.isReturn() || - MI.isLabel() || MI.isDebugValue() || - MI.hasUnmodeledSideEffects()) - return false; - - // Technically speaking inline asm without side effects and no defs can still - // be deleted. But there is so much bad inline asm code out there, we should - // let them be. - if (MI.isInlineAsm()) - return false; - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.getReg()) - continue; - if (MO.isDef() && !MO.isDead()) - return false; - if (MO.isUse() && MO.isKill()) - // FIXME: We can't remove kill markers or else the scavenger will assert. - // An alternative is to add a ADD pseudo instruction to replace kill - // markers. - return false; - } - return true; -} - -/// TransferDeadness - A identity copy definition is dead and it's being -/// removed. Find the last def or use and mark it as dead / kill. 
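TransferDeadness, defined next, gathers every reference to Reg in the current block along with its DistanceMap position, sorts by that distance using RefSorter, and then walks backwards from the farthest reference. The ordering itself is just a sort on the pair's second element; a tiny illustration with ints standing in for MachineInstr pointers (hypothetical):

#include <algorithm>
#include <utility>
#include <vector>

struct ByDistance {               // same ordering as RefSorter above
  bool operator()(const std::pair<int, int> &A,
                  const std::pair<int, int> &B) const {
    return A.second < B.second;
  }
};

// Returns the id of the reference with the greatest distance, or -1 if none;
// this is the reference TransferDeadness would visit first.
static int farthestReference(std::vector<std::pair<int, int> > Refs) {
  std::sort(Refs.begin(), Refs.end(), ByDistance());
  return Refs.empty() ? -1 : Refs.back().first;
}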
-void LocalRewriter:: -TransferDeadness(unsigned Reg, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - SmallPtrSet<MachineInstr*, 4> Seens; - SmallVector<std::pair<MachineInstr*, int>,8> Refs; - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg), - RE = MRI->reg_end(); RI != RE; ++RI) { - MachineInstr *UDMI = &*RI; - if (UDMI->isDebugValue() || UDMI->getParent() != MBB) - continue; - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI); - if (DI == DistanceMap.end()) - continue; - if (Seens.insert(UDMI)) - Refs.push_back(std::make_pair(UDMI, DI->second)); - } - - if (Refs.empty()) - return; - std::sort(Refs.begin(), Refs.end(), RefSorter()); - - while (!Refs.empty()) { - MachineInstr *LastUDMI = Refs.back().first; - Refs.pop_back(); - - MachineOperand *LastUD = NULL; - for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = LastUDMI->getOperand(i); - if (!MO.isReg() || MO.getReg() != Reg) - continue; - if (!LastUD || (LastUD->isUse() && MO.isDef())) - LastUD = &MO; - if (LastUDMI->isRegTiedToDefOperand(i)) - break; - } - if (LastUD->isDef()) { - // If the instruction has no side effect, delete it and propagate - // backward further. Otherwise, mark is dead and we are done. - if (!isSafeToDelete(*LastUDMI)) { - LastUD->setIsDead(); - break; - } - EraseInstr(LastUDMI); - } else { - LastUD->setIsKill(); - RegKills.set(Reg); - KillOps[Reg] = LastUD; - break; - } - } -} - -/// InsertEmergencySpills - Insert emergency spills before MI if requested by -/// VRM. Return true if spills were inserted. -bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) { - if (!VRM->hasEmergencySpills(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - SmallSet<int, 4> UsedSS; - std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI); - for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) { - unsigned PhysReg = EmSpills[i]; - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg); - assert(RC && "Unable to determine register class!"); - int SS = VRM->getEmergencySpillSlot(RC); - if (UsedSS.count(SS)) - llvm_unreachable("Need to spill more than one physical registers!"); - UsedSS.insert(SS); - TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI); - MachineInstr *StoreMI = prior(MII); - VRM->addSpillSlotUse(SS, StoreMI); - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS, - TII, *MBB->getParent()); - - TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI); - - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SS, LoadMI); - ++NumPSpills; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - return true; -} - -/// InsertRestores - Restore registers before MI is requested by VRM. Return -/// true is any instructions were inserted. -bool LocalRewriter::InsertRestores(MachineInstr *MI, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - if (!VRM->isRestorePt(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - std::vector<unsigned> &RestoreRegs = VRM->getRestorePtRestores(MI); - for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) { - unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order. - if (!VRM->getPreSplitReg(VirtReg)) - continue; // Split interval spilled again. 
- unsigned Phys = VRM->getPhys(VirtReg); - MRI->setPhysRegUsed(Phys); - - // Check if the value being restored if available. If so, it must be - // from a predecessor BB that fallthrough into this BB. We do not - // expect: - // BB1: - // r1 = load fi#1 - // ... - // = r1<kill> - // ... # r1 not clobbered - // ... - // = load fi#1 - bool DoReMat = VRM->isReMaterialized(VirtReg); - int SSorRMId = DoReMat - ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); - if (InReg == Phys) { - // If the value is already available in the expected register, save - // a reload / remat. - if (SSorRMId) - DEBUG(dbgs() << "Reusing RM#" - << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << SSorRMId); - DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for " << PrintReg(VirtReg) - <<" instead of reloading into physreg " - << TRI->getName(Phys) << '\n'); - - // Reusing a physreg may resurrect it. But we expect ProcessUses to update - // the kill flags for the current instruction after processing it. - - ++NumOmitted; - continue; - } else if (InReg && InReg != Phys) { - if (SSorRMId) - DEBUG(dbgs() << "Reusing RM#" - << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << SSorRMId); - DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for " << PrintReg(VirtReg) - <<" by copying it into physreg " - << TRI->getName(Phys) << '\n'); - - // If the reloaded / remat value is available in another register, - // copy it to the desired register. - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII, - *MBB->getParent()); - MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), Phys) - .addReg(InReg, RegState::Kill); - - // This invalidates Phys. - Spills.ClobberPhysReg(Phys); - // Remember it's available. - Spills.addAvailable(SSorRMId, Phys); - - CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - UpdateKills(*CopyMI, TRI, RegKills, KillOps); - - DEBUG(dbgs() << '\t' << *CopyMI); - ++NumCopified; - continue; - } - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII, - *MBB->getParent()); - - if (VRM->isReMaterialized(VirtReg)) { - ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM); - } else { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SSorRMId, LoadMI); - ++NumLoads; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - - // This invalidates Phys. - Spills.ClobberPhysReg(Phys); - // Remember it's available. - Spills.addAvailable(SSorRMId, Phys); - - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(MII)); - } - return true; -} - -/// InsertSpills - Insert spills after MI if requested by VRM. Return -/// true if spills were inserted. 
-bool LocalRewriter::InsertSpills(MachineInstr *MI) { - if (!VRM->isSpillPt(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - std::vector<std::pair<unsigned,bool> > &SpillRegs = - VRM->getSpillPtSpills(MI); - for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) { - unsigned VirtReg = SpillRegs[i].first; - bool isKill = SpillRegs[i].second; - if (!VRM->getPreSplitReg(VirtReg)) - continue; // Split interval spilled again. - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - unsigned Phys = VRM->getPhys(VirtReg); - int StackSlot = VRM->getStackSlot(VirtReg); - MachineBasicBlock::iterator oldNextMII = llvm::next(MII); - TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot, - RC, TRI); - MachineInstr *StoreMI = prior(oldNextMII); - VRM->addSpillSlotUse(StackSlot, StoreMI); - DEBUG(dbgs() << "Store:\t" << *StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - } - return true; -} - - -/// ProcessUses - Process all of MI's spilled operands and all available -/// operands. -void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, - std::vector<MachineInstr*> &MaybeDeadStores, - BitVector &RegKills, - ReuseInfo &ReusedOperands, - std::vector<MachineOperand*> &KillOps) { - // Clear kill info. - SmallSet<unsigned, 2> KilledMIRegs; - SmallVector<unsigned, 4> VirtUseOps; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; // Ignore non-register operands. - - unsigned VirtReg = MO.getReg(); - - if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) { - // Ignore physregs for spilling, but remember that it is used by this - // function. - MRI->setPhysRegUsed(VirtReg); - continue; - } - - // We want to process implicit virtual register uses first. - if (MO.isImplicit()) - // If the virtual register is implicitly defined, emit a implicit_def - // before so scavenger knows it's "defined". - // FIXME: This is a horrible hack done the by register allocator to - // remat a definition with virtual register operand. - VirtUseOps.insert(VirtUseOps.begin(), i); - else - VirtUseOps.push_back(i); - - // A partial def causes problems because the same operand both reads and - // writes the register. This rewriter is designed to rewrite uses and defs - // separately, so a partial def would already have been rewritten to a - // physreg by the time we get to processing defs. - // Add an implicit use operand to model the partial def. - if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) && - MI.findRegisterUseOperandIdx(VirtReg) == -1) { - VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands()); - MI.addOperand(MachineOperand::CreateReg(VirtReg, - false, // isDef - true)); // isImplicit - DEBUG(dbgs() << "Partial redef: " << MI); - } - } - - // Process all of the spilled uses and all non spilled reg references. - SmallVector<int, 2> PotentialDeadStoreSlots; - KilledMIRegs.clear(); - for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) { - unsigned i = VirtUseOps[j]; - unsigned VirtReg = MI.getOperand(i).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Not a virtual register?"); - - unsigned SubIdx = MI.getOperand(i).getSubReg(); - if (VRM->isAssignedReg(VirtReg)) { - // This virtual register was assigned a physreg! 
- unsigned Phys = VRM->getPhys(VirtReg); - MRI->setPhysRegUsed(Phys); - if (MI.getOperand(i).isDef()) - ReusedOperands.markClobbered(Phys); - substitutePhysReg(MI.getOperand(i), Phys, *TRI); - if (VRM->isImplicitlyDefined(VirtReg)) - // FIXME: Is this needed? - BuildMI(*MBB, &MI, MI.getDebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), Phys); - continue; - } - - // This virtual register is now known to be a spilled value. - if (!MI.getOperand(i).isUse()) - continue; // Handle defs in the loop below (handle use&def here though) - - bool AvoidReload = MI.getOperand(i).isUndef(); - // Check if it is defined by an implicit def. It should not be spilled. - // Note, this is for correctness reason. e.g. - // 8 %reg1024<def> = IMPLICIT_DEF - // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2 - // The live range [12, 14) are not part of the r1024 live interval since - // it's defined by an implicit def. It will not conflicts with live - // interval of r1025. Now suppose both registers are spilled, you can - // easily see a situation where both registers are reloaded before - // the INSERT_SUBREG and both target registers that would overlap. - bool DoReMat = VRM->isReMaterialized(VirtReg); - int SSorRMId = DoReMat - ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - int ReuseSlot = SSorRMId; - - // Check to see if this stack slot is available. - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); - - // If this is a sub-register use, make sure the reuse register is in the - // right register class. For example, for x86 not all of the 32-bit - // registers have accessible sub-registers. - // Similarly so for EXTRACT_SUBREG. Consider this: - // EDI = op - // MOV32_mr fi#1, EDI - // ... - // = EXTRACT_SUBREG fi#1 - // fi#1 is available in EDI, but it cannot be reused because it's not in - // the right register file. - if (PhysReg && !AvoidReload && SubIdx) { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - if (!RC->contains(PhysReg)) - PhysReg = 0; - } - - if (PhysReg && !AvoidReload) { - // This spilled operand might be part of a two-address operand. If this - // is the case, then changing it will necessarily require changing the - // def part of the instruction as well. However, in some cases, we - // aren't allowed to modify the reused register. If none of these cases - // apply, reuse it. - bool CanReuse = true; - bool isTied = MI.isRegTiedToDefOperand(i); - if (isTied) { - // Okay, we have a two address operand. We can reuse this physreg as - // long as we are allowed to clobber the value and there isn't an - // earlier def that has already clobbered the physreg. - CanReuse = !ReusedOperands.isClobbered(PhysReg) && - Spills.canClobberPhysReg(PhysReg); - } - // If this is an asm, and a PhysReg alias is used elsewhere as an - // earlyclobber operand, we can't also use it as an input. - if (MI.isInlineAsm()) { - for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) { - MachineOperand &MOk = MI.getOperand(k); - if (MOk.isReg() && MOk.isEarlyClobber() && - TRI->regsOverlap(MOk.getReg(), PhysReg)) { - CanReuse = false; - DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg) - << " for " << PrintReg(VirtReg) << ": " << MOk - << '\n'); - break; - } - } - } - - if (CanReuse) { - // If this stack slot value is already available, reuse it! 
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Reusing RM#" - << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); - DEBUG(dbgs() << " from physreg " - << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg) - << " instead of reloading into " - << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n'); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - - // Reusing a physreg may resurrect it. But we expect ProcessUses to - // update the kill flags for the current instr after processing it. - - // The only technical detail we have is that we don't know that - // PhysReg won't be clobbered by a reloaded stack slot that occurs - // later in the instruction. In particular, consider 'op V1, V2'. - // If V1 is available in physreg R0, we would choose to reuse it - // here, instead of reloading it into the register the allocator - // indicated (say R1). However, V2 might have to be reloaded - // later, and it might indicate that it needs to live in R0. When - // this occurs, we need to have information available that - // indicates it is safe to use R1 for the reload instead of R0. - // - // To further complicate matters, we might conflict with an alias, - // or R0 and R1 might not be compatible with each other. In this - // case, we actually insert a reload for V1 in R1, ensuring that - // we can get at R0 or its alias. - ReusedOperands.addReuse(i, ReuseSlot, PhysReg, - VRM->getPhys(VirtReg), VirtReg); - if (isTied) - // Only mark it clobbered if this is a use&def operand. - ReusedOperands.markClobbered(PhysReg); - ++NumReused; - - if (MI.getOperand(i).isKill() && - ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) { - - // The store of this spilled value is potentially dead, but we - // won't know for certain until we've confirmed that the re-use - // above is valid, which means waiting until the other operands - // are processed. For now we just track the spill slot, we'll - // remove it after the other operands are processed if valid. - - PotentialDeadStoreSlots.push_back(ReuseSlot); - } - - // Mark is isKill if it's there no other uses of the same virtual - // register and it's not a two-address operand. IsKill will be - // unset if reg is reused. - if (!isTied && KilledMIRegs.count(VirtReg) == 0) { - MI.getOperand(i).setIsKill(); - KilledMIRegs.insert(VirtReg); - } - continue; - } // CanReuse - - // Otherwise we have a situation where we have a two-address instruction - // whose mod/ref operand needs to be reloaded. This reload is already - // available in some register "PhysReg", but if we used PhysReg as the - // operand to our 2-addr instruction, the instruction would modify - // PhysReg. This isn't cool if something later uses PhysReg and expects - // to get its initial value. - // - // To avoid this problem, and to avoid doing a load right after a store, - // we emit a copy from PhysReg into the designated register for this - // operand. - // - // This case also applies to an earlyclobber'd PhysReg. - unsigned DesignatedReg = VRM->getPhys(VirtReg); - assert(DesignatedReg && "Must map virtreg to physreg!"); - - // Note that, if we reused a register for a previous operand, the - // register we want to reload into might not actually be - // available. If this occurs, use the register indicated by the - // reuser. - if (ReusedOperands.hasReuses()) - DesignatedReg = ReusedOperands. 
- GetRegForReload(VirtReg, DesignatedReg, &MI, Spills, - MaybeDeadStores, RegKills, KillOps, *VRM); - - // If the mapped designated register is actually the physreg we have - // incoming, we don't need to inserted a dead copy. - if (DesignatedReg == PhysReg) { - // If this stack slot value is already available, reuse it! - if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Reusing RM#" - << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); - DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) - << " for " << PrintReg(VirtReg) - << " instead of reloading into same physreg.\n"); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - ReusedOperands.markClobbered(RReg); - ++NumReused; - continue; - } - - MRI->setPhysRegUsed(DesignatedReg); - ReusedOperands.markClobbered(DesignatedReg); - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat, - SSorRMId, TII, *MBB->getParent()); - MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(), - TII->get(TargetOpcode::COPY), - DesignatedReg).addReg(PhysReg); - CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - UpdateKills(*CopyMI, TRI, RegKills, KillOps); - - // This invalidates DesignatedReg. - Spills.ClobberPhysReg(DesignatedReg); - - Spills.addAvailable(ReuseSlot, DesignatedReg); - unsigned RReg = - SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - ++NumReused; - continue; - } // if (PhysReg) - - // Otherwise, reload it and remember that we have it. - PhysReg = VRM->getPhys(VirtReg); - assert(PhysReg && "Must map virtreg to physreg!"); - - // Note that, if we reused a register for a previous operand, the - // register we want to reload into might not actually be - // available. If this occurs, use the register indicated by the - // reuser. - if (ReusedOperands.hasReuses()) - PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, *VRM); - - MRI->setPhysRegUsed(PhysReg); - ReusedOperands.markClobbered(PhysReg); - if (AvoidReload) - ++NumAvoided; - else { - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat, - SSorRMId, TII, *MBB->getParent()); - - if (DoReMat) { - ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM); - } else { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SSorRMId, LoadMI); - ++NumLoads; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - // This invalidates PhysReg. - Spills.ClobberPhysReg(PhysReg); - - // Any stores to this stack slot are not dead anymore. - if (!DoReMat) - MaybeDeadStores[SSorRMId] = NULL; - Spills.addAvailable(SSorRMId, PhysReg); - // Assumes this is the last use. IsKill will be unset if reg is reused - // unless it's a two-address operand. - if (!MI.isRegTiedToDefOperand(i) && - KilledMIRegs.count(VirtReg) == 0) { - MI.getOperand(i).setIsKill(); - KilledMIRegs.insert(VirtReg); - } - - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - } - unsigned RReg = SubIdx ? 
TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - } - - // Ok - now we can remove stores that have been confirmed dead. - for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) { - // This was the last use and the spilled value is still available - // for reuse. That means the spill was unnecessary! - int PDSSlot = PotentialDeadStoreSlots[j]; - MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; - if (DeadStore) { - DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); - InvalidateKills(*DeadStore, TRI, RegKills, KillOps); - EraseInstr(DeadStore); - MaybeDeadStores[PDSSlot] = NULL; - ++NumDSE; - } - } -} - -/// rewriteMBB - Keep track of which spills are available even after the -/// register allocator is done with them. If possible, avoid reloading vregs. -void -LocalRewriter::RewriteMBB(LiveIntervals *LIs, - AvailableSpills &Spills, BitVector &RegKills, - std::vector<MachineOperand*> &KillOps) { - - DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '" - << MBB->getName() << "':\n"); - - MachineFunction &MF = *MBB->getParent(); - - // MaybeDeadStores - When we need to write a value back into a stack slot, - // keep track of the inserted store. If the stack slot value is never read - // (because the value was used from some available register, for example), and - // subsequently stored to, the original store is dead. This map keeps track - // of inserted stores that are not used. If we see a subsequent store to the - // same stack slot, the original store is deleted. - std::vector<MachineInstr*> MaybeDeadStores; - MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL); - - // ReMatDefs - These are rematerializable def MIs which are not deleted. - SmallSet<MachineInstr*, 4> ReMatDefs; - - // Keep track of the registers we have already spilled in case there are - // multiple defs of the same register in MI. - SmallSet<unsigned, 8> SpilledMIRegs; - - RegKills.reset(); - KillOps.clear(); - KillOps.resize(TRI->getNumRegs(), NULL); - - DistanceMap.clear(); - for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); - MII != E; ) { - MachineBasicBlock::iterator NextMII = llvm::next(MII); - - if (OptimizeByUnfold(MII, MaybeDeadStores, Spills, RegKills, KillOps)) - NextMII = llvm::next(MII); - - if (InsertEmergencySpills(MII)) - NextMII = llvm::next(MII); - - InsertRestores(MII, Spills, RegKills, KillOps); - - if (InsertSpills(MII)) - NextMII = llvm::next(MII); - - bool Erased = false; - bool BackTracked = false; - MachineInstr &MI = *MII; - - // Remember DbgValue's which reference stack slots. - if (MI.isDebugValue() && MI.getOperand(0).isFI()) - Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI); - - /// ReusedOperands - Keep track of operand reuse in case we need to undo - /// reuse. - ReuseInfo ReusedOperands(MI, TRI); - - ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps); - - DEBUG(dbgs() << '\t' << MI); - - - // If we have folded references to memory operands, make sure we clear all - // physical registers that may contain the value of the spilled virtual - // register - - // Copy the folded virts to a small vector, we may change MI2VirtMap. - SmallVector<std::pair<unsigned, VirtRegMap::ModRef>, 4> FoldedVirts; - // C++0x FTW! 
- for (std::pair<VirtRegMap::MI2VirtMapTy::const_iterator, - VirtRegMap::MI2VirtMapTy::const_iterator> FVRange = - VRM->getFoldedVirts(&MI); - FVRange.first != FVRange.second; ++FVRange.first) - FoldedVirts.push_back(FVRange.first->second); - - SmallSet<int, 2> FoldedSS; - for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) { - unsigned VirtReg = FoldedVirts[FVI].first; - VirtRegMap::ModRef MR = FoldedVirts[FVI].second; - DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << " MR: " << MR); - - int SS = VRM->getStackSlot(VirtReg); - if (SS == VirtRegMap::NO_STACK_SLOT) - continue; - FoldedSS.insert(SS); - DEBUG(dbgs() << " - StackSlot: " << SS << "\n"); - - // If this folded instruction is just a use, check to see if it's a - // straight load from the virt reg slot. - if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) { - int FrameIdx; - unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx); - if (DestReg && FrameIdx == SS) { - // If this spill slot is available, turn it into a copy (or nothing) - // instead of leaving it as a load! - if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { - DEBUG(dbgs() << "Promoted Load To Copy: " << MI); - if (DestReg != InReg) { - MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg); - MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(), - TII->get(TargetOpcode::COPY)) - .addReg(DestReg, RegState::Define, DefMO->getSubReg()) - .addReg(InReg, RegState::Kill); - // Revisit the copy so we make sure to notice the effects of the - // operation on the destreg (either needing to RA it if it's - // virtual or needing to clobber any values if it's physical). - NextMII = CopyMI; - NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse); - BackTracked = true; - } else { - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - // InvalidateKills resurrects any prior kill of the copy's source - // allowing the source reg to be reused in place of the copy. - Spills.disallowClobberPhysReg(InReg); - } - - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - goto ProcessNextInst; - } - } else { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - SmallVector<MachineInstr*, 4> NewMIs; - if (PhysReg && - TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){ - MBB->insert(MII, NewMIs[0]); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - --NextMII; // backtrack to the unfolded instruction. - BackTracked = true; - goto ProcessNextInst; - } - } - } - - // If this reference is not a use, any previous store is now dead. - // Otherwise, the store to this stack slot is not dead anymore. - MachineInstr* DeadStore = MaybeDeadStores[SS]; - if (DeadStore) { - bool isDead = !(MR & VirtRegMap::isRef); - MachineInstr *NewStore = NULL; - if (MR & VirtRegMap::isModRef) { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - SmallVector<MachineInstr*, 4> NewMIs; - // We can reuse this physreg as long as we are allowed to clobber - // the value and there isn't an earlier def that has already clobbered - // the physreg. - if (PhysReg && - !ReusedOperands.isClobbered(PhysReg) && - Spills.canClobberPhysReg(PhysReg) && - !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable! - MachineOperand *KillOpnd = - DeadStore->findRegisterUseOperand(PhysReg, true); - // Note, if the store is storing a sub-register, it's possible the - // super-register is needed below. 
- if (KillOpnd && !KillOpnd->getSubReg() && - TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){ - MBB->insert(MII, NewMIs[0]); - NewStore = NewMIs[1]; - MBB->insert(MII, NewStore); - VRM->addSpillSlotUse(SS, NewStore); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - --NextMII; - --NextMII; // backtrack to the unfolded instruction. - BackTracked = true; - isDead = true; - ++NumSUnfold; - } - } - } - - if (isDead) { // Previous store is dead. - // If we get here, the store is dead, nuke it now. - DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); - InvalidateKills(*DeadStore, TRI, RegKills, KillOps); - EraseInstr(DeadStore); - if (!NewStore) - ++NumDSE; - } - - MaybeDeadStores[SS] = NULL; - if (NewStore) { - // Treat this store as a spill merged into a copy. That makes the - // stack slot value available. - VRM->virtFolded(VirtReg, NewStore, VirtRegMap::isMod); - goto ProcessNextInst; - } - } - - // If the spill slot value is available, and this is a new definition of - // the value, the value is not available anymore. - if (MR & VirtRegMap::isMod) { - // Notice that the value in this stack slot has been modified. - Spills.ModifyStackSlotOrReMat(SS); - - // If this is *just* a mod of the value, check to see if this is just a - // store to the spill slot (i.e. the spill got merged into the copy). If - // so, realize that the vreg is available now, and add the store to the - // MaybeDeadStore info. - int StackSlot; - if (!(MR & VirtRegMap::isRef)) { - if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) { - assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && - "Src hasn't been allocated yet?"); - - if (CommuteToFoldReload(MII, VirtReg, SrcReg, StackSlot, - Spills, RegKills, KillOps, TRI)) { - NextMII = llvm::next(MII); - BackTracked = true; - goto ProcessNextInst; - } - - // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark - // this as a potentially dead store in case there is a subsequent - // store into the stack slot without a read from it. - MaybeDeadStores[StackSlot] = &MI; - - // If the stack slot value was previously available in some other - // register, change it now. Otherwise, make the register - // available in PhysReg. - Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg)); - } - } - } - } - - // Process all of the spilled defs. - SpilledMIRegs.clear(); - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!(MO.isReg() && MO.getReg() && MO.isDef())) - continue; - - unsigned VirtReg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) { - // Check to see if this is a noop copy. If so, eliminate the - // instruction before considering the dest reg to be changed. - // Also check if it's copying from an "undef", if so, we can't - // eliminate this or else the undef marker is lost and it will - // confuses the scavenger. This is extremely rare. - if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() && - MI.getNumOperands() == 2) { - ++NumDCE; - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - SmallVector<unsigned, 2> KillRegs; - InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); - if (MO.isDead() && !KillRegs.empty()) { - // Source register or an implicit super/sub-register use is killed. - assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg())); - // Last def is now dead. 
- TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps); - } - EraseInstr(&MI); - Erased = true; - Spills.disallowClobberPhysReg(VirtReg); - goto ProcessNextInst; - } - - // If it's not a no-op copy, it clobbers the value in the destreg. - Spills.ClobberPhysReg(VirtReg); - ReusedOperands.markClobbered(VirtReg); - - // Check to see if this instruction is a load from a stack slot into - // a register. If so, this provides the stack slot value in the reg. - int FrameIdx; - if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) { - assert(DestReg == VirtReg && "Unknown load situation!"); - - // If it is a folded reference, then it's not safe to clobber. - bool Folded = FoldedSS.count(FrameIdx); - // Otherwise, if it wasn't available, remember that it is now! - Spills.addAvailable(FrameIdx, DestReg, !Folded); - goto ProcessNextInst; - } - - continue; - } - - unsigned SubIdx = MO.getSubReg(); - bool DoReMat = VRM->isReMaterialized(VirtReg); - if (DoReMat) - ReMatDefs.insert(&MI); - - // The only vregs left are stack slot definitions. - int StackSlot = VRM->getStackSlot(VirtReg); - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - - // If this def is part of a two-address operand, make sure to execute - // the store from the correct physical register. - unsigned PhysReg; - unsigned TiedOp; - if (MI.isRegTiedToUseOperand(i, &TiedOp)) { - PhysReg = MI.getOperand(TiedOp).getReg(); - if (SubIdx) { - unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI); - assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg && - "Can't find corresponding super-register!"); - PhysReg = SuperReg; - } - } else { - PhysReg = VRM->getPhys(VirtReg); - if (ReusedOperands.isClobbered(PhysReg)) { - // Another def has taken the assigned physreg. It must have been a - // use&def which got it due to reuse. Undo the reuse! - PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, *VRM); - } - } - - // If StackSlot is available in a register that also holds other stack - // slots, clobber those stack slots now. - Spills.ClobberSharingStackSlots(StackSlot); - - assert(PhysReg && "VR not assigned a physical register?"); - MRI->setPhysRegUsed(PhysReg); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - ReusedOperands.markClobbered(RReg); - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - - if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) { - MachineInstr *&LastStore = MaybeDeadStores[StackSlot]; - SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true, - LastStore, Spills, ReMatDefs, RegKills, KillOps); - NextMII = llvm::next(MII); - - // Check to see if this is a noop copy. If so, eliminate the - // instruction before considering the dest reg to be changed. - if (MI.isIdentityCopy()) { - ++NumDCE; - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - UpdateKills(*LastStore, TRI, RegKills, KillOps); - goto ProcessNextInst; - } - } - } - ProcessNextInst: - // Delete dead instructions without side effects. 
-    if (!Erased && !BackTracked && isSafeToDelete(MI)) {
-      InvalidateKills(MI, TRI, RegKills, KillOps);
-      EraseInstr(&MI);
-      Erased = true;
-    }
-    if (!Erased)
-      DistanceMap.insert(std::make_pair(&MI, DistanceMap.size()));
-    if (!Erased && !BackTracked) {
-      for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
-        UpdateKills(*II, TRI, RegKills, KillOps);
-    }
-    MII = NextMII;
-  }
-
-}
-
-llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
-  switch (RewriterOpt) {
-  default: llvm_unreachable("Unreachable!");
-  case local:
-    return new LocalRewriter();
-  case trivial:
-    return new TrivialRewriter();
-  }
-}
diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h
deleted file mode 100644
index 93474e0d7ff7..000000000000
--- a/lib/CodeGen/VirtRegRewriter.h
+++ /dev/null
@@ -1,32 +0,0 @@
-//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
-#define LLVM_CODEGEN_VIRTREGREWRITER_H
-
-namespace llvm {
-  class LiveIntervals;
-  class MachineFunction;
-  class VirtRegMap;
-
-  /// VirtRegRewriter interface: Implementations of this interface assign
-  /// spilled virtual registers to stack slots, rewriting the code.
-  struct VirtRegRewriter {
-    virtual ~VirtRegRewriter();
-    virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
-                                      LiveIntervals* LIs) = 0;
-  };
-
-  /// createVirtRegRewriter - Create an return a rewriter object, as specified
-  /// on the command line.
-  VirtRegRewriter* createVirtRegRewriter();
-
-}
-
-#endif
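The removed LocalRewriter revolves around a few recurring patterns. The spill-point handling in InsertSpills, for instance, amounts to emitting one stack store per register recorded at the point. A condensed sketch of that pattern, written as if it sat inside the removed VirtRegRewriter.cpp so that VRM, MRI, TII, TRI and MBB refer to the LocalRewriter members used above; the helper name is invented, and the pre-split filter, debug output and virtFolded() bookkeeping are elided:

// Condensed sketch of the removed InsertSpills(): emit a stack store for
// every <virtreg, isKill> pair recorded at a spill point.
bool emitSpillPointStores(MachineInstr *MI) {
  if (!VRM->isSpillPt(MI))
    return false;                                  // nothing recorded here
  MachineBasicBlock::iterator MII = MI;
  std::vector<std::pair<unsigned, bool> > &SpillRegs = VRM->getSpillPtSpills(MI);
  for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
    unsigned VirtReg = SpillRegs[i].first;
    bool isKill = SpillRegs[i].second;
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
    unsigned Phys = VRM->getPhys(VirtReg);         // physreg picked by the allocator
    int StackSlot = VRM->getStackSlot(VirtReg);    // slot assigned by the spiller
    MachineBasicBlock::iterator InsertPt = llvm::next(MII); // just past MI
    TII->storeRegToStackSlot(*MBB, InsertPt, Phys, isKill, StackSlot, RC, TRI);
    MachineInstr *StoreMI = prior(InsertPt);       // the store just emitted
    VRM->addSpillSlotUse(StackSlot, StoreMI);      // record the new slot use
  }
  return true;
}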
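The central decision in ProcessUses is whether a spilled use can reuse a physical register that already holds the stack slot's value, or whether a reload must be emitted. Stripped of the sub-register class check, rematerialization, inline-asm earlyclobber handling and kill-flag bookkeeping, it reduces to the sketch below; as before the helper name and simplified signature are illustrative, and AvailableSpills is the local map from the removed file:

// Reduced sketch: rewrite one spilled use operand, reusing an available
// value when possible and reloading from the stack slot otherwise.
void rewriteSpilledUse(MachineInstr &MI, unsigned OpIdx, unsigned VirtReg,
                       AvailableSpills &Spills) {
  int SS = VRM->getStackSlot(VirtReg);
  unsigned SubIdx = MI.getOperand(OpIdx).getSubReg();

  // Already live in some register? Then just point the operand at it.
  if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
    unsigned RReg = SubIdx ? TRI->getSubReg(InReg, SubIdx) : InReg;
    MI.getOperand(OpIdx).setReg(RReg);
    MI.getOperand(OpIdx).setSubReg(0);
    return;                                       // no load emitted
  }

  // Otherwise reload into the register the allocator assigned.
  unsigned PhysReg = VRM->getPhys(VirtReg);
  const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
  MachineBasicBlock::iterator InsertLoc = &MI;
  TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI);
  MachineInstr *LoadMI = prior(InsertLoc);
  VRM->addSpillSlotUse(SS, LoadMI);               // the slot gained a use
  Spills.ClobberPhysReg(PhysReg);                 // PhysReg's old contents die
  Spills.addAvailable(SS, PhysReg);               // ...and it now holds SS
  unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
  MI.getOperand(OpIdx).setReg(RReg);
  MI.getOperand(OpIdx).setSubReg(0);
}

The real code additionally refuses the reuse when the register has already been clobbered by an earlier operand, when an inline-asm earlyclobber operand overlaps it, or when a sub-register use needs a register class the reused register is not in.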
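When the spilled use is tied to a def (a two-address use&def), reusing the available register in place would let the instruction clobber it, so the removed code instead copies the value into the register the allocator designated. The core of that path, with the same caveat that the function name and the member names are assumptions:

// Reduced sketch: the stack slot's value is available in PhysReg, but this
// operand may not clobber it, so materialize a copy in the designated register.
void copyForTiedReuse(MachineInstr &MI, unsigned OpIdx, unsigned VirtReg,
                      unsigned PhysReg, int ReuseSlot, AvailableSpills &Spills) {
  unsigned DesignatedReg = VRM->getPhys(VirtReg);
  if (DesignatedReg == PhysReg) {
    MI.getOperand(OpIdx).setReg(PhysReg);          // same register: plain reuse
    MI.getOperand(OpIdx).setSubReg(0);
    return;
  }
  MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(),
                                 TII->get(TargetOpcode::COPY), DesignatedReg)
                           .addReg(PhysReg);
  CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); // mark as a reuse copy
  Spills.ClobberPhysReg(DesignatedReg);            // its previous contents die
  Spills.addAvailable(ReuseSlot, DesignatedReg);   // the slot also lives here now
  MI.getOperand(OpIdx).setReg(DesignatedReg);
  MI.getOperand(OpIdx).setSubReg(0);
}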
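RewriteMBB also deletes stores that turn out to be dead: MaybeDeadStores remembers, per stack slot, the last store whose value has not been read back, and a later store to the same slot lets the earlier one be removed. The bookkeeping reduces to roughly the following; EraseInstr stands in for the removed file's helper, kill-flag invalidation is elided, and both function names here are invented:

// Reduced sketch of the MaybeDeadStores bookkeeping, indexed by stack slot.
void noteStoreToSlot(MachineInstr *StoreMI, int StackSlot,
                     std::vector<MachineInstr *> &MaybeDeadStores) {
  if (MachineInstr *Prev = MaybeDeadStores[StackSlot]) {
    // The earlier store was never read back: it is dead, remove it.
    EraseInstr(Prev);
  }
  MaybeDeadStores[StackSlot] = StoreMI;            // new candidate dead store
}

void noteReadOfSlot(int StackSlot,
                    std::vector<MachineInstr *> &MaybeDeadStores) {
  // A reload (or folded read) of the slot keeps the pending store alive.
  MaybeDeadStores[StackSlot] = NULL;
}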
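The deleted header at the end of the diff is the whole public surface of this machinery: an abstract VirtRegRewriter plus a factory that picks an implementation according to a command-line option (RewriterOpt above). A minimal sketch of how a register-allocation pass drove it, using only what the removed header declares; the wrapper function name is illustrative:

#include "VirtRegRewriter.h"          // the header removed above
#include "llvm/ADT/OwningPtr.h"

// Reduced sketch: how a register allocator invoked the rewriter before this
// interface was removed.
static bool rewriteVirtRegs(llvm::MachineFunction &MF, llvm::VirtRegMap &VRM,
                            llvm::LiveIntervals *LIs) {
  llvm::OwningPtr<llvm::VirtRegRewriter> Rewriter(llvm::createVirtRegRewriter());
  return Rewriter->runOnMachineFunction(MF, VRM, LIs);
}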