aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2018-06-27 19:14:09 +0000
committerDimitry Andric <dim@FreeBSD.org>2018-06-27 19:14:09 +0000
commiteb1edd4d5902fdc561fd68fa70400fbd11127998 (patch)
tree0b10ccde4b5d3acf243966db54f4f3afef10cf93
parent2ed8710148a921286717212737771dd31c518fb7 (diff)
downloadsrc-vendor/llvm-60.tar.gz
src-vendor/llvm-60.zip
Vendor import of llvm 6.0.1 release r335540:vendor/llvm/llvm-release_601-r335540vendor/llvm-60
Notes
Notes: svn path=/vendor/llvm/dist-release_60/; revision=335720 svn path=/vendor/llvm/llvm-release_601-r335540/; revision=335721; tag=vendor/llvm/llvm-release_601-r335540
-rw-r--r--CMakeLists.txt2
-rw-r--r--include/llvm/CodeGen/MachineBasicBlock.h7
-rw-r--r--include/llvm/CodeGen/TargetInstrInfo.h9
-rw-r--r--include/llvm/IR/IntrinsicsPowerPC.td6
-rw-r--r--lib/Analysis/GlobalsModRef.cpp2
-rw-r--r--lib/Analysis/MemorySSA.cpp29
-rw-r--r--lib/CodeGen/IfConversion.cpp39
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp41
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp8
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp17
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp19
-rw-r--r--lib/CodeGen/TargetInstrInfo.cpp6
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp22
-rw-r--r--lib/IR/Core.cpp8
-rw-r--r--lib/MC/MCObjectFileInfo.cpp2
-rw-r--r--lib/Support/CMakeLists.txt9
-rw-r--r--lib/Support/Host.cpp2
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.cpp5
-rw-r--r--lib/Target/AArch64/AArch64FalkorHWPFFix.cpp18
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp12
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp35
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td2
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.cpp1
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp1
-rw-r--r--lib/Target/AMDGPU/SIInstructions.td4
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp14
-rw-r--r--lib/Target/ARM/ARMComputeBlockSize.cpp1
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp1
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp2
-rw-r--r--lib/Target/Mips/MicroMips32r6InstrInfo.td6
-rw-r--r--lib/Target/Mips/MicroMipsInstrInfo.td6
-rw-r--r--lib/Target/Mips/Mips.td4
-rw-r--r--lib/Target/Mips/Mips32r6InstrInfo.td39
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td34
-rw-r--r--lib/Target/Mips/Mips64r6InstrInfo.td30
-rw-r--r--lib/Target/Mips/MipsDSPInstrFormats.td2
-rw-r--r--lib/Target/Mips/MipsFastISel.cpp15
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp2
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td4
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp21
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td55
-rw-r--r--lib/Target/Mips/MipsLongBranch.cpp19
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp79
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp16
-rw-r--r--lib/Target/Mips/MipsSubtarget.h7
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp10
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp7
-rw-r--r--lib/Target/X86/CMakeLists.txt1
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp7
-rw-r--r--lib/Target/X86/X86.h3
-rw-r--r--lib/Target/X86/X86DomainReassignment.cpp65
-rw-r--r--lib/Target/X86/X86FastISel.cpp21
-rw-r--r--lib/Target/X86/X86FlagsCopyLowering.cpp935
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp26
-rw-r--r--lib/Target/X86/X86ISelLowering.h3
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td34
-rw-r--r--lib/Target/X86/X86InstrCompiler.td8
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp121
-rw-r--r--lib/Target/X86/X86InstrInfo.h6
-rw-r--r--lib/Target/X86/X86InstrInfo.td75
-rw-r--r--lib/Target/X86/X86InstrSystem.td13
-rw-r--r--lib/Target/X86/X86RegisterInfo.td16
-rw-r--r--lib/Target/X86/X86Schedule.td4
-rw-r--r--lib/Target/X86/X86ScheduleAtom.td4
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp3
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp10
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp40
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp25
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp84
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp6
-rw-r--r--lib/Transforms/Scalar/CallSiteSplitting.cpp77
-rw-r--r--lib/Transforms/Scalar/DivRemPairs.cpp11
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp17
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp57
-rw-r--r--lib/Transforms/Utils/FunctionComparator.cpp2
-rw-r--r--test/Analysis/MemorySSA/pr36883.ll38
-rw-r--r--test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll22
-rw-r--r--test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll70
-rw-r--r--test/CodeGen/AArch64/falkor-hwpf-fix.mir25
-rw-r--r--test/CodeGen/AArch64/inlineasm-S-constraint.ll20
-rw-r--r--test/CodeGen/AArch64/spill-stack-realignment.mir35
-rw-r--r--test/CodeGen/AMDGPU/ctpop16.ll334
-rw-r--r--test/CodeGen/ARM/peephole-phi.mir36
-rw-r--r--test/CodeGen/Hexagon/ifcvt-diamond-ret.mir25
-rw-r--r--test/CodeGen/MIR/PowerPC/ifcvt-diamond-ret.mir34
-rw-r--r--test/CodeGen/Mips/const-mult.ll645
-rw-r--r--test/CodeGen/Mips/indirect-jump-hazard/calls.ll188
-rw-r--r--test/CodeGen/Mips/indirect-jump-hazard/guards-verify-call.mir58
-rw-r--r--test/CodeGen/Mips/indirect-jump-hazard/guards-verify-tailcall.mir59
-rw-r--r--test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll649
-rw-r--r--test/CodeGen/Mips/indirect-jump-hazard/long-branch.ll138
-rw-r--r--test/CodeGen/Mips/indirect-jump-hazard/long-calls.ll113
-rw-r--r--test/CodeGen/Mips/indirect-jump-hazard/unsupported-micromips.ll5
-rw-r--r--test/CodeGen/Mips/indirect-jump-hazard/unsupported-mips32.ll5
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-bad-l1.ll13
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll10
-rw-r--r--test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir198
-rw-r--r--test/CodeGen/PowerPC/no-dup-of-bdnz.ll75
-rw-r--r--test/CodeGen/PowerPC/pr35402.ll18
-rw-r--r--test/CodeGen/Thumb/PR36658.mir359
-rw-r--r--test/CodeGen/X86/GlobalISel/add-scalar.ll14
-rw-r--r--test/CodeGen/X86/O0-pipeline.ll2
-rw-r--r--test/CodeGen/X86/clobber-fi0.ll37
-rw-r--r--test/CodeGen/X86/cmpxchg-clobber-flags.ll363
-rw-r--r--test/CodeGen/X86/copy-eflags.ll357
-rw-r--r--test/CodeGen/X86/domain-reassignment-implicit-def.ll24
-rw-r--r--test/CodeGen/X86/domain-reassignment-test.ll37
-rw-r--r--test/CodeGen/X86/eflags-copy-expansion.mir64
-rw-r--r--test/CodeGen/X86/fast-isel-shift.ll12
-rw-r--r--test/CodeGen/X86/flags-copy-lowering.mir555
-rw-r--r--test/CodeGen/X86/ipra-reg-usage.ll2
-rw-r--r--test/CodeGen/X86/mul-i1024.ll11163
-rw-r--r--test/CodeGen/X86/peephole-na-phys-copy-folding.ll109
-rw-r--r--test/CodeGen/X86/pr37264.ll12
-rw-r--r--test/CodeGen/X86/win64_frame.ll334
-rw-r--r--test/CodeGen/X86/x86-repmov-copy-eflags.ll11
-rw-r--r--test/DebugInfo/X86/dbg-value-inlined-parameter.ll6
-rw-r--r--test/DebugInfo/X86/live-debug-vars-discard-invalid.mir141
-rw-r--r--test/ExecutionEngine/RuntimeDyld/PowerPC/Inputs/ppc64_elf_module_b.s42
-rw-r--r--test/ExecutionEngine/RuntimeDyld/PowerPC/ppc64_elf.s47
-rw-r--r--test/MC/ELF/cfi-large-model.s68
-rw-r--r--test/MC/Mips/unsupported-relocation.s13
-rw-r--r--test/Transforms/ArgumentPromotion/musttail.ll45
-rw-r--r--test/Transforms/CallSiteSplitting/musttail.ll109
-rw-r--r--test/Transforms/DeadArgElim/musttail-caller.ll16
-rw-r--r--test/Transforms/GlobalOpt/musttail_cc.ll34
-rw-r--r--test/Transforms/IPConstantProp/musttail-call.ll58
-rw-r--r--test/Transforms/InstCombine/gep-addrspace.ll19
-rw-r--r--test/Transforms/JumpThreading/header-succ.ll99
-rw-r--r--test/Transforms/MergeFunc/inline-asm.ll53
-rw-r--r--test/Transforms/MergeFunc/weak-small.ll16
-rw-r--r--tools/llvm-config/CMakeLists.txt20
133 files changed, 12793 insertions, 6621 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2bf2c21a306f..f8da6cf92119 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,7 +24,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR)
set(LLVM_VERSION_MINOR 0)
endif()
if(NOT DEFINED LLVM_VERSION_PATCH)
- set(LLVM_VERSION_PATCH 0)
+ set(LLVM_VERSION_PATCH 1)
endif()
if(NOT DEFINED LLVM_VERSION_SUFFIX)
set(LLVM_VERSION_SUFFIX "")
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 0c9110cbaa87..89210e16629e 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -449,6 +449,13 @@ public:
/// Replace successor OLD with NEW and update probability info.
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New);
+ /// Copy a successor (and any probability info) from the original block to
+ /// this block. Uses an iterator into the original block's successors.
+ ///
+ /// This is useful when doing a partial clone of successors. Afterward, the
+ /// probabilities may need to be normalized.
+ void copySuccessor(MachineBasicBlock *Orig, succ_iterator I);
+
/// Transfers all the successors from MBB to this machine basic block (i.e.,
/// copies all the successors FromMBB and remove all the successors from
/// FromMBB).
diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h
index 6b5404be35d3..57dee3bb44b3 100644
--- a/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/include/llvm/CodeGen/TargetInstrInfo.h
@@ -421,7 +421,8 @@ public:
/// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI
/// and \p DefIdx.
/// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of
- /// the list is modeled as <Reg:SubReg, SubIdx>.
+ /// the list is modeled as <Reg:SubReg, SubIdx>. Operands with the undef
+ /// flag are not added to this list.
/// E.g., REG_SEQUENCE %1:sub1, sub0, %2, sub1 would produce
/// two elements:
/// - %1:sub1, sub0
@@ -446,7 +447,8 @@ public:
/// - %1:sub1, sub0
///
/// \returns true if it is possible to build such an input sequence
- /// with the pair \p MI, \p DefIdx. False otherwise.
+ /// with the pair \p MI, \p DefIdx and the operand has no undef flag set.
+ /// False otherwise.
///
/// \pre MI.isExtractSubreg() or MI.isExtractSubregLike().
///
@@ -465,7 +467,8 @@ public:
/// - InsertedReg: %1:sub1, sub3
///
/// \returns true if it is possible to build such an input sequence
- /// with the pair \p MI, \p DefIdx. False otherwise.
+ /// with the pair \p MI, \p DefIdx and the operand has no undef flag set.
+ /// False otherwise.
///
/// \pre MI.isInsertSubreg() or MI.isInsertSubregLike().
///
diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td
index 6321bb81b8cb..a302d5726aa3 100644
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -36,8 +36,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// Intrinsics used to generate ctr-based loops. These should only be
// generated by the PowerPC backend!
+ // The branch intrinsic is marked as NoDuplicate because loop rotation will
+ // attempt to duplicate it forming loops where a block reachable from one
+ // instance of it can contain another.
def int_ppc_mtctr : Intrinsic<[], [llvm_anyint_ty], []>;
- def int_ppc_is_decremented_ctr_nonzero : Intrinsic<[llvm_i1_ty], [], []>;
+ def int_ppc_is_decremented_ctr_nonzero :
+ Intrinsic<[llvm_i1_ty], [], [IntrNoDuplicate]>;
// Intrinsics for [double]word extended forms of divide instructions
def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">,
diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp
index daee93267f56..94306d0f54ad 100644
--- a/lib/Analysis/GlobalsModRef.cpp
+++ b/lib/Analysis/GlobalsModRef.cpp
@@ -502,6 +502,8 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
}
FunctionInfo &FI = FunctionInfos[F];
+ Handles.emplace_front(*this, F);
+ Handles.front().I = Handles.begin();
bool KnowNothing = false;
// Collect the mod/ref properties due to called functions. We only compute
diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp
index 6e9368c49d65..09605f61fa93 100644
--- a/lib/Analysis/MemorySSA.cpp
+++ b/lib/Analysis/MemorySSA.cpp
@@ -153,9 +153,14 @@ public:
if (IsCall != Other.IsCall)
return false;
- if (IsCall)
- return CS.getCalledValue() == Other.CS.getCalledValue();
- return Loc == Other.Loc;
+ if (!IsCall)
+ return Loc == Other.Loc;
+
+ if (CS.getCalledValue() != Other.CS.getCalledValue())
+ return false;
+
+ return CS.arg_size() == Other.CS.arg_size() &&
+ std::equal(CS.arg_begin(), CS.arg_end(), Other.CS.arg_begin());
}
private:
@@ -179,12 +184,18 @@ template <> struct DenseMapInfo<MemoryLocOrCall> {
}
static unsigned getHashValue(const MemoryLocOrCall &MLOC) {
- if (MLOC.IsCall)
- return hash_combine(MLOC.IsCall,
- DenseMapInfo<const Value *>::getHashValue(
- MLOC.getCS().getCalledValue()));
- return hash_combine(
- MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc()));
+ if (!MLOC.IsCall)
+ return hash_combine(
+ MLOC.IsCall,
+ DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc()));
+
+ hash_code hash =
+ hash_combine(MLOC.IsCall, DenseMapInfo<const Value *>::getHashValue(
+ MLOC.getCS().getCalledValue()));
+
+ for (const Value *Arg : MLOC.getCS().args())
+ hash = hash_combine(hash, DenseMapInfo<const Value *>::getHashValue(Arg));
+ return hash;
}
static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) {
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index a22ce0dab9c2..d8ce90e63a9d 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -1714,20 +1714,25 @@ bool IfConverter::IfConvertDiamondCommon(
}
// Remove the duplicated instructions at the beginnings of both paths.
- // Skip dbg_value instructions
+ // Skip dbg_value instructions.
MachineBasicBlock::iterator DI1 = MBB1.getFirstNonDebugInstr();
MachineBasicBlock::iterator DI2 = MBB2.getFirstNonDebugInstr();
BBI1->NonPredSize -= NumDups1;
BBI2->NonPredSize -= NumDups1;
// Skip past the dups on each side separately since there may be
- // differing dbg_value entries.
+ // differing dbg_value entries. NumDups1 can include a "return"
+ // instruction, if it's not marked as "branch".
for (unsigned i = 0; i < NumDups1; ++DI1) {
+ if (DI1 == MBB1.end())
+ break;
if (!DI1->isDebugValue())
++i;
}
while (NumDups1 != 0) {
++DI2;
+ if (DI2 == MBB2.end())
+ break;
if (!DI2->isDebugValue())
--NumDups1;
}
@@ -1738,11 +1743,16 @@ bool IfConverter::IfConvertDiamondCommon(
Redefs.stepForward(MI, Dummy);
}
}
+
BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1);
MBB2.erase(MBB2.begin(), DI2);
- // The branches have been checked to match, so it is safe to remove the branch
- // in BB1 and rely on the copy in BB2
+ // The branches have been checked to match, so it is safe to remove the
+ // branch in BB1 and rely on the copy in BB2. The complication is that
+ // the blocks may end with a return instruction, which may or may not
+ // be marked as "branch". If it's not, then it could be included in
+ // "dups1", leaving the blocks potentially empty after moving the common
+ // duplicates.
#ifndef NDEBUG
// Unanalyzable branches must match exactly. Check that now.
if (!BBI1->IsBrAnalyzable)
@@ -1768,11 +1778,14 @@ bool IfConverter::IfConvertDiamondCommon(
if (RemoveBranch)
BBI2->NonPredSize -= TII->removeBranch(*BBI2->BB);
else {
- do {
- assert(DI2 != MBB2.begin());
- DI2--;
- } while (DI2->isBranch() || DI2->isDebugValue());
- DI2++;
+ // Make DI2 point to the end of the range where the common "tail"
+ // instructions could be found.
+ while (DI2 != MBB2.begin()) {
+ MachineBasicBlock::iterator Prev = std::prev(DI2);
+ if (!Prev->isBranch() && !Prev->isDebugValue())
+ break;
+ DI2 = Prev;
+ }
}
while (NumDups2 != 0) {
// NumDups2 only counted non-dbg_value instructions, so this won't
@@ -1833,11 +1846,15 @@ bool IfConverter::IfConvertDiamondCommon(
// a non-predicated in BBI2, then we don't want to predicate the one from
// BBI2. The reason is that if we merged these blocks, we would end up with
// two predicated terminators in the same block.
+ // Also, if the branches in MBB1 and MBB2 were non-analyzable, then don't
+ // predicate them either. They were checked to be identical, and so the
+ // same branch would happen regardless of which path was taken.
if (!MBB2.empty() && (DI2 == MBB2.end())) {
MachineBasicBlock::iterator BBI1T = MBB1.getFirstTerminator();
MachineBasicBlock::iterator BBI2T = MBB2.getFirstTerminator();
- if (BBI1T != MBB1.end() && TII->isPredicated(*BBI1T) &&
- BBI2T != MBB2.end() && !TII->isPredicated(*BBI2T))
+ bool BB1Predicated = BBI1T != MBB1.end() && TII->isPredicated(*BBI1T);
+ bool BB2NonPredicated = BBI2T != MBB2.end() && !TII->isPredicated(*BBI2T);
+ if (BB2NonPredicated && (BB1Predicated || !BBI2->IsBrAnalyzable))
--DI2;
}
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 75e3d35169cf..4ffcffcea693 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -514,6 +514,39 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
return false;
}
+ // Detect invalid DBG_VALUE instructions, with a debug-use of a virtual
+ // register that hasn't been defined yet. If we do not remove those here, then
+ // the re-insertion of the DBG_VALUE instruction after register allocation
+ // will be incorrect.
+ // TODO: If earlier passes are corrected to generate sane debug information
+ // (and if the machine verifier is improved to catch this), then these checks
+ // could be removed or replaced by asserts.
+ bool Discard = false;
+ if (MI.getOperand(0).isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) {
+ const unsigned Reg = MI.getOperand(0).getReg();
+ if (!LIS->hasInterval(Reg)) {
+ // The DBG_VALUE is described by a virtual register that does not have a
+ // live interval. Discard the DBG_VALUE.
+ Discard = true;
+ DEBUG(dbgs() << "Discarding debug info (no LIS interval): "
+ << Idx << " " << MI);
+ } else {
+ // The DBG_VALUE is only valid if either Reg is live out from Idx, or Reg
+ // is defined dead at Idx (where Idx is the slot index for the instruction
+ // preceding the DBG_VALUE).
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ LiveQueryResult LRQ = LI.Query(Idx);
+ if (!LRQ.valueOutOrDead()) {
+ // We have found a DBG_VALUE with the value in a virtual register that
+ // is not live. Discard the DBG_VALUE.
+ Discard = true;
+ DEBUG(dbgs() << "Discarding debug info (reg not live): "
+ << Idx << " " << MI);
+ }
+ }
+ }
+
// Get or create the UserValue for (variable,offset) here.
bool IsIndirect = MI.getOperand(1).isImm();
if (IsIndirect)
@@ -522,7 +555,13 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
const DIExpression *Expr = MI.getDebugExpression();
UserValue *UV =
getUserValue(Var, Expr, MI.getDebugLoc());
- UV->addDef(Idx, MI.getOperand(0), IsIndirect);
+ if (!Discard)
+ UV->addDef(Idx, MI.getOperand(0), IsIndirect);
+ else {
+ MachineOperand MO = MachineOperand::CreateReg(0U, false);
+ MO.setIsDebug();
+ UV->addDef(Idx, MO, false);
+ }
return true;
}
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 209abf34d885..cd67449e3acf 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -646,6 +646,14 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
removeSuccessor(OldI);
}
+/// Copy the successor edge selected by \p I from \p Orig onto this block.
+///
+/// \param Orig block whose successor list \p I iterates over.
+/// \param I    iterator into Orig's successors naming the edge to copy.
+///
+/// If Orig has probability info, the edge is added with the probability Orig
+/// reports for it; otherwise it is added without a probability.
+void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig,
+ succ_iterator I) {
+ // A non-empty Probs is what signals that Orig has probability info to copy.
+ // Testing for empty() here would drop the probability exactly when one
+ // exists.
+ if (!Orig->Probs.empty())
+ addSuccessor(*I, Orig->getSuccProbability(I));
+ else
+ addSuccessorWithoutProb(*I);
+}
+
void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) {
Predecessors.push_back(Pred);
}
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 84c808ee7938..167135b56ec0 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -513,6 +513,11 @@ public:
bool runOnMachineFunction(MachineFunction &F) override;
+ /// Returns true when tail duplication may be used during placement: the
+ /// TailDupPlacement option must be on and the target must not require a
+ /// structured CFG.
+ bool allowTailDupPlacement() const {
+ assert(F);
+ // Option off: answer without consulting the target.
+ if (!TailDupPlacement)
+ return false;
+ return !F->getTarget().requiresStructuredCFG();
+ }
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineBlockFrequencyInfo>();
@@ -1018,7 +1023,7 @@ MachineBlockPlacement::getBestTrellisSuccessor(
MachineBasicBlock *Succ1 = BestA.Dest;
MachineBasicBlock *Succ2 = BestB.Dest;
// Check to see if tail-duplication would be profitable.
- if (TailDupPlacement && shouldTailDuplicate(Succ2) &&
+ if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
Chain, BlockFilter)) {
@@ -1044,7 +1049,7 @@ MachineBlockPlacement::getBestTrellisSuccessor(
return Result;
}
-/// When the option TailDupPlacement is on, this method checks if the
+/// When allowTailDupPlacement() returns true, this method checks if the
/// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
/// into all of its unplaced, unfiltered predecessors, that are not BB.
bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
@@ -1493,7 +1498,7 @@ MachineBlockPlacement::selectBestSuccessor(
if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
Chain, BlockFilter)) {
// If tail duplication would make Succ profitable, place it.
- if (TailDupPlacement && shouldTailDuplicate(Succ))
+ if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
DupCandidates.push_back(std::make_tuple(SuccProb, Succ));
continue;
}
@@ -1702,7 +1707,7 @@ void MachineBlockPlacement::buildChain(
auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
MachineBasicBlock* BestSucc = Result.BB;
bool ShouldTailDup = Result.ShouldTailDup;
- if (TailDupPlacement)
+ if (allowTailDupPlacement())
ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc));
// If an immediate successor isn't available, look for the best viable
@@ -1724,7 +1729,7 @@ void MachineBlockPlacement::buildChain(
// Placement may have changed tail duplication opportunities.
// Check for that now.
- if (TailDupPlacement && BestSucc && ShouldTailDup) {
+ if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
// If the chosen successor was duplicated into all its predecessors,
// don't bother laying it out, just go round the loop again with BB as
// the chain end.
@@ -2758,7 +2763,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
TailDupSize = TailDupPlacementAggressiveThreshold;
}
- if (TailDupPlacement) {
+ if (allowTailDupPlacement()) {
MPDT = &getAnalysis<MachinePostDominatorTree>();
if (MF.getFunction().optForSize())
TailDupSize = 1;
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 11acbe687a31..1090550243f8 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1882,6 +1882,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
return ValueTrackerResult();
// Otherwise, we want the whole source.
const MachineOperand &Src = Def->getOperand(1);
+ if (Src.isUndef())
+ return ValueTrackerResult();
return ValueTrackerResult(Src.getReg(), Src.getSubReg());
}
@@ -1925,6 +1927,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
}
const MachineOperand &Src = Def->getOperand(SrcIdx);
+ if (Src.isUndef())
+ return ValueTrackerResult();
return ValueTrackerResult(Src.getReg(), Src.getSubReg());
}
@@ -2093,6 +2097,10 @@ ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
auto &MO = Def->getOperand(i);
assert(MO.isReg() && "Invalid PHI instruction");
+ // We have no code to deal with undef operands. They shouldn't happen in
+ // normal programs anyway.
+ if (MO.isUndef())
+ return ValueTrackerResult();
Res.addSource(MO.getReg(), MO.getSubReg());
}
@@ -2149,9 +2157,14 @@ ValueTrackerResult ValueTracker::getNextSource() {
// If we can still move up in the use-def chain, move to the next
// definition.
if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) {
- Def = MRI.getVRegDef(Reg);
- DefIdx = MRI.def_begin(Reg).getOperandNo();
- DefSubReg = Res.getSrcSubReg(0);
+ MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg);
+ if (DI != MRI.def_end()) {
+ Def = DI->getParent();
+ DefIdx = DI.getOperandNo();
+ DefSubReg = Res.getSrcSubReg(0);
+ } else {
+ Def = nullptr;
+ }
return Res;
}
}
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index db925f803db6..bd90ed5b55b8 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -1151,6 +1151,8 @@ bool TargetInstrInfo::getRegSequenceInputs(
for (unsigned OpIdx = 1, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx;
OpIdx += 2) {
const MachineOperand &MOReg = MI.getOperand(OpIdx);
+ if (MOReg.isUndef())
+ continue;
const MachineOperand &MOSubIdx = MI.getOperand(OpIdx + 1);
assert(MOSubIdx.isImm() &&
"One of the subindex of the reg_sequence is not an immediate");
@@ -1174,6 +1176,8 @@ bool TargetInstrInfo::getExtractSubregInputs(
// Def = EXTRACT_SUBREG v0.sub1, sub0.
assert(DefIdx == 0 && "EXTRACT_SUBREG only has one def");
const MachineOperand &MOReg = MI.getOperand(1);
+ if (MOReg.isUndef())
+ return false;
const MachineOperand &MOSubIdx = MI.getOperand(2);
assert(MOSubIdx.isImm() &&
"The subindex of the extract_subreg is not an immediate");
@@ -1198,6 +1202,8 @@ bool TargetInstrInfo::getInsertSubregInputs(
assert(DefIdx == 0 && "INSERT_SUBREG only has one def");
const MachineOperand &MOBaseReg = MI.getOperand(1);
const MachineOperand &MOInsertedReg = MI.getOperand(2);
+ if (MOInsertedReg.isUndef())
+ return false;
const MachineOperand &MOSubIdx = MI.getOperand(3);
assert(MOSubIdx.isImm() &&
"One of the subindex of the reg_sequence is not an immediate");
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index c0047d0cde6a..2c57eee191db 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -1422,7 +1422,8 @@ RuntimeDyldELF::processRelocationRef(
SectionEntry &Section = Sections[SectionID];
uint8_t *Target = Section.getAddressWithOffset(Offset);
bool RangeOverflow = false;
- if (!Value.SymbolName && SymType != SymbolRef::ST_Unknown) {
+ bool IsExtern = Value.SymbolName || SymType == SymbolRef::ST_Unknown;
+ if (!IsExtern) {
if (AbiVariant != 2) {
// In the ELFv1 ABI, a function call may point to the .opd entry,
// so the final symbol value is calculated based on the relocation
@@ -1432,21 +1433,24 @@ RuntimeDyldELF::processRelocationRef(
} else {
// In the ELFv2 ABI, a function symbol may provide a local entry
// point, which must be used for direct calls.
- uint8_t SymOther = Symbol->getOther();
- Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther);
+ if (Value.SectionID == SectionID){
+ uint8_t SymOther = Symbol->getOther();
+ Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther);
+ }
}
uint8_t *RelocTarget =
Sections[Value.SectionID].getAddressWithOffset(Value.Addend);
int64_t delta = static_cast<int64_t>(Target - RelocTarget);
// If it is within 26-bits branch range, just set the branch target
- if (SignExtend64<26>(delta) == delta) {
+ if (SignExtend64<26>(delta) != delta) {
+ RangeOverflow = true;
+ } else if ((AbiVariant != 2) ||
+ (AbiVariant == 2 && Value.SectionID == SectionID)) {
RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
addRelocationForSection(RE, Value.SectionID);
- } else {
- RangeOverflow = true;
}
}
- if (Value.SymbolName || SymType == SymbolRef::ST_Unknown ||
+ if (IsExtern || (AbiVariant == 2 && Value.SectionID != SectionID) ||
RangeOverflow) {
// It is an external symbol (either Value.SymbolName is set, or
// SymType is SymbolRef::ST_Unknown) or out of range.
@@ -1503,10 +1507,10 @@ RuntimeDyldELF::processRelocationRef(
RelType, 0);
Section.advanceStubOffset(getMaxStubSize());
}
- if (Value.SymbolName || SymType == SymbolRef::ST_Unknown) {
+ if (IsExtern || (AbiVariant == 2 && Value.SectionID != SectionID)) {
// Restore the TOC for external calls
if (AbiVariant == 2)
- writeInt32BE(Target + 4, 0xE8410018); // ld r2,28(r1)
+ writeInt32BE(Target + 4, 0xE8410018); // ld r2,24(r1)
else
writeInt32BE(Target + 4, 0xE8410028); // ld r2,40(r1)
}
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index d3c33edec186..743e3710fd68 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -359,11 +359,9 @@ LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) {
return wrap(&unwrap(Ty)->getContext());
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void LLVMDumpType(LLVMTypeRef Ty) {
- return unwrap(Ty)->dump();
+void LLVMDumpType(LLVMTypeRef Ty) {
+ return unwrap(Ty)->print(errs(), /*IsForDebug=*/true);
}
-#endif
char *LLVMPrintTypeToString(LLVMTypeRef Ty) {
std::string buf;
@@ -658,7 +656,7 @@ void LLVMSetValueName(LLVMValueRef Val, const char *Name) {
unwrap(Val)->setName(Name);
}
-LLVM_DUMP_METHOD void LLVMDumpValue(LLVMValueRef Val) {
+void LLVMDumpValue(LLVMValueRef Val) {
unwrap(Val)->print(errs(), /*IsForDebug=*/true);
}
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index a6b5c43f1d2a..328f000f37c9 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -289,6 +289,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
case Triple::mips64el:
FDECFIEncoding = dwarf::DW_EH_PE_sdata8;
break;
+ case Triple::ppc64:
+ case Triple::ppc64le:
case Triple::x86_64:
FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
(Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index d968688911eb..6439d16a2a3f 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -13,8 +13,13 @@ elseif( CMAKE_HOST_UNIX )
if( HAVE_LIBDL )
set(system_libs ${system_libs} ${CMAKE_DL_LIBS})
endif()
- if( HAVE_BACKTRACE )
- set(system_libs ${system_libs} ${Backtrace_LIBRARIES})
+ if( HAVE_BACKTRACE AND NOT "${Backtrace_LIBRARIES}" STREQUAL "" )
+ # On BSDs, CMake returns a fully qualified path to the backtrace library.
+ # We need to remove the path and the 'lib' prefix, to make it look like a
+ # regular short library name, suitable for appending to a -l link flag.
+ get_filename_component(Backtrace_LIBFILE ${Backtrace_LIBRARIES} NAME_WE)
+ STRING(REGEX REPLACE "^lib" "" Backtrace_LIBFILE ${Backtrace_LIBFILE})
+ set(system_libs ${system_libs} ${Backtrace_LIBFILE})
endif()
if(LLVM_ENABLE_TERMINFO)
if(HAVE_TERMINFO)
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 3dc67ad782af..6e65b5e6c807 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -1009,7 +1009,7 @@ StringRef sys::getHostCPUName() {
#include "llvm/Support/X86TargetParser.def"
// Now check types.
-#define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \
+#define X86_CPU_TYPE(ARCHNAME, ENUM) \
if (Type == X86::ENUM) \
return ARCHNAME;
#include "llvm/Support/X86TargetParser.def"
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 2ff2ee347f56..6704fa27c86e 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -299,6 +299,11 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
printOffset(MO.getOffset(), O);
break;
}
+ case MachineOperand::MO_BlockAddress: {
+ MCSymbol *Sym = GetBlockAddressSymbol(MO.getBlockAddress());
+ Sym->print(O, MAI);
+ break;
+ }
}
}
diff --git a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
index d1ddb2e3ef70..0d00dab598d5 100644
--- a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -46,6 +46,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
@@ -60,6 +61,8 @@ STATISTIC(NumCollisionsAvoided,
"Number of HW prefetch tag collisions avoided");
STATISTIC(NumCollisionsNotAvoided,
"Number of HW prefetch tag collisions not avoided due to lack of regsiters");
+DEBUG_COUNTER(FixCounter, "falkor-hwpf",
+ "Controls which tag collisions are avoided");
namespace {
@@ -729,6 +732,21 @@ void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) {
bool Fixed = false;
DEBUG(dbgs() << "Attempting to fix tag collision: " << MI);
+ if (!DebugCounter::shouldExecute(FixCounter)) {
+ DEBUG(dbgs() << "Skipping fix due to debug counter:\n " << MI);
+ continue;
+ }
+
+ // Add the non-base registers of MI as live so we don't use them as
+ // scratch registers.
+ for (unsigned OpI = 0, OpE = MI.getNumOperands(); OpI < OpE; ++OpI) {
+ if (OpI == static_cast<unsigned>(LdI.BaseRegIdx))
+ continue;
+ MachineOperand &MO = MI.getOperand(OpI);
+ if (MO.isReg() && MO.readsReg())
+ LR.addReg(MO.getReg());
+ }
+
for (unsigned ScratchReg : AArch64::GPR64RegClass) {
if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg))
continue;
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index d66f7b59a4b5..789200b28445 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -917,6 +917,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
bool isFixed = MFI.isFixedObjectIndex(FI);
+ bool isCSR = !isFixed && MFI.getObjectOffset(FI) >=
+ -((int)AFI->getCalleeSavedStackSize());
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -930,6 +932,12 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
// Argument access should always use the FP.
if (isFixed) {
UseFP = hasFP(MF);
+ } else if (isCSR && RegInfo->needsStackRealignment(MF)) {
+ // References to the CSR area must use FP if we're re-aligning the stack
+ // since the dynamically-sized alignment padding is between the SP/BP and
+ // the CSR area.
+ assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
+ UseFP = true;
} else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
!RegInfo->needsStackRealignment(MF)) {
// Use SP or FP, whichever gives us the best chance of the offset
@@ -947,9 +955,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
}
}
- assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
+ assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
"In the presence of dynamic stack pointer realignment, "
- "non-argument objects cannot be accessed through the frame pointer");
+ "non-argument/CSR objects cannot be accessed through the frame pointer");
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 41ed24c329ef..233d6be247c2 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4930,7 +4930,8 @@ bool AArch64TargetLowering::isOffsetFoldingLegal(
bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
// We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
// FIXME: We should be able to handle f128 as well with a clever lowering.
- if (Imm.isPosZero() && (VT == MVT::f16 || VT == MVT::f64 || VT == MVT::f32)) {
+ if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
+ (VT == MVT::f16 && Subtarget->hasFullFP16()))) {
DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
return true;
}
@@ -5066,7 +5067,7 @@ SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
// Table of Constraints
// TODO: This is the current set of constraints supported by ARM for the
-// compiler, not all of them may make sense, e.g. S may be difficult to support.
+// compiler, not all of them may make sense.
//
// r - A general register
// w - An FP/SIMD register of some size in the range v0-v31
@@ -5126,6 +5127,8 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
// currently handle addresses it is the same as 'r'.
case 'Q':
return C_Memory;
+ case 'S': // A symbolic address
+ return C_Other;
}
}
return TargetLowering::getConstraintType(Constraint);
@@ -5250,6 +5253,23 @@ void AArch64TargetLowering::LowerAsmOperandForConstraint(
Result = DAG.getRegister(AArch64::WZR, MVT::i32);
break;
}
+ case 'S': {
+ // An absolute symbolic address or label reference.
+ if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
+ GA->getValueType(0));
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(Op)) {
+ Result =
+ DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(Op)) {
+ Result =
+ DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0));
+ } else
+ return;
+ break;
+ }
case 'I':
case 'J':
@@ -9637,6 +9657,15 @@ static SDValue performPostLD1Combine(SDNode *N,
if (LD->getOpcode() != ISD::LOAD)
return SDValue();
+ // The vector lane must be a constant in the LD1LANE opcode.
+ SDValue Lane;
+ if (IsLaneOp) {
+ Lane = N->getOperand(2);
+ auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
+ if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
+ return SDValue();
+ }
+
LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
EVT MemVT = LoadSDN->getMemoryVT();
// Check if memory operand is the same type as the vector element.
@@ -9693,7 +9722,7 @@ static SDValue performPostLD1Combine(SDNode *N,
Ops.push_back(LD->getOperand(0)); // Chain
if (IsLaneOp) {
Ops.push_back(Vector); // The vector to be inserted
- Ops.push_back(N->getOperand(2)); // The lane to be inserted in the vector
+ Ops.push_back(Lane); // The lane to be inserted in the vector
}
Ops.push_back(Addr);
Ops.push_back(Inc);
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 79826ca2ed8d..040011d858e7 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2713,7 +2713,7 @@ defm FMOV : UnscaledConversion<"fmov">;
// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
- Sched<[WriteF]>;
+ Sched<[WriteF]>, Requires<[HasFullFP16]>;
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 2042dbf6d5e2..e09263b6fac9 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -147,6 +147,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeR600PacketizerPass(*PR);
initializeR600ExpandSpecialInstrsPassPass(*PR);
initializeR600VectorRegMergerPass(*PR);
+ initializeGlobalISel(*PR);
initializeAMDGPUDAGToDAGISelPass(*PR);
initializeSILowerI1CopiesPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 6d89aa6968e9..41ca7fe8bfaa 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -358,6 +358,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote);
setOperationAction(ISD::CTLZ, MVT::i16, Promote);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i16, Promote);
setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 9740a18b7248..8c02e8da8d79 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -726,6 +726,10 @@ def : GCNPat <
(i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
+def : GCNPat <
+ (i16 (add (i16 (trunc (ctpop i32:$popcnt))), i16:$val)),
+ (V_BCNT_U32_B32_e64 $popcnt, $val)
+>;
/********** ============================================ **********/
/********** Extraction, Insertion, Building and Casting **********/
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 8c1727724a9e..cff24a10bb5f 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -4864,12 +4864,14 @@ bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
// Populate the InputRegs accordingly.
// rY
const MachineOperand *MOReg = &MI.getOperand(1);
- InputRegs.push_back(
- RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0));
+ if (!MOReg->isUndef())
+ InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
+ MOReg->getSubReg(), ARM::ssub_0));
// rZ
MOReg = &MI.getOperand(2);
- InputRegs.push_back(
- RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1));
+ if (!MOReg->isUndef())
+ InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
+ MOReg->getSubReg(), ARM::ssub_1));
return true;
}
llvm_unreachable("Target dependent opcode missing");
@@ -4888,6 +4890,8 @@ bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
// rX = EXTRACT_SUBREG dZ, ssub_0
// rY = EXTRACT_SUBREG dZ, ssub_1
const MachineOperand &MOReg = MI.getOperand(2);
+ if (MOReg.isUndef())
+ return false;
InputReg.Reg = MOReg.getReg();
InputReg.SubReg = MOReg.getSubReg();
InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
@@ -4907,6 +4911,8 @@ bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
// dX = VSETLNi32 dY, rZ, imm
const MachineOperand &MOBaseReg = MI.getOperand(1);
const MachineOperand &MOInsertedReg = MI.getOperand(2);
+ if (MOInsertedReg.isUndef())
+ return false;
const MachineOperand &MOIndex = MI.getOperand(3);
BaseReg.Reg = MOBaseReg.getReg();
BaseReg.SubReg = MOBaseReg.getSubReg();
diff --git a/lib/Target/ARM/ARMComputeBlockSize.cpp b/lib/Target/ARM/ARMComputeBlockSize.cpp
index 2e97b99b05a7..b263e9d86c42 100644
--- a/lib/Target/ARM/ARMComputeBlockSize.cpp
+++ b/lib/Target/ARM/ARMComputeBlockSize.cpp
@@ -35,6 +35,7 @@ mayOptimizeThumb2Instruction(const MachineInstr *MI) {
case ARM::tBcc:
// optimizeThumb2JumpTables.
case ARM::t2BR_JT:
+ case ARM::tBR_JTr:
return true;
}
return false;
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 345b081500a4..f36a4317b1b9 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -5136,6 +5136,7 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
// It also applies for registers Rt and Rs of microMIPSr6 jalrc.hb instruction
// and registers Rd and Base for microMIPS lwp instruction
case Mips::JALR_HB:
+ case Mips::JALR_HB64:
case Mips::JALRC_HB_MMR6:
case Mips::JALRC_MMR6:
if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg())
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 6d2f098a6b32..3c67743947cb 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -225,6 +225,8 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
switch (Kind) {
case Mips::fixup_Mips_NONE:
return ELF::R_MIPS_NONE;
+ case FK_Data_1:
+ report_fatal_error("MIPS does not support one byte relocations");
case Mips::fixup_Mips_16:
case FK_Data_2:
return IsPCRel ? ELF::R_MIPS_PC16 : ELF::R_MIPS_16;
diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 3ff3f07654d9..326897dc5c63 100644
--- a/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -1886,6 +1886,12 @@ let AddedComplexity = 41 in {
def TAILCALL_MMR6 : TailCall<BC_MMR6, brtarget26_mm>, ISA_MICROMIPS32R6;
+def TAILCALLREG_MMR6 : TailCallReg<JRC16_MM, GPR32Opnd>, ISA_MICROMIPS32R6; // Register tail call for microMIPS32r6, expanded to JRC16_MM. NOTE(review): the sibling def below uses JRC16_MMR6 -- confirm JRC16_MM is intended here.
+
+def PseudoIndirectBranch_MMR6 : PseudoIndirectBranchBase<JRC16_MMR6, // Indirect-branch (brind) pseudo for microMIPS32r6, expanded to JRC16_MMR6.
+ GPR32Opnd>,
+ ISA_MICROMIPS32R6;
+
def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
(TAILCALL_MMR6 tglobaladdr:$dst)>, ISA_MICROMIPS32R6;
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index 64fe55e9776b..1fef51fd69d0 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -1003,6 +1003,12 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def TAILCALL_MM : TailCall<J_MM, jmptarget_mm>, ISA_MIPS1_NOT_32R6_64R6;
+def TAILCALLREG_MM : TailCallReg<JRC16_MM, GPR32Opnd>, // Register tail call for pre-R6 microMIPS, expanded to JRC16_MM.
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+
+def PseudoIndirectBranch_MM : PseudoIndirectBranchBase<JR_MM, GPR32Opnd>, // Indirect-branch (brind) pseudo for pre-R6 microMIPS, expanded to JR_MM.
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+
let DecoderNamespace = "MicroMips" in {
def RDHWR_MM : MMRel, R6MMR6Rel, ReadHardware<GPR32Opnd, HWRegsOpnd>,
RDHWR_FM_MM, ISA_MICROMIPS32_NOT_MIPS32R6;
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 6ceb05577538..f8e739497f4c 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -193,6 +193,10 @@ def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">;
def FeatureLongCalls : SubtargetFeature<"long-calls", "UseLongCalls", "true",
"Disable use of the jal instruction">;
+def FeatureUseIndirectJumpsHazard : SubtargetFeature<"use-indirect-jump-hazard", // Feature tested by the UseIndirectJumpsHazard / NoIndirectJumpGuards predicates in MipsInstrInfo.td.
+ "UseIndirectJumpsHazard",
+ "true", "Use indirect jump" // Same argument layout as FeatureLongCalls above; this literal and the next one together give the description text.
+ " guards to prevent certain speculation based attacks">;
//===----------------------------------------------------------------------===//
// Mips processors supported.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td
index 62f045e77fdb..9e9e074875d0 100644
--- a/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -1036,3 +1036,42 @@ def : MipsPat<(select i32:$cond, immz, i32:$f),
(SELEQZ i32:$f, i32:$cond)>,
ISA_MIPS32R6;
}
+
+// Pseudo instructions
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1, // Flags applied to every TailCallRegR6 instance.
+ hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT] in {
+ class TailCallRegR6<Instruction JumpInst, Register RT, RegisterOperand RO> : // Register-indirect tail call pseudo for R6: matches (MipsTailCall $rs).
+ PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
+ PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)>; // Expands to the two-operand JumpInst with the fixed register RT first and the target $rs second.
+}
+
+class PseudoIndirectBranchBaseR6<Instruction JumpInst, Register RT, // Indirect-branch pseudo for R6 whose expansion takes an extra fixed register RT.
+ RegisterOperand RO> :
+ MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], // Selected for brind on a register of operand class RO.
+ II_IndirectBranchPseudo>,
+ PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)> { // Expands to JumpInst with RT as first operand and $rs as target.
+ let isTerminator=1;
+ let isBarrier=1;
+ let hasDelaySlot = 1;
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+ bit isCTI = 1;
+}
+
+
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, // 32-bit R6 variants used when indirect jump guards are NOT requested.
+ NoIndirectJumpGuards] in {
+ def TAILCALLR6REG : TailCallRegR6<JALR, ZERO, GPR32Opnd>, ISA_MIPS32R6; // Tail call through a register, expanded to JALR with ZERO as first operand.
+ def PseudoIndirectBranchR6 : PseudoIndirectBranchBaseR6<JALR, ZERO, // Indirect branch, expanded to JALR with ZERO as first operand.
+ GPR32Opnd>,
+ ISA_MIPS32R6;
+}
+
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, // 32-bit R6 variants used when use-indirect-jump-hazard is enabled.
+ UseIndirectJumpsHazard] in {
+ def TAILCALLHBR6REG : TailCallReg<JR_HB_R6, GPR32Opnd>, ISA_MIPS32R6; // Tail call through a register, expanded to JR_HB_R6 (single-operand expansion, so the non-R6 base class is used).
+ def PseudoIndrectHazardBranchR6 : PseudoIndirectBranchBase<JR_HB_R6, // Indirect branch, expanded to JR_HB_R6. NOTE(review): "Indrect" looks like a misspelling of "Indirect"; not renamed here because the def name may be referenced elsewhere.
+ GPR32Opnd>,
+ ISA_MIPS32R6;
+}
+
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index e008aeafaa2b..828dd4f54223 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -240,13 +240,32 @@ let isCodeGenOnly = 1 in {
def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>;
def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>;
def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>;
- def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
+ let AdditionalPredicates = [NoIndirectJumpGuards] in
+ def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
}
+let AdditionalPredicates = [NotInMicroMips], // GPR64Opnd instantiations of the jr.hb / jalr.hb descriptions, placed in the "Mips64" decoder namespace.
+ DecoderNamespace = "Mips64" in {
+ def JR_HB64 : JR_HB_DESC<GPR64Opnd>, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6; // NOTE(review): 32-bit JR_HB is tagged ISA_MIPS32R2_NOT_32R6_64R6 in MipsInstrInfo.td -- confirm the differing ISA tag is intended.
+ def JALR_HB64 : JALR_HB_DESC<GPR64Opnd>, JALR_HB_ENC, ISA_MIPS32R2; // Checked next to JALR_HB in MipsAsmParser::checkTargetMatchPredicate.
+}
+def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>;
-def TAILCALLREG64 : TailCallReg<GPR64Opnd>;
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, // 64-bit pre-R6 variants used when indirect jump guards are NOT requested.
+ NoIndirectJumpGuards] in {
+ def TAILCALLREG64 : TailCallReg<JR64, GPR64Opnd>, ISA_MIPS3_NOT_32R6_64R6, // Tail call through a 64-bit register, expanded to JR64.
+ PTR_64;
+ def PseudoIndirectBranch64 : PseudoIndirectBranchBase<JR64, GPR64Opnd>, // Indirect branch (brind) on a 64-bit register, expanded to JR64.
+ ISA_MIPS3_NOT_32R6_64R6;
+}
-def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>;
-def PseudoIndirectBranch64 : PseudoIndirectBranchBase<GPR64Opnd>;
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, // 64-bit pre-R6 variants used when use-indirect-jump-hazard is enabled.
+ UseIndirectJumpsHazard] in {
+ def TAILCALLREGHB64 : TailCallReg<JR_HB64, GPR64Opnd>, // Tail call through a 64-bit register, expanded to JR_HB64.
+ ISA_MIPS32R2_NOT_32R6_64R6, PTR_64;
+ def PseudoIndirectHazardBranch64 : PseudoIndirectBranchBase<JR_HB64, // Indirect branch (brind) on a 64-bit register, expanded to JR_HB64.
+ GPR64Opnd>,
+ ISA_MIPS32R2_NOT_32R6_64R6;
+}
/// Multiply and Divide Instructions.
let AdditionalPredicates = [NotInMicroMips] in {
@@ -536,6 +555,10 @@ def DMTC2 : MTC3OP<"dmtc2", COP2Opnd, GPR64Opnd, II_DMTC2>, MFC3OP_FM<0x12, 5>,
ISA_MIPS3;
}
+
+let AdditionalPredicates = [UseIndirectJumpsHazard] in // Counterpart of JALR64Pseudo, which this patch gates on NoIndirectJumpGuards.
+ def JALRHB64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR_HB64, RA_64>; // Instantiated with JALR_HB64 and RA_64 where JALR64Pseudo passes JALR and RA.
+
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
@@ -843,7 +866,8 @@ let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"dext $rt, $rs, $pos, $size",
(DEXTU GPR64Opnd:$rt, GPR64Opnd:$rs, uimm5_plus32:$pos,
uimm5_plus1:$size), 0>, ISA_MIPS64R2;
-
+ def : MipsInstAlias<"jalr.hb $rs", (JALR_HB64 RA_64, GPR64Opnd:$rs), 1>,
+ ISA_MIPS64;
// Two operand (implicit 0 selector) versions:
def : MipsInstAlias<"dmtc0 $rt, $rd",
(DMTC0 COP0Opnd:$rd, GPR64Opnd:$rt, 0), 0>;
diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td
index 1cd43ee6f1c3..da743fbdee45 100644
--- a/lib/Target/Mips/Mips64r6InstrInfo.td
+++ b/lib/Target/Mips/Mips64r6InstrInfo.td
@@ -104,6 +104,16 @@ class JIC64_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR64Opnd,
class LL64_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9, II_LL>;
class SC64_R6_DESC : SC_R6_DESC_BASE<"sc", GPR32Opnd, II_SC>;
+
+class JR_HB64_R6_DESC : JR_HB_DESC_BASE<"jr.hb", GPR64Opnd> { // Description of jr.hb taking a GPR64Opnd; paired with JR_HB_R6_ENC by the JR_HB64_R6 def.
+ bit isBranch = 1;
+ bit isIndirectBranch = 1;
+ bit hasDelaySlot = 1;
+ bit isTerminator=1;
+ bit isBarrier=1;
+ bit isCTI = 1;
+ InstrItinClass Itinerary = II_JR_HB; // Same itinerary class that JR_HB_DESC passes to InstSE in MipsInstrInfo.td.
+}
//===----------------------------------------------------------------------===//
//
// Instruction Definitions
@@ -136,6 +146,7 @@ def SCD_R6 : SCD_R6_ENC, SCD_R6_DESC, ISA_MIPS32R6;
let DecoderNamespace = "Mips32r6_64r6_GP64" in {
def SELEQZ64 : SELEQZ_ENC, SELEQZ64_DESC, ISA_MIPS32R6, GPR_64;
def SELNEZ64 : SELNEZ_ENC, SELNEZ64_DESC, ISA_MIPS32R6, GPR_64;
+ def JR_HB64_R6 : JR_HB_R6_ENC, JR_HB64_R6_DESC, ISA_MIPS32R6;
}
let AdditionalPredicates = [NotInMicroMips],
DecoderNamespace = "Mips32r6_64r6_PTR64" in {
@@ -277,3 +288,22 @@ def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i64:$f),
def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i64:$f),
(SELNEZ64 i64:$f, (SLL64_32 i32:$cond))>,
ISA_MIPS64R6;
+
+// Pseudo instructions
+
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, // 64-bit R6 variants used when indirect jump guards are NOT requested.
+ NoIndirectJumpGuards] in {
+ def TAILCALL64R6REG : TailCallRegR6<JALR64, ZERO_64, GPR64Opnd>, ISA_MIPS64R6; // Tail call through a 64-bit register, expanded to JALR64 with ZERO_64 as first operand.
+ def PseudoIndirectBranch64R6 : PseudoIndirectBranchBaseR6<JALR64, ZERO_64, // Indirect branch, expanded to JALR64 with ZERO_64 as first operand.
+ GPR64Opnd>,
+ ISA_MIPS64R6;
+}
+
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, // 64-bit R6 variants used when use-indirect-jump-hazard is enabled.
+ UseIndirectJumpsHazard] in {
+ def TAILCALLHB64R6REG : TailCallReg<JR_HB64_R6, GPR64Opnd>, // Tail call through a 64-bit register, expanded to JR_HB64_R6.
+ ISA_MIPS64R6;
+ def PseudoIndrectHazardBranch64R6 : PseudoIndirectBranchBase<JR_HB64_R6, // Indirect branch, expanded to JR_HB64_R6. NOTE(review): "Indrect" looks like a misspelling of "Indirect"; not renamed here because the def name may be referenced elsewhere.
+ GPR64Opnd>,
+ ISA_MIPS64R6;
+}
diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td
index 0ceb1858fb09..2dcefdc789a5 100644
--- a/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -53,7 +53,7 @@ class DSPInst<string opstr = "">
class PseudoDSP<dag outs, dag ins, list<dag> pattern,
InstrItinClass itin = IIPseudo>
- : MipsPseudo<outs, ins, pattern, itin>, PredicateControl {
+ : MipsPseudo<outs, ins, pattern, itin> {
let InsnPredicates = [HasDSP];
}
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 8bbac3ed7cfb..d3048c7390e1 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -67,6 +67,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <array>
#include <cassert>
#include <cstdint>
@@ -1306,13 +1307,13 @@ bool MipsFastISel::fastLowerArguments() {
return false;
}
- const ArrayRef<MCPhysReg> GPR32ArgRegs = {Mips::A0, Mips::A1, Mips::A2,
- Mips::A3};
- const ArrayRef<MCPhysReg> FGR32ArgRegs = {Mips::F12, Mips::F14};
- const ArrayRef<MCPhysReg> AFGR64ArgRegs = {Mips::D6, Mips::D7};
- ArrayRef<MCPhysReg>::iterator NextGPR32 = GPR32ArgRegs.begin();
- ArrayRef<MCPhysReg>::iterator NextFGR32 = FGR32ArgRegs.begin();
- ArrayRef<MCPhysReg>::iterator NextAFGR64 = AFGR64ArgRegs.begin();
+ std::array<MCPhysReg, 4> GPR32ArgRegs = {{Mips::A0, Mips::A1, Mips::A2,
+ Mips::A3}};
+ std::array<MCPhysReg, 2> FGR32ArgRegs = {{Mips::F12, Mips::F14}};
+ std::array<MCPhysReg, 2> AFGR64ArgRegs = {{Mips::D6, Mips::D7}};
+ auto NextGPR32 = GPR32ArgRegs.begin();
+ auto NextFGR32 = FGR32ArgRegs.begin();
+ auto NextAFGR64 = AFGR64ArgRegs.begin();
struct AllocatedReg {
const TargetRegisterClass *RC;
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index ba05b0f48df7..3d383b3dfe3e 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -3868,7 +3868,7 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, nullptr);
case 'l': // use the `lo` register to store values
// that are no bigger than a word
- if (VT == MVT::i32)
+ if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8)
return std::make_pair((unsigned)Mips::LO0, &Mips::LO32RegClass);
return std::make_pair((unsigned)Mips::LO0_64, &Mips::LO64RegClass);
case 'x': // use the concatenated `hi` and `lo` registers
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 817d9b44b9c2..516edef0556c 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -128,7 +128,7 @@ class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
// Mips Pseudo Instructions Format
class MipsPseudo<dag outs, dag ins, list<dag> pattern,
InstrItinClass itin = IIPseudo> :
- MipsInst<outs, ins, "", pattern, itin, Pseudo> {
+ MipsInst<outs, ins, "", pattern, itin, Pseudo>, PredicateControl {
let isCodeGenOnly = 1;
let isPseudo = 1;
}
@@ -136,7 +136,7 @@ class MipsPseudo<dag outs, dag ins, list<dag> pattern,
// Mips32/64 Pseudo Instruction Format
class PseudoSE<dag outs, dag ins, list<dag> pattern,
InstrItinClass itin = IIPseudo> :
- MipsPseudo<outs, ins, pattern, itin>, PredicateControl {
+ MipsPseudo<outs, ins, pattern, itin> {
let EncodingPredicates = [HasStdEnc];
}
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 51ddc0d44c00..2e30d271e130 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -298,7 +298,6 @@ unsigned MipsInstrInfo::getEquivalentCompactForm(
case Mips::JR:
case Mips::PseudoReturn:
case Mips::PseudoIndirectBranch:
- case Mips::TAILCALLREG:
canUseShortMicroMipsCTI = true;
break;
}
@@ -377,18 +376,18 @@ unsigned MipsInstrInfo::getEquivalentCompactForm(
// For MIPSR6, the instruction 'jic' can be used for these cases. Some
// tools will accept 'jrc reg' as an alias for 'jic 0, $reg'.
case Mips::JR:
+ case Mips::PseudoIndirectBranchR6:
case Mips::PseudoReturn:
- case Mips::PseudoIndirectBranch:
- case Mips::TAILCALLREG:
+ case Mips::TAILCALLR6REG:
if (canUseShortMicroMipsCTI)
return Mips::JRC16_MM;
return Mips::JIC;
case Mips::JALRPseudo:
return Mips::JIALC;
case Mips::JR64:
+ case Mips::PseudoIndirectBranch64R6:
case Mips::PseudoReturn64:
- case Mips::PseudoIndirectBranch64:
- case Mips::TAILCALLREG64:
+ case Mips::TAILCALL64R6REG:
return Mips::JIC64;
case Mips::JALR64Pseudo:
return Mips::JIALC64;
@@ -617,6 +616,18 @@ bool MipsInstrInfo::verifyInstruction(const MachineInstr &MI,
return verifyInsExtInstruction(MI, ErrInfo, 0, 32, 32, 64, 32, 64);
case Mips::DEXTU:
return verifyInsExtInstruction(MI, ErrInfo, 32, 64, 0, 32, 32, 64);
+ case Mips::TAILCALLREG:
+ case Mips::PseudoIndirectBranch:
+ case Mips::JR:
+ case Mips::JR64:
+ case Mips::JALR:
+ case Mips::JALR64:
+ case Mips::JALRPseudo:
+ if (!Subtarget.useIndirectJumpsHazard())
+ return true;
+
+ ErrInfo = "invalid instruction when using jump guards!";
+ return false;
default:
return true;
}
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index e0d818b749df..33a061e12a3f 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -244,7 +244,10 @@ def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">,
AssemblerPredicate<"!FeatureMadd4">;
def HasMT : Predicate<"Subtarget->hasMT()">,
AssemblerPredicate<"FeatureMT">;
-
+def UseIndirectJumpsHazard : Predicate<"Subtarget->useIndirectJumpsHazard()">, // Holds when the subtarget reports useIndirectJumpsHazard(); gates the JR_HB/JALR_HB-based pseudos.
+ AssemblerPredicate<"FeatureUseIndirectJumpsHazard">;
+def NoIndirectJumpGuards : Predicate<"!Subtarget->useIndirectJumpsHazard()">, // Exact negation of UseIndirectJumpsHazard; gates the plain JR/JALR-based forms.
+ AssemblerPredicate<"!FeatureUseIndirectJumpsHazard">;
//===----------------------------------------------------------------------===//
// Mips GPR size adjectives.
// They are mutually exclusive.
@@ -1540,8 +1543,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
PseudoSE<(outs), (ins calltarget:$target), [], II_J>,
PseudoInstExpansion<(JumpInst Opnd:$target)>;
- class TailCallReg<RegisterOperand RO> :
- PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>;
+ class TailCallReg<Instruction JumpInst, RegisterOperand RO> :
+ PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
+ PseudoInstExpansion<(JumpInst RO:$rs)>;
}
class BAL_BR_Pseudo<Instruction RealInst> :
@@ -2068,7 +2072,7 @@ def B : UncondBranch<BEQ, brtarget>,
AdditionalRequires<[NotInMicroMips]>;
def JAL : MMRel, JumpLink<"jal", calltarget>, FJ<3>;
-let AdditionalPredicates = [NotInMicroMips] in {
+let AdditionalPredicates = [NotInMicroMips, NoIndirectJumpGuards] in {
def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM;
def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>;
}
@@ -2088,24 +2092,28 @@ def BAL_BR : BAL_BR_Pseudo<BGEZAL>;
let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips] in {
def TAILCALL : TailCall<J, jmptarget>;
}
-
-def TAILCALLREG : TailCallReg<GPR32Opnd>;
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
+ NoIndirectJumpGuards] in
+ def TAILCALLREG : TailCallReg<JR, GPR32Opnd>, ISA_MIPS1_NOT_32R6_64R6;
// Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64
// then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA.
-class PseudoIndirectBranchBase<RegisterOperand RO> :
+class PseudoIndirectBranchBase<Instruction JumpInst, RegisterOperand RO> :
MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)],
- II_IndirectBranchPseudo> {
+ II_IndirectBranchPseudo>,
+ PseudoInstExpansion<(JumpInst RO:$rs)> {
let isTerminator=1;
let isBarrier=1;
let hasDelaySlot = 1;
let isBranch = 1;
let isIndirectBranch = 1;
bit isCTI = 1;
- let Predicates = [NotInMips16Mode];
}
-def PseudoIndirectBranch : PseudoIndirectBranchBase<GPR32Opnd>;
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
+ NoIndirectJumpGuards] in
+ def PseudoIndirectBranch : PseudoIndirectBranchBase<JR, GPR32Opnd>,
+ ISA_MIPS1_NOT_32R6_64R6;
// Return instructions are matched as a RetRA instruction, then are expanded
// into PseudoReturn/PseudoReturn64 after register allocation. Finally,
@@ -2278,8 +2286,8 @@ class JALR_HB_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
list<dag> Pattern = [];
}
-class JR_HB_DESC : InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>,
- JR_HB_DESC_BASE<"jr.hb", GPR32Opnd> {
+class JR_HB_DESC<RegisterOperand RO> :
+ InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>, JR_HB_DESC_BASE<"jr.hb", RO> {
let isBranch=1;
let isIndirectBranch=1;
let hasDelaySlot=1;
@@ -2288,8 +2296,9 @@ class JR_HB_DESC : InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>,
bit isCTI = 1;
}
-class JALR_HB_DESC : InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>,
- JALR_HB_DESC_BASE<"jalr.hb", GPR32Opnd> {
+class JALR_HB_DESC<RegisterOperand RO> :
+ InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>, JALR_HB_DESC_BASE<"jalr.hb",
+ RO> {
let isIndirectBranch=1;
let hasDelaySlot=1;
bit isCTI = 1;
@@ -2298,8 +2307,19 @@ class JALR_HB_DESC : InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>,
class JR_HB_ENC : JR_HB_FM<8>;
class JALR_HB_ENC : JALR_HB_FM<9>;
-def JR_HB : JR_HB_DESC, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6;
-def JALR_HB : JALR_HB_DESC, JALR_HB_ENC, ISA_MIPS32;
+def JR_HB : JR_HB_DESC<GPR32Opnd>, JR_HB_ENC, ISA_MIPS32R2_NOT_32R6_64R6;
+def JALR_HB : JALR_HB_DESC<GPR32Opnd>, JALR_HB_ENC, ISA_MIPS32;
+
+let AdditionalPredicates = [NotInMicroMips, UseIndirectJumpsHazard] in // Counterpart of JALRPseudo, which this patch gates on NoIndirectJumpGuards.
+ def JALRHBPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR_HB, RA>; // Instantiated with JALR_HB where JALRPseudo passes JALR.
+
+
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, // 32-bit pre-R6 variants used when use-indirect-jump-hazard is enabled.
+ UseIndirectJumpsHazard] in {
+ def TAILCALLREGHB : TailCallReg<JR_HB, GPR32Opnd>, ISA_MIPS32_NOT_32R6_64R6; // Tail call through a register, expanded to JR_HB. NOTE(review): JR_HB and the sibling def below are tagged ISA_MIPS32R2_NOT_32R6_64R6 -- confirm the looser tag here is intended.
+ def PseudoIndirectHazardBranch : PseudoIndirectBranchBase<JR_HB, GPR32Opnd>, // Indirect branch (brind), expanded to JR_HB.
+ ISA_MIPS32R2_NOT_32R6_64R6;
+}
class TLB<string asmstr, InstrItinClass itin = NoItinerary> :
InstSE<(outs), (ins), asmstr, [], itin, FrmOther, asmstr>;
@@ -2433,7 +2453,8 @@ def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>;
let Predicates = [NotInMicroMips] in {
def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>;
}
-def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>, ISA_MIPS32;
+def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>,
+ ISA_MIPS32;
def : MipsInstAlias<"neg $rt, $rs",
(SUB GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>;
def : MipsInstAlias<"neg $rt",
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index bbf2050ce1eb..e6ecbe9b5f66 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -371,11 +371,12 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
// In NaCl, modifying the sp is not allowed in branch delay slot.
// For MIPS32R6, we can skip using a delay slot branch.
- if (Subtarget.isTargetNaCl() || Subtarget.hasMips32r6())
+ if (Subtarget.isTargetNaCl() ||
+ (Subtarget.hasMips32r6() && !Subtarget.useIndirectJumpsHazard()))
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
.addReg(Mips::SP).addImm(8);
- if (Subtarget.hasMips32r6()) {
+ if (Subtarget.hasMips32r6() && !Subtarget.useIndirectJumpsHazard()) {
const unsigned JICOp =
Subtarget.inMicroMipsMode() ? Mips::JIC_MMR6 : Mips::JIC;
BuildMI(*BalTgtMBB, Pos, DL, TII->get(JICOp))
@@ -383,7 +384,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addImm(0);
} else {
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT);
+ unsigned JROp =
+ Subtarget.useIndirectJumpsHazard()
+ ? (Subtarget.hasMips32r6() ? Mips::JR_HB_R6 : Mips::JR_HB)
+ : Mips::JR;
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT);
if (Subtarget.isTargetNaCl()) {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::NOP));
@@ -475,7 +480,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64)
.addReg(Mips::SP_64).addImm(0);
- if (Subtarget.hasMips64r6()) {
+ if (Subtarget.hasMips64r6() && !Subtarget.useIndirectJumpsHazard()) {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
.addReg(Mips::SP_64)
.addImm(16);
@@ -483,7 +488,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::AT_64)
.addImm(0);
} else {
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64);
+ unsigned JROp =
+ Subtarget.useIndirectJumpsHazard()
+ ? (Subtarget.hasMips32r6() ? Mips::JR_HB64_R6 : Mips::JR_HB64)
+ : Mips::JR64;
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT_64);
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
.addReg(Mips::SP_64)
.addImm(16);
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index f7d7e2af85e4..eee5b23117f6 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -701,6 +701,77 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Decide whether a multiply by the constant \p C of type \p VT should be
+/// expanded into shifts, adds and subs rather than kept as a multiply.
+///
+/// The constant is decomposed on a work stack: every value other than 0 or 1
+/// costs one step, and a value that is not a power of two is split against the
+/// nearer of the two powers of two that bracket it.
+///
+/// \returns false as soon as the step budget for the current ABI is exceeded,
+/// or when the legalization-adjusted cost of a non-native type is too high;
+/// true otherwise.
+static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
+                                               SelectionDAG &DAG,
+                                               const MipsSubtarget &Subtarget) {
+  // Estimate the number of operations the below transform will turn a
+  // constant multiply into. The number is approximately how many powers
+  // of two summed together that the constant can be broken down into.
+
+  // If we have taken more than 12[1] / 8[2] steps to attempt the
+  // optimization for a native sized value, it is more than likely that this
+  // optimization will make things worse.
+  //
+  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
+  //     multiplication requires at least 4 cycles, but another cycle (or two)
+  //     to retrieve the result from the HI/LO registers.
+  //
+  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
+  //     materialized in 2 instructions, multiplication requires at least 4
+  //     cycles, but another cycle (or two) to retrieve the result from the
+  //     HI/LO registers.
+  auto ExceedsStepBudget = [&Subtarget](unsigned StepsSoFar) {
+    if (StepsSoFar > 12 && (Subtarget.isABI_N32() || Subtarget.isABI_N64()))
+      return true;
+    return StepsSoFar > 8 && Subtarget.isABI_O32();
+  };
+
+  SmallVector<APInt, 16> WorkStack(1, C);
+  unsigned Steps = 0;
+  unsigned BitWidth = C.getBitWidth();
+
+  while (!WorkStack.empty()) {
+    APInt Val = WorkStack.pop_back_val();
+
+    if (Val == 0 || Val == 1)
+      continue;
+
+    // Every remaining value costs one step: a shift when it is a power of
+    // two, otherwise the add/sub that joins the two halves it is split into.
+    // The budget is checked on every step so that no decomposition path can
+    // bypass it.
+    ++Steps;
+    if (ExceedsStepBudget(Steps))
+      return false;
+
+    if (Val.isPowerOf2())
+      continue;
+
+    // Bracket Val itself (not the top-level constant C) between neighbouring
+    // powers of two, so each split is measured on the value being decomposed.
+    // NOTE(review): confirm this mirrors the split rule used by genConstMult.
+    APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
+    APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
+                                  : APInt(BitWidth, 1) << Val.ceilLogBase2();
+
+    if ((Val - Floor).ule(Ceil - Val)) {
+      // Closer to the lower power of two: Val = Floor + (Val - Floor).
+      WorkStack.push_back(Floor);
+      WorkStack.push_back(Val - Floor);
+    } else {
+      // Closer to the upper power of two: Val = Ceil - (Ceil - Val).
+      WorkStack.push_back(Ceil);
+      WorkStack.push_back(Ceil - Val);
+    }
+  }
+
+  // If the value being multiplied is not supported natively, we have to pay
+  // an additional legalization cost, conservatively assume an increase in the
+  // cost of 3 instructions per step. The values for this heuristic were
+  // determined experimentally.
+  unsigned RegisterSize = DAG.getTargetLoweringInfo()
+                              .getRegisterType(*DAG.getContext(), VT)
+                              .getSizeInBits();
+  if (VT.getSizeInBits() != RegisterSize && Steps * 3 > 27)
+    return false;
+
+  return true;
+}
+
static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
EVT ShiftTy, SelectionDAG &DAG) {
// Return 0.
@@ -739,11 +810,13 @@ static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering::DAGCombinerInfo &DCI,
- const MipsSETargetLowering *TL) {
+ const MipsSETargetLowering *TL,
+ const MipsSubtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- if (!VT.isVector())
+ if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
+ C->getAPIntValue(), VT, DAG, Subtarget))
return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
DAG);
@@ -983,7 +1056,7 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
Val = performORCombine(N, DAG, DCI, Subtarget);
break;
case ISD::MUL:
- return performMULCombine(N, DAG, DCI, this);
+ return performMULCombine(N, DAG, DCI, this, Subtarget);
case ISD::SHL:
Val = performSHLCombine(N, DAG, DCI, Subtarget);
break;
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index f6af7e22e351..ddaa07ea9bc1 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -72,9 +72,10 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16),
Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false),
HasEVA(false), DisableMadd4(false), HasMT(false),
- StackAlignOverride(StackAlignOverride), TM(TM), TargetTriple(TT),
- TSInfo(), InstrInfo(MipsInstrInfo::create(
- initializeSubtargetDependencies(CPU, FS, TM))),
+ UseIndirectJumpsHazard(false), StackAlignOverride(StackAlignOverride),
+ TM(TM), TargetTriple(TT), TSInfo(),
+ InstrInfo(
+ MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
FrameLowering(MipsFrameLowering::create(*this)),
TLInfo(MipsTargetLowering::create(TM, *this)) {
@@ -107,6 +108,15 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
if (hasMips64r6() && InMicroMipsMode)
report_fatal_error("microMIPS64R6 is not supported", false);
+
+ if (UseIndirectJumpsHazard) {
+ if (InMicroMipsMode)
+ report_fatal_error(
+ "cannot combine indirect jumps with hazard barriers and microMIPS");
+ if (!hasMips32r2())
+ report_fatal_error(
+ "indirect jumps with hazard barriers requires MIPS32R2 or later");
+ }
if (hasMips32r6()) {
StringRef ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6";
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 8b10b0596e0e..ad2905c51601 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -152,6 +152,10 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// HasMT -- support MT ASE.
bool HasMT;
+ // Use hazard variants of the jump register instructions for indirect
+ // function calls and jump tables.
+ bool UseIndirectJumpsHazard;
+
// Disable use of the `jal` instruction.
bool UseLongCalls = false;
@@ -272,6 +276,9 @@ public:
bool disableMadd4() const { return DisableMadd4; }
bool hasEVA() const { return HasEVA; }
bool hasMT() const { return HasMT; }
+  /// True only when the UseIndirectJumpsHazard flag is set and the subtarget
+  /// also reports MIPS32R2 support via hasMips32r2().
+  bool useIndirectJumpsHazard() const {
+    return UseIndirectJumpsHazard && hasMips32r2();
+  }
bool useSmallSection() const { return UseSmallSection; }
bool hasStandardEncoding() const { return !inMips16Mode(); }
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index d31e1cb5047b..cb8cc7bb347a 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -44,6 +44,14 @@ static cl::opt<bool>
cl::desc("Disable load/store vectorizer"),
cl::init(false), cl::Hidden);
+// TODO: Remove this flag when we are confident with no regressions.
+static cl::opt<bool> DisableRequireStructuredCFG(
+ "disable-nvptx-require-structured-cfg",
+ cl::desc("Transitional flag to turn off NVPTX's requirement on preserving "
+ "structured CFG. The requirement should be disabled only when "
+ "unexpected regressions happen."),
+ cl::init(false), cl::Hidden);
+
namespace llvm {
void initializeNVVMIntrRangePass(PassRegistry&);
@@ -108,6 +116,8 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
drvInterface = NVPTX::NVCL;
else
drvInterface = NVPTX::CUDA;
+ if (!DisableRequireStructuredCFG)
+ setRequiresStructuredCFG(true);
initAsmInfo();
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index f0e8b11a3d9c..26e9f13f9ff4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12264,6 +12264,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(1).getValueType() == MVT::i16 ||
(Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
N->getOperand(1).getValueType() == MVT::i64))) {
+ // STBRX can only handle simple types.
+ EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
+ if (mVT.isExtended())
+ break;
+
SDValue BSwapOp = N->getOperand(1).getOperand(0);
// Do an any-extend to 32-bits if this is a half-word input.
if (BSwapOp.getValueType() == MVT::i16)
@@ -12271,7 +12276,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// If the type of BSWAP operand is wider than stored memory width
// it need to be shifted to the right side before STBRX.
- EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
if (Op1VT.bitsGT(mVT)) {
int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index fb16700a5e17..4ef71effd49b 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2431,7 +2431,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
// Use APInt's rotate function.
int64_t SH = MI.getOperand(2).getImm();
int64_t MB = MI.getOperand(3).getImm();
- APInt InVal(Opc == PPC::RLDICL ? 64 : 32, SExtImm, true);
+ APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICLo) ?
+ 64 : 32, SExtImm, true);
InVal = InVal.rotl(SH);
uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
InVal &= Mask;
@@ -2444,6 +2445,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
Is64BitLI = Opc != PPC::RLDICL_32;
NewImm = InVal.getSExtValue();
SetCR = Opc == PPC::RLDICLo;
+ if (SetCR && (SExtImm & NewImm) != NewImm)
+ return false;
break;
}
return false;
@@ -2471,6 +2474,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
NewImm = InVal.getSExtValue();
SetCR = Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o;
+ if (SetCR && (SExtImm & NewImm) != NewImm)
+ return false;
break;
}
return false;
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 23ac9d9936ad..44400813094b 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -31,6 +31,7 @@ set(sources
X86FixupBWInsts.cpp
X86FixupLEAs.cpp
X86FixupSetCC.cpp
+ X86FlagsCopyLowering.cpp
X86FloatingPoint.cpp
X86FrameLowering.cpp
X86InstructionSelector.cpp
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index c58254ae38c1..b3c491b3de5e 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -265,13 +265,10 @@ MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
/// @param reg - The Reg to append.
static void translateRegister(MCInst &mcInst, Reg reg) {
#define ENTRY(x) X86::x,
- uint8_t llvmRegnums[] = {
- ALL_REGS
- 0
- };
+ static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
#undef ENTRY
- uint8_t llvmRegnum = llvmRegnums[reg];
+ MCPhysReg llvmRegnum = llvmRegnums[reg];
mcInst.addOperand(MCOperand::createReg(llvmRegnum));
}
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 361326824292..642dda8f4225 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -66,6 +66,9 @@ FunctionPass *createX86OptimizeLEAs();
/// Return a pass that transforms setcc + movzx pairs into xor + setcc.
FunctionPass *createX86FixupSetCC();
+/// Return a pass that lowers EFLAGS copy pseudo instructions.
+FunctionPass *createX86FlagsCopyLoweringPass();
+
/// Return a pass that expands WinAlloca pseudo-instructions.
FunctionPass *createX86WinAllocaExpander();
diff --git a/lib/Target/X86/X86DomainReassignment.cpp b/lib/Target/X86/X86DomainReassignment.cpp
index bc0f55f581ff..ffe176ad4770 100644
--- a/lib/Target/X86/X86DomainReassignment.cpp
+++ b/lib/Target/X86/X86DomainReassignment.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Printable.h"
#include <bitset>
using namespace llvm;
@@ -262,25 +263,6 @@ public:
}
};
-/// An Instruction Converter which completely deletes an instruction.
-/// For example, IMPLICIT_DEF instructions can be deleted when converting from
-/// GPR to mask.
-class InstrDeleter : public InstrConverterBase {
-public:
- InstrDeleter(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {}
-
- bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII,
- MachineRegisterInfo *MRI) const override {
- assert(isLegal(MI, TII) && "Cannot convert instruction");
- return true;
- }
-
- double getExtraCost(const MachineInstr *MI,
- MachineRegisterInfo *MRI) const override {
- return 0;
- }
-};
-
// Key type to be used by the Instruction Converters map.
// A converter is identified by <destination domain, source opcode>
typedef std::pair<int, unsigned> InstrConverterBaseKeyTy;
@@ -310,8 +292,12 @@ private:
/// Domains which this closure can legally be reassigned to.
std::bitset<NumDomains> LegalDstDomains;
+ /// An ID to uniquely identify this closure, even when it gets
+ /// moved around
+ unsigned ID;
+
public:
- Closure(std::initializer_list<RegDomain> LegalDstDomainList) {
+ Closure(unsigned ID, std::initializer_list<RegDomain> LegalDstDomainList) : ID(ID) {
for (RegDomain D : LegalDstDomainList)
LegalDstDomains.set(D);
}
@@ -347,6 +333,27 @@ public:
return Instrs;
}
+  /// Print this closure to dbgs(): first the registers in Edges as a
+  /// comma-separated list, then each instruction in Instrs on its own line.
+  LLVM_DUMP_METHOD void dump(const MachineRegisterInfo *MRI) const {
+    raw_ostream &OS = dbgs();
+
+    OS << "Registers: ";
+    // Empty before the first register, ", " before every later one.
+    const char *Separator = "";
+    for (unsigned Reg : Edges) {
+      OS << Separator;
+      Separator = ", ";
+      OS << printReg(Reg, MRI->getTargetRegisterInfo());
+    }
+
+    OS << "\n" << "Instructions:";
+    for (MachineInstr *MI : Instrs) {
+      OS << "\n ";
+      MI->print(OS);
+    }
+    OS << "\n";
+  }
+
+  /// Returns the ID this closure was constructed with.
+  unsigned getID() const { return ID; }
+
};
class X86DomainReassignment : public MachineFunctionPass {
@@ -358,7 +365,7 @@ class X86DomainReassignment : public MachineFunctionPass {
DenseSet<unsigned> EnclosedEdges;
/// All instructions that are included in some closure.
- DenseMap<MachineInstr *, Closure *> EnclosedInstrs;
+ DenseMap<MachineInstr *, unsigned> EnclosedInstrs;
public:
static char ID;
@@ -435,14 +442,14 @@ void X86DomainReassignment::visitRegister(Closure &C, unsigned Reg,
void X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) {
auto I = EnclosedInstrs.find(MI);
if (I != EnclosedInstrs.end()) {
- if (I->second != &C)
+ if (I->second != C.getID())
// Instruction already belongs to another closure, avoid conflicts between
// closure and mark this closure as illegal.
C.setAllIllegal();
return;
}
- EnclosedInstrs[MI] = &C;
+ EnclosedInstrs[MI] = C.getID();
C.addInstruction(MI);
// Mark closure as illegal for reassignment to domains, if there is no
@@ -587,7 +594,7 @@ void X86DomainReassignment::initConverters() {
new InstrIgnore(TargetOpcode::PHI);
Converters[{MaskDomain, TargetOpcode::IMPLICIT_DEF}] =
- new InstrDeleter(TargetOpcode::IMPLICIT_DEF);
+ new InstrIgnore(TargetOpcode::IMPLICIT_DEF);
Converters[{MaskDomain, TargetOpcode::INSERT_SUBREG}] =
new InstrReplaceWithCopy(TargetOpcode::INSERT_SUBREG, 2);
@@ -723,6 +730,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
std::vector<Closure> Closures;
// Go over all virtual registers and calculate a closure.
+ unsigned ClosureID = 0;
for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(Idx);
@@ -735,7 +743,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
continue;
// Calculate closure starting with Reg.
- Closure C({MaskDomain});
+ Closure C(ClosureID++, {MaskDomain});
buildClosure(C, Reg);
// Collect all closures that can potentially be converted.
@@ -743,15 +751,16 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
Closures.push_back(std::move(C));
}
- for (Closure &C : Closures)
+ for (Closure &C : Closures) {
+ DEBUG(C.dump(MRI));
if (isReassignmentProfitable(C, MaskDomain)) {
reassign(C, MaskDomain);
++NumClosuresConverted;
Changed = true;
}
+ }
- for (auto I : Converters)
- delete I.second;
+ DeleteContainerSeconds(Converters);
DEBUG(dbgs() << "***** Machine Function after Domain Reassignment *****\n");
DEBUG(MF.print(dbgs()));
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 80ce3c579fe0..dca6c592614c 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1789,9 +1789,16 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
bool X86FastISel::X86SelectShift(const Instruction *I) {
unsigned CReg = 0, OpReg = 0;
const TargetRegisterClass *RC = nullptr;
- assert(!I->getType()->isIntegerTy(8) &&
- "i8 shifts should be handled by autogenerated table");
- if (I->getType()->isIntegerTy(16)) {
+ if (I->getType()->isIntegerTy(8)) {
+ CReg = X86::CL;
+ RC = &X86::GR8RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR8rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR8rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL8rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(16)) {
CReg = X86::CX;
RC = &X86::GR16RegClass;
switch (I->getOpcode()) {
@@ -1836,10 +1843,10 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
// The shift instruction uses X86::CL. If we defined a super-register
// of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
- assert(CReg != X86::CL && "CReg should be a super register of CL");
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::KILL), X86::CL)
- .addReg(CReg, RegState::Kill);
+ if (CReg != X86::CL)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::KILL), X86::CL)
+ .addReg(CReg, RegState::Kill);
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
diff --git a/lib/Target/X86/X86FlagsCopyLowering.cpp b/lib/Target/X86/X86FlagsCopyLowering.cpp
new file mode 100644
index 000000000000..a6fccd134740
--- /dev/null
+++ b/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -0,0 +1,935 @@
+//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
+/// flag bits.
+///
+/// We have to do this by carefully analyzing and rewriting the usage of the
+/// copied EFLAGS register because there is no general way to rematerialize the
+/// entire EFLAGS register safely and efficiently. Using `popf` both forces
+/// dynamic stack adjustment and can create correctness issues due to IF, TF,
+/// and other non-status flags being overwritten. Sequences involving SAHF
+/// don't work on all x86 processors and are often quite slow compared to
+/// directly testing a single status flag preserved in its own GPR.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+#define PASS_KEY "x86-flags-copy-lowering"
+#define DEBUG_TYPE PASS_KEY
+
+STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
+STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
+STATISTIC(NumTestsInserted, "Number of test instructions inserted");
+STATISTIC(NumAddsInserted, "Number of adds instructions inserted");
+
+namespace llvm {
+
+void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
+
+} // end namespace llvm
+
+namespace {
+
+// Convenient array type for storing registers associated with each condition.
+using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;
+
+/// Machine function pass that lowers COPY instructions defining EFLAGS.
+///
+/// The pointer members below are per-function state that is assigned at the
+/// start of runOnMachineFunction(). They default to null so the object never
+/// holds indeterminate pointers before its first run.
+class X86FlagsCopyLoweringPass : public MachineFunctionPass {
+public:
+  X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {
+    initializeX86FlagsCopyLoweringPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// Pass identification, replacement for typeid.
+  static char ID;
+
+private:
+  // Cached handles for the function currently being processed.
+  MachineRegisterInfo *MRI = nullptr;
+  const X86InstrInfo *TII = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
+  const TargetRegisterClass *PromoteRC = nullptr;
+  MachineDominatorTree *MDT = nullptr;
+
+  // Helpers used by the rewrite. Their definitions are outside this
+  // declaration; see each definition for its exact contract.
+  CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
+                                  MachineInstr &CopyDefI);
+
+  unsigned promoteCondToReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator TestPos,
+                            DebugLoc TestLoc, X86::CondCode Cond);
+  std::pair<unsigned, bool>
+  getCondOrInverseInReg(MachineBasicBlock &TestMBB,
+                        MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
+                        X86::CondCode Cond, CondRegArray &CondRegs);
+  void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
+                  DebugLoc Loc, unsigned Reg);
+
+  // One rewrite routine per kind of EFLAGS user.
+  void rewriteArithmetic(MachineBasicBlock &TestMBB,
+                         MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
+                         MachineInstr &MI, MachineOperand &FlagUse,
+                         CondRegArray &CondRegs);
+  void rewriteCMov(MachineBasicBlock &TestMBB,
+                   MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
+                   MachineInstr &CMovI, MachineOperand &FlagUse,
+                   CondRegArray &CondRegs);
+  void rewriteCondJmp(MachineBasicBlock &TestMBB,
+                      MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
+                      MachineInstr &JmpI, CondRegArray &CondRegs);
+  void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
+                   MachineInstr &CopyDefI);
+  void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
+                               MachineBasicBlock::iterator TestPos,
+                               DebugLoc TestLoc, MachineInstr &SetBI,
+                               MachineOperand &FlagUse, CondRegArray &CondRegs);
+  void rewriteSetCC(MachineBasicBlock &TestMBB,
+                    MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
+                    MachineInstr &SetCCI, MachineOperand &FlagUse,
+                    CondRegArray &CondRegs);
+};
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
+                      "X86 EFLAGS copy lowering", false, false)
+// getAnalysisUsage() marks MachineDominatorTree as required, so register it
+// as an initialization dependency of this pass as well.
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
+                    "X86 EFLAGS copy lowering", false, false)
+
+FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
+ return new X86FlagsCopyLoweringPass();
+}
+
+char X86FlagsCopyLoweringPass::ID = 0;
+
+void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+namespace {
+/// An enumeration of the arithmetic instruction mnemonics which have
+/// interesting flag semantics.
+///
+/// We can map instruction opcodes into these mnemonics to make it easy to
+/// dispatch with specific functionality.
+enum class FlagArithMnemonic {
+ ADC,
+ ADCX,
+ ADOX,
+ RCL,
+ RCR,
+ SBB,
+};
+} // namespace
+
+/// Map a machine opcode to the FlagArithMnemonic it belongs to.
+///
+/// Any opcode that is not listed in the switch reaches the default case,
+/// which calls report_fatal_error.
+static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    report_fatal_error("No support for lowering a copy into EFLAGS when used "
+                       "by this instruction!");
+
+// Expands to the four case labels MNEMONIC{8,16,32,64}SUFFIX.
+#define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX) \
+  case X86::MNEMONIC##8##SUFFIX: \
+  case X86::MNEMONIC##16##SUFFIX: \
+  case X86::MNEMONIC##32##SUFFIX: \
+  case X86::MNEMONIC##64##SUFFIX:
+
+// Expands to the case labels for the rr, rr_REV, rm and mr forms at every
+// size, followed by the explicitly listed immediate forms.
+#define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC) \
+  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr) \
+  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV) \
+  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm) \
+  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr) \
+  case X86::MNEMONIC##8ri: \
+  case X86::MNEMONIC##16ri8: \
+  case X86::MNEMONIC##32ri8: \
+  case X86::MNEMONIC##64ri8: \
+  case X86::MNEMONIC##16ri: \
+  case X86::MNEMONIC##32ri: \
+  case X86::MNEMONIC##64ri32: \
+  case X86::MNEMONIC##8mi: \
+  case X86::MNEMONIC##16mi8: \
+  case X86::MNEMONIC##32mi8: \
+  case X86::MNEMONIC##64mi8: \
+  case X86::MNEMONIC##16mi: \
+  case X86::MNEMONIC##32mi: \
+  case X86::MNEMONIC##64mi32: \
+  case X86::MNEMONIC##8i8: \
+  case X86::MNEMONIC##16i16: \
+  case X86::MNEMONIC##32i32: \
+  case X86::MNEMONIC##64i32:
+
+    LLVM_EXPAND_ADC_SBB_INSTR(ADC)
+    return FlagArithMnemonic::ADC;
+
+    LLVM_EXPAND_ADC_SBB_INSTR(SBB)
+    return FlagArithMnemonic::SBB;
+
+#undef LLVM_EXPAND_ADC_SBB_INSTR
+
+    // The rotate-through-carry forms reuse the per-size expansion directly.
+    LLVM_EXPAND_INSTR_SIZES(RCL, rCL)
+    LLVM_EXPAND_INSTR_SIZES(RCL, r1)
+    LLVM_EXPAND_INSTR_SIZES(RCL, ri)
+    return FlagArithMnemonic::RCL;
+
+    LLVM_EXPAND_INSTR_SIZES(RCR, rCL)
+    LLVM_EXPAND_INSTR_SIZES(RCR, r1)
+    LLVM_EXPAND_INSTR_SIZES(RCR, ri)
+    return FlagArithMnemonic::RCR;
+
+#undef LLVM_EXPAND_INSTR_SIZES
+
+  // ADCX and ADOX are listed by hand: only 32- and 64-bit rr/rm forms appear.
+  case X86::ADCX32rr:
+  case X86::ADCX64rr:
+  case X86::ADCX32rm:
+  case X86::ADCX64rm:
+    return FlagArithMnemonic::ADCX;
+
+  case X86::ADOX32rr:
+  case X86::ADOX64rr:
+  case X86::ADOX32rm:
+  case X86::ADOX64rm:
+    return FlagArithMnemonic::ADOX;
+  }
+}
+
+/// Split \p MBB in front of \p SplitI and return the newly created block.
+///
+/// A new block is created and inserted immediately after \p MBB, and every
+/// instruction from \p SplitI to the end of \p MBB is spliced into it.
+/// Successor edges are then copied or moved to the new block, and the PHIs in
+/// the new block's successors are updated to name it as a predecessor.
+///
+/// The asserts require that \p SplitI is a conditional branch inside \p MBB,
+/// that the instruction just before it is also a conditional branch, and that
+/// the instruction before that one is not a terminator.
+///
+/// \p TII is not referenced in the body.
+static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
+                                     MachineInstr &SplitI,
+                                     const X86InstrInfo &TII) {
+  MachineFunction &MF = *MBB.getParent();
+
+  assert(SplitI.getParent() == &MBB &&
+         "Split instruction must be in the split block!");
+  assert(SplitI.isBranch() &&
+         "Only designed to split a tail of branch instructions!");
+  assert(X86::getCondFromBranchOpc(SplitI.getOpcode()) != X86::COND_INVALID &&
+         "Must split on an actual jCC instruction!");
+
+  // Dig out the previous instruction to the split point.
+  MachineInstr &PrevI = *std::prev(SplitI.getIterator());
+  assert(PrevI.isBranch() && "Must split after a branch!");
+  assert(X86::getCondFromBranchOpc(PrevI.getOpcode()) != X86::COND_INVALID &&
+         "Must split after an actual jCC instruction!");
+  assert(!std::prev(PrevI.getIterator())->isTerminator() &&
+         "Must only have this one terminator prior to the split!");
+
+  // Grab the one successor edge that will stay in `MBB`.
+  MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();
+
+  // Analyze the original block to see if we are actually splitting an edge
+  // into two edges. This can happen when we have multiple conditional jumps to
+  // the same successor.
+  bool IsEdgeSplit =
+      std::any_of(SplitI.getIterator(), MBB.instr_end(),
+                  [&](MachineInstr &MI) {
+                    assert(MI.isTerminator() &&
+                           "Should only have spliced terminators!");
+                    return llvm::any_of(
+                        MI.operands(), [&](MachineOperand &MOp) {
+                          return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
+                        });
+                  }) ||
+      MBB.getFallThrough() == &UnsplitSucc;
+
+  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
+
+  // Insert the new block immediately after the current one. Any existing
+  // fallthrough will be sunk into this new block anyways.
+  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
+
+  // Splice the tail of instructions into the new block.
+  NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());
+
+  // Copy the necessary successors (and their probability info) into the new
+  // block.
+  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
+    if (IsEdgeSplit || *SI != &UnsplitSucc)
+      NewMBB.copySuccessor(&MBB, SI);
+  // Normalize the probabilities if we didn't end up splitting the edge.
+  if (!IsEdgeSplit)
+    NewMBB.normalizeSuccProbs();
+
+  // Now replace all of the moved successors in the original block with the new
+  // block. This will merge their probabilities.
+  for (MachineBasicBlock *Succ : NewMBB.successors())
+    if (Succ != &UnsplitSucc)
+      MBB.replaceSuccessor(Succ, &NewMBB);
+
+  // We should always end up replacing at least one successor.
+  assert(MBB.isSuccessor(&NewMBB) &&
+         "Failed to make the new block a successor!");
+
+  // Now update all the PHIs.
+  for (MachineBasicBlock *Succ : NewMBB.successors()) {
+    for (MachineInstr &MI : *Succ) {
+      // Stop scanning this successor at its first non-PHI instruction.
+      if (!MI.isPHI())
+        break;
+
+      // Operand 0 is skipped; the rest are walked as (value, block) pairs.
+      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
+           OpIdx += 2) {
+        MachineOperand &OpV = MI.getOperand(OpIdx);
+        MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
+        assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
+        if (OpMBB.getMBB() != &MBB)
+          continue;
+
+        // Replace the operand for unsplit successors
+        if (!IsEdgeSplit || Succ != &UnsplitSucc) {
+          OpMBB.setMBB(&NewMBB);
+
+          // We have to continue scanning as there may be multiple entries in
+          // the PHI.
+          continue;
+        }
+
+        // When we have split the edge append a new successor.
+        MI.addOperand(MF, OpV);
+        MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
+        break;
+      }
+    }
+  }
+
+  return NewMBB;
+}
+
+bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
+ << " **********\n");
+
+ auto &Subtarget = MF.getSubtarget<X86Subtarget>();
+ MRI = &MF.getRegInfo();
+ TII = Subtarget.getInstrInfo();
+ TRI = Subtarget.getRegisterInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ PromoteRC = &X86::GR8RegClass;
+
+ if (MF.begin() == MF.end())
+ // Nothing to do for a degenerate empty function...
+ return false;
+
+ SmallVector<MachineInstr *, 4> Copies;
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &MI : MBB)
+ if (MI.getOpcode() == TargetOpcode::COPY &&
+ MI.getOperand(0).getReg() == X86::EFLAGS)
+ Copies.push_back(&MI);
+
+ for (MachineInstr *CopyI : Copies) {
+ MachineBasicBlock &MBB = *CopyI->getParent();
+
+ MachineOperand &VOp = CopyI->getOperand(1);
+ assert(VOp.isReg() &&
+ "The input to the copy for EFLAGS should always be a register!");
+ MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
+ if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
+ // FIXME: The likely candidates here are PHI nodes. We could in theory
+ // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
+ // enough that it is probably better to change every other part of LLVM
+ // to avoid creating them. The issue is that once we have PHIs we won't
+ // know which original EFLAGS value we need to capture with our setCCs
+ // below. The end result will be computing a complete set of setCCs that
+ // we *might* want, computing them in every place where we copy *out* of
+ // EFLAGS and then doing SSA formation on all of them to insert necessary
+ // PHI nodes and consume those here. Then hoping that somehow we DCE the
+ // unnecessary ones. This DCE seems very unlikely to be successful and so
+ // we will almost certainly end up with a glut of dead setCC
+ // instructions. Until we have a motivating test case and fail to avoid
+ // it by changing other parts of LLVM's lowering, we refuse to handle
+ // this complex case here.
+ DEBUG(dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
+ CopyDefI.dump());
+ report_fatal_error(
+ "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
+ }
+
+ auto Cleanup = make_scope_exit([&] {
+ // All uses of the EFLAGS copy are now rewritten; erase the copy into
+ // EFLAGS and, if its result is now unused, the copy it read from.
+ CopyI->eraseFromParent();
+ if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
+ CopyDefI.eraseFromParent();
+ ++NumCopiesEliminated;
+ });
+
+ MachineOperand &DOp = CopyI->getOperand(0);
+ assert(DOp.isDef() && "Expected register def!");
+ assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
+ if (DOp.isDead())
+ continue;
+
+ MachineBasicBlock &TestMBB = *CopyDefI.getParent();
+ auto TestPos = CopyDefI.getIterator();
+ DebugLoc TestLoc = CopyDefI.getDebugLoc();
+
+ DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());
+
+ // Scan for usage of newly set EFLAGS so we can rewrite them. We just buffer
+ // jumps because their usage is very constrained.
+ bool FlagsKilled = false;
+ SmallVector<MachineInstr *, 4> JmpIs;
+
+ // Gather the condition flags that have already been preserved in
+ // registers. We do this from scratch each time as we expect there to be
+ // very few of them and we expect to not revisit the same copy definition
+ // many times. If either of those change sufficiently we could build a map
+ // of these up front instead.
+ CondRegArray CondRegs = collectCondsInRegs(TestMBB, CopyDefI);
+
+ // Collect the basic blocks we need to scan. Typically this will just be
+ // a single basic block but we may have to scan multiple blocks if the
+ // EFLAGS copy lives into successors.
+ SmallVector<MachineBasicBlock *, 2> Blocks;
+ SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
+ Blocks.push_back(&MBB);
+ VisitedBlocks.insert(&MBB);
+
+ do {
+ MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
+
+ // We currently don't do any PHI insertion and so we require that the
+ // test basic block dominates all of the use basic blocks.
+ //
+ // We could in theory do PHI insertion here if it becomes useful by just
+ // taking undef values in along every edge that we don't trace this
+ // EFLAGS copy along. This isn't as bad as fully general PHI insertion,
+ // but still seems like a great deal of complexity.
+ //
+ // Because it is theoretically possible that some earlier MI pass or
+ // other lowering transformation could induce this to happen, we do
+ // a hard check even in non-debug builds here.
+ if (&TestMBB != &UseMBB && !MDT->dominates(&TestMBB, &UseMBB)) {
+ DEBUG({
+ dbgs() << "ERROR: Encountered use that is not dominated by our test "
+ "basic block! Rewriting this would require inserting PHI "
+ "nodes to track the flag state across the CFG.\n\nTest "
+ "block:\n";
+ TestMBB.dump();
+ dbgs() << "Use block:\n";
+ UseMBB.dump();
+ });
+ report_fatal_error("Cannot lower EFLAGS copy when original copy def "
+ "does not dominate all uses.");
+ }
+
+ for (auto MII = &UseMBB == &MBB ? std::next(CopyI->getIterator())
+ : UseMBB.instr_begin(),
+ MIE = UseMBB.instr_end();
+ MII != MIE;) {
+ MachineInstr &MI = *MII++;
+ MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
+ if (!FlagUse) {
+ if (MI.findRegisterDefOperand(X86::EFLAGS)) {
+ // If EFLAGS are defined, it's as-if they were killed. We can stop
+ // scanning here.
+ //
+ // NB!!! Many instructions only modify some flags. LLVM currently
+ // models this as clobbering all flags, but if that ever changes
+ // this will need to be carefully updated to handle that more
+ // complex logic.
+ FlagsKilled = true;
+ break;
+ }
+ continue;
+ }
+
+ DEBUG(dbgs() << " Rewriting use: "; MI.dump());
+
+ // Check the kill flag before we rewrite as that may change it.
+ if (FlagUse->isKill())
+ FlagsKilled = true;
+
+ // Once we encounter a branch, the rest of the instructions must also be
+ // branches. We can't rewrite in place here, so we handle them below.
+ //
+ // Note that we don't have to handle tail calls here, even conditional
+ // tail calls, as those are not introduced into the X86 MI until post-RA
+ // branch folding or block placement. As a consequence, we get to deal
+ // with the simpler formulation of conditional branches followed by tail
+ // calls.
+ if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) {
+ auto JmpIt = MI.getIterator();
+ do {
+ JmpIs.push_back(&*JmpIt);
+ ++JmpIt;
+ } while (JmpIt != UseMBB.instr_end() &&
+ X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
+ X86::COND_INVALID);
+ break;
+ }
+
+ // Otherwise we can just rewrite in-place.
+ if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
+ rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+ } else if (X86::getCondFromSETOpc(MI.getOpcode()) !=
+ X86::COND_INVALID) {
+ rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+ } else if (MI.getOpcode() == TargetOpcode::COPY) {
+ rewriteCopy(MI, *FlagUse, CopyDefI);
+ } else {
+ // We assume all other instructions that use flags also def them.
+ assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
+ "Expected a def of EFLAGS for this instruction!");
+
+ // NB!!! Several arithmetic instructions only *partially* update
+ // flags. Theoretically, we could generate MI code sequences that
+ // would rely on this fact and observe different flags independently.
+ // But currently LLVM models all of these instructions as clobbering
+ // all the flags in an undef way. We rely on that to simplify the
+ // logic.
+ FlagsKilled = true;
+
+ switch (MI.getOpcode()) {
+ case X86::SETB_C8r:
+ case X86::SETB_C16r:
+ case X86::SETB_C32r:
+ case X86::SETB_C64r:
+ // Use custom lowering for arithmetic that is merely extending the
+ // carry flag. We model this as the SETB_C* pseudo instructions.
+ rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse,
+ CondRegs);
+ break;
+
+ default:
+ // Generically handle remaining uses as arithmetic instructions.
+ rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse,
+ CondRegs);
+ break;
+ }
+ break;
+ }
+
+ // If this was the last use of the flags, we're done.
+ if (FlagsKilled)
+ break;
+ }
+
+ // If the flags were killed, we're done with this block.
+ if (FlagsKilled)
+ break;
+
+ // Otherwise we need to scan successors for ones where the flags live-in
+ // and queue those up for processing.
+ for (MachineBasicBlock *SuccMBB : UseMBB.successors())
+ if (SuccMBB->isLiveIn(X86::EFLAGS) &&
+ VisitedBlocks.insert(SuccMBB).second)
+ Blocks.push_back(SuccMBB);
+ } while (!Blocks.empty());
+
+ // Now rewrite the jumps that use the flags. These we handle specially
+ // because if there are multiple jumps in a single basic block we'll have
+ // to do surgery on the CFG.
+ MachineBasicBlock *LastJmpMBB = nullptr;
+ for (MachineInstr *JmpI : JmpIs) {
+ // Past the first jump within a basic block we need to split the blocks
+ // apart.
+ if (JmpI->getParent() == LastJmpMBB)
+ splitBlock(*JmpI->getParent(), *JmpI, *TII);
+ else
+ LastJmpMBB = JmpI->getParent();
+
+ rewriteCondJmp(TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
+ }
+
+ // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
+ // the copy's def operand is itself a kill.
+ }
+
+#ifndef NDEBUG
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &MI : MBB)
+ if (MI.getOpcode() == TargetOpcode::COPY &&
+ (MI.getOperand(0).getReg() == X86::EFLAGS ||
+ MI.getOperand(1).getReg() == X86::EFLAGS)) {
+ DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: "; MI.dump());
+ llvm_unreachable("Unlowered EFLAGS copy!");
+ }
+#endif
+
+ return true;
+}
+
+/// Collect any conditions that have already been set in registers so that we
+/// can re-use them rather than adding duplicates.
+CondRegArray
+X86FlagsCopyLoweringPass::collectCondsInRegs(MachineBasicBlock &MBB,
+ MachineInstr &CopyDefI) {
+ CondRegArray CondRegs = {};
+
+ // Scan backwards across the range of instructions with live EFLAGS.
+ for (MachineInstr &MI : llvm::reverse(
+ llvm::make_range(MBB.instr_begin(), CopyDefI.getIterator()))) {
+ X86::CondCode Cond = X86::getCondFromSETOpc(MI.getOpcode());
+ if (Cond != X86::COND_INVALID && MI.getOperand(0).isReg() &&
+ TRI->isVirtualRegister(MI.getOperand(0).getReg()))
+ CondRegs[Cond] = MI.getOperand(0).getReg();
+
+ // Stop scanning when we see the first definition of the EFLAGS as prior to
+ // this we would potentially capture the wrong flag state.
+ if (MI.findRegisterDefOperand(X86::EFLAGS))
+ break;
+ }
+ return CondRegs;
+}
+
+unsigned X86FlagsCopyLoweringPass::promoteCondToReg(
+ MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
+ DebugLoc TestLoc, X86::CondCode Cond) {
+ unsigned Reg = MRI->createVirtualRegister(PromoteRC);
+ auto SetI = BuildMI(TestMBB, TestPos, TestLoc,
+ TII->get(X86::getSETFromCond(Cond)), Reg);
+ (void)SetI;
+ DEBUG(dbgs() << " save cond: "; SetI->dump());
+ ++NumSetCCsInserted;
+ return Reg;
+}
+
+std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
+ MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
+ DebugLoc TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
+ unsigned &CondReg = CondRegs[Cond];
+ unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
+ if (!CondReg && !InvCondReg)
+ CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
+
+ if (CondReg)
+ return {CondReg, false};
+ else
+ return {InvCondReg, true};
+}
+
+void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Pos,
+ DebugLoc Loc, unsigned Reg) {
+ // We emit the test as a register/register `testb %reg, %reg` of the saved
+ // condition byte against itself (TEST8rr), not as a test against an
+ // immediate. The resulting ZF is clear exactly when the saved byte is
+ // non-zero, which is what the rewritten users of this test check (COND_NE,
+ // or COND_E when the inverse condition was the one saved).
+ auto TestI =
+ BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
+ (void)TestI;
+ DEBUG(dbgs() << " test cond: "; TestI->dump());
+ ++NumTestsInserted;
+}
+
+void X86FlagsCopyLoweringPass::rewriteArithmetic(
+ MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
+ DebugLoc TestLoc, MachineInstr &MI, MachineOperand &FlagUse,
+ CondRegArray &CondRegs) {
+ // Arithmetic is either reading CF or OF. Figure out which condition we need
+ // to preserve in a register.
+ X86::CondCode Cond;
+
+ // The addend to use to reset CF or OF when added to the flag value.
+ int Addend;
+
+ switch (getMnemonicFromOpcode(MI.getOpcode())) {
+ case FlagArithMnemonic::ADC:
+ case FlagArithMnemonic::ADCX:
+ case FlagArithMnemonic::RCL:
+ case FlagArithMnemonic::RCR:
+ case FlagArithMnemonic::SBB:
+ Cond = X86::COND_B; // CF == 1
+ // Set up an addend that, when added to a saved flag byte of one, carries
+ // out of the 8-bit register (255 + 1) and so sets CF again.
+ Addend = 255;
+ break;
+
+ case FlagArithMnemonic::ADOX:
+ Cond = X86::COND_O; // OF == 1
+ // Set up an addend that, when added to a saved flag byte of one, goes from
+ // positive to negative (127 + 1) and so overflows in the signed domain.
+ Addend = 127;
+ break;
+ }
+
+ // Now get a register that contains the value of the flag input to the
+ // arithmetic. We require exactly this flag to simplify the arithmetic
+ // required to materialize it back into the flag.
+ unsigned &CondReg = CondRegs[Cond];
+ if (!CondReg)
+ CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
+
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ // Insert an instruction that will set the flag back to the desired value.
+ unsigned TmpReg = MRI->createVirtualRegister(PromoteRC);
+ auto AddI =
+ BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri))
+ .addDef(TmpReg, RegState::Dead)
+ .addReg(CondReg)
+ .addImm(Addend);
+ (void)AddI;
+ DEBUG(dbgs() << " add cond: "; AddI->dump());
+ ++NumAddsInserted;
+ FlagUse.setIsKill(true);
+}
+
+void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
+ MachineBasicBlock::iterator TestPos,
+ DebugLoc TestLoc,
+ MachineInstr &CMovI,
+ MachineOperand &FlagUse,
+ CondRegArray &CondRegs) {
+ // First get the register containing this specific condition.
+ X86::CondCode Cond = X86::getCondFromCMovOpc(CMovI.getOpcode());
+ unsigned CondReg;
+ bool Inverted;
+ std::tie(CondReg, Inverted) =
+ getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);
+
+ MachineBasicBlock &MBB = *CMovI.getParent();
+
+ // Insert a direct test of the saved register.
+ insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg);
+
+ // Rewrite the CMov to use the !ZF flag from the test (but match register
+ // size and memory operand), and then kill its use of the flags afterward.
+ auto &CMovRC = *MRI->getRegClass(CMovI.getOperand(0).getReg());
+ CMovI.setDesc(TII->get(X86::getCMovFromCond(
+ Inverted ? X86::COND_E : X86::COND_NE, TRI->getRegSizeInBits(CMovRC) / 8,
+ !CMovI.memoperands_empty())));
+ FlagUse.setIsKill(true);
+ DEBUG(dbgs() << " fixed cmov: "; CMovI.dump());
+}
+
+void X86FlagsCopyLoweringPass::rewriteCondJmp(
+ MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
+ DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) {
+ // First get the register containing this specific condition.
+ X86::CondCode Cond = X86::getCondFromBranchOpc(JmpI.getOpcode());
+ unsigned CondReg;
+ bool Inverted;
+ std::tie(CondReg, Inverted) =
+ getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);
+
+ MachineBasicBlock &JmpMBB = *JmpI.getParent();
+
+ // Insert a direct test of the saved register.
+ insertTest(JmpMBB, JmpI.getIterator(), JmpI.getDebugLoc(), CondReg);
+
+ // Rewrite the jump to use the !ZF flag from the test, and kill its use of
+ // flags afterward.
+ JmpI.setDesc(TII->get(
+ X86::GetCondBranchFromCond(Inverted ? X86::COND_E : X86::COND_NE)));
+ const int ImplicitEFLAGSOpIdx = 1;
+ JmpI.getOperand(ImplicitEFLAGSOpIdx).setIsKill(true);
+ DEBUG(dbgs() << " fixed jCC: "; JmpI.dump());
+}
+
+void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
+ MachineOperand &FlagUse,
+ MachineInstr &CopyDefI) {
+ // Just replace this copy with the original copy def.
+ MRI->replaceRegWith(MI.getOperand(0).getReg(),
+ CopyDefI.getOperand(0).getReg());
+ MI.eraseFromParent();
+}
+
+void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
+ MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
+ DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
+ CondRegArray &CondRegs) {
+ // This routine is only used to handle pseudos for setting a register to zero
+ // or all ones based on CF. This is essentially the sign extended from 1-bit
+ // form of SETB and modeled with the SETB_C* pseudos. They require special
+ // handling as they aren't normal SETcc instructions and are lowered to an
+ // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
+ // they are only provided in reg-defining forms. A complicating factor is that
+ // they can define many different register widths.
+ assert(SetBI.getOperand(0).isReg() &&
+ "Cannot have a non-register defined operand to this variant of SETB!");
+
+ // Little helper to do the common final step of replacing the register def'ed
+ // by this SETB instruction with a new register and removing the SETB
+ // instruction.
+ auto RewriteToReg = [&](unsigned Reg) {
+ MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
+ SetBI.eraseFromParent();
+ };
+
+ // Grab the register class used for this particular instruction.
+ auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
+
+ MachineBasicBlock &MBB = *SetBI.getParent();
+ auto SetPos = SetBI.getIterator();
+ auto SetLoc = SetBI.getDebugLoc();
+
+ auto AdjustReg = [&](unsigned Reg) {
+ auto &OrigRC = *MRI->getRegClass(Reg);
+ if (&OrigRC == &SetBRC)
+ return Reg;
+
+ unsigned NewReg;
+
+ int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
+ int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
+ assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
+ assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
+ int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
+ X86::NoSubRegister, X86::sub_32bit};
+
+ // If the original size is smaller than the target *and* is smaller than 4
+ // bytes, we need to explicitly zero extend it. We always extend to 4-bytes
+ // to maximize the chance of being able to CSE that operation and to avoid
+ // partial dependency stalls extending to 2-bytes.
+ if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
+ NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
+ BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
+ .addReg(Reg);
+ if (&SetBRC == &X86::GR32RegClass)
+ return NewReg;
+ Reg = NewReg;
+ OrigRegSize = 4;
+ }
+
+ NewReg = MRI->createVirtualRegister(&SetBRC);
+ if (OrigRegSize < TargetRegSize) {
+ BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
+ NewReg)
+ .addImm(0)
+ .addReg(Reg)
+ .addImm(SubRegIdx[OrigRegSize]);
+ } else if (OrigRegSize > TargetRegSize) {
+ BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG),
+ NewReg)
+ .addReg(Reg)
+ .addImm(SubRegIdx[TargetRegSize]);
+ } else {
+ BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(Reg);
+ }
+ return NewReg;
+ };
+
+ unsigned &CondReg = CondRegs[X86::COND_B];
+ if (!CondReg)
+ CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
+
+ // Adjust the condition to have the desired register width by zero-extending
+ // as needed.
+ // FIXME: We should use a better API to avoid the local reference and using a
+ // different variable here.
+ unsigned ExtCondReg = AdjustReg(CondReg);
+
+ // Now we need to turn this into a bitmask. We do this by subtracting it from
+ // zero.
+ unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
+ BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
+ ZeroReg = AdjustReg(ZeroReg);
+
+ unsigned Sub;
+ switch (SetBI.getOpcode()) {
+ case X86::SETB_C8r:
+ Sub = X86::SUB8rr;
+ break;
+
+ case X86::SETB_C16r:
+ Sub = X86::SUB16rr;
+ break;
+
+ case X86::SETB_C32r:
+ Sub = X86::SUB32rr;
+ break;
+
+ case X86::SETB_C64r:
+ Sub = X86::SUB64rr;
+ break;
+
+ default:
+ llvm_unreachable("Invalid SETB_C* opcode!");
+ }
+ unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
+ BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
+ .addReg(ZeroReg)
+ .addReg(ExtCondReg);
+ return RewriteToReg(ResultReg);
+}
+
+void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
+ MachineBasicBlock::iterator TestPos,
+ DebugLoc TestLoc,
+ MachineInstr &SetCCI,
+ MachineOperand &FlagUse,
+ CondRegArray &CondRegs) {
+ X86::CondCode Cond = X86::getCondFromSETOpc(SetCCI.getOpcode());
+ // Note that we can't usefully rewrite this to the inverse without complex
+ // analysis of the users of the setCC. Largely we rely on duplicates which
+ // could have been avoided already being avoided here.
+ unsigned &CondReg = CondRegs[Cond];
+ if (!CondReg)
+ CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
+
+ // Rewriting a register def is trivial: we just replace the register and
+ // remove the setcc.
+ if (!SetCCI.mayStore()) {
+ assert(SetCCI.getOperand(0).isReg() &&
+ "Cannot have a non-register defined operand to SETcc!");
+ MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg);
+ SetCCI.eraseFromParent();
+ return;
+ }
+
+ // Otherwise, we need to emit a store.
+ auto MIB = BuildMI(*SetCCI.getParent(), SetCCI.getIterator(),
+ SetCCI.getDebugLoc(), TII->get(X86::MOV8mr));
+ // Copy the address operands.
+ for (int i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.add(SetCCI.getOperand(i));
+
+ MIB.addReg(CondReg);
+
+ MIB->setMemRefs(SetCCI.memoperands_begin(), SetCCI.memoperands_end());
+
+ SetCCI.eraseFromParent();
+ return;
+}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 10e19f92b4a6..c1ddb771e2fa 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -27781,11 +27781,16 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF));
- // Permit reads of the FLAGS register without it being defined.
+ // Permit reads of the EFLAGS and DF registers without them being defined.
// This intrinsic exists to read external processor state in flags, such as
// the trap flag, interrupt flag, and direction flag, none of which are
// modeled by the backend.
+ assert(Push->getOperand(2).getReg() == X86::EFLAGS &&
+ "Unexpected register in operand!");
Push->getOperand(2).setIsUndef();
+ assert(Push->getOperand(3).getReg() == X86::DF &&
+ "Unexpected register in operand!");
+ Push->getOperand(3).setIsUndef();
BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg());
MI.eraseFromParent(); // The pseudo is gone now.
@@ -37829,25 +37834,6 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
}
}
-/// This function checks if any of the users of EFLAGS copies the EFLAGS. We
-/// know that the code that lowers COPY of EFLAGS has to use the stack, and if
-/// we don't adjust the stack we clobber the first frame index.
-/// See X86InstrInfo::copyPhysReg.
-static bool hasCopyImplyingStackAdjustment(const MachineFunction &MF) {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- return any_of(MRI.reg_instructions(X86::EFLAGS),
- [](const MachineInstr &RI) { return RI.isCopy(); });
-}
-
-void X86TargetLowering::finalizeLowering(MachineFunction &MF) const {
- if (hasCopyImplyingStackAdjustment(MF)) {
- MachineFrameInfo &MFI = MF.getFrameInfo();
- MFI.setHasCopyImplyingStackAdjustment(true);
- }
-
- TargetLoweringBase::finalizeLowering(MF);
-}
-
/// This method query the target whether it is beneficial for dag combiner to
/// promote the specified node. If true, it should return the desired promotion
/// type by reference.
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 3aa9d01bff20..7820c3e032e5 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1099,9 +1099,6 @@ namespace llvm {
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
-
- void finalizeLowering(MachineFunction &MF) const override;
-
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index d09deb5b7584..98cc8fb7439e 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1334,7 +1334,7 @@ let Predicates = [HasBMI2] in {
}
//===----------------------------------------------------------------------===//
-// ADCX Instruction
+// ADCX and ADOX Instructions
//
let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
Constraints = "$src0 = $dst", AddedComplexity = 10 in {
@@ -1349,6 +1349,15 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
[(set GR64:$dst, EFLAGS,
(X86adc_flag GR64:$src0, GR64:$src, EFLAGS))],
IIC_BIN_CARRY_NONMEM>, T8PD;
+
+ // We don't have patterns for ADOX yet.
+ let hasSideEffects = 0 in {
+ def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src0, GR32:$src),
+ "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS;
+
+ def ADOX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src0, GR64:$src),
+ "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS;
+ } // hasSideEffects = 0
} // SchedRW
let mayLoad = 1, SchedRW = [WriteALULd] in {
@@ -1363,27 +1372,14 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
[(set GR64:$dst, EFLAGS,
(X86adc_flag GR64:$src0, (loadi64 addr:$src), EFLAGS))],
IIC_BIN_CARRY_MEM>, T8PD;
- }
-}
-//===----------------------------------------------------------------------===//
-// ADOX Instruction
-//
-let Predicates = [HasADX], hasSideEffects = 0, Defs = [EFLAGS],
- Uses = [EFLAGS] in {
- let SchedRW = [WriteALU] in {
- def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS;
-
- def ADOX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS;
- } // SchedRW
-
- let mayLoad = 1, SchedRW = [WriteALULd] in {
- def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ // We don't have patterns for ADOX yet.
+ let hasSideEffects = 0 in {
+ def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src0, i32mem:$src),
"adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS;
- def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src0, i64mem:$src),
"adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS;
}
+ } // mayLoad, SchedRW
}
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index d66d9258e96f..b3371c96cc29 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -473,7 +473,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
usesCustomInserter = 1, Uses = [ESP, SSP] in {
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"# TLS_addr32",
@@ -493,7 +493,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
usesCustomInserter = 1, Uses = [RSP, SSP] in {
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
"# TLS_addr64",
@@ -509,7 +509,7 @@ def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
// For i386, the address of the thunk is passed on the stack, on return the
// address of the variable is in %eax. %ecx is trashed during the function
// call. All other registers are preserved.
-let Defs = [EAX, ECX, EFLAGS],
+let Defs = [EAX, ECX, EFLAGS, DF],
Uses = [ESP, SSP],
usesCustomInserter = 1 in
def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
@@ -522,7 +522,7 @@ def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
// %rdi. The lowering will do the right thing with RDI.
// On return the address of the variable is in %rax. All other
// registers are preserved.
-let Defs = [RAX, EFLAGS],
+let Defs = [RAX, EFLAGS, DF],
Uses = [RSP, SSP],
usesCustomInserter = 1 in
def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 7ca1c58184f6..11ada51a8704 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -5782,7 +5782,7 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
return false;
}
-static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
+X86::CondCode X86::getCondFromBranchOpc(unsigned BrOpc) {
switch (BrOpc) {
default: return X86::COND_INVALID;
case X86::JE_1: return X86::COND_E;
@@ -5805,7 +5805,7 @@ static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
}
/// Return condition code of a SET opcode.
-static X86::CondCode getCondFromSETOpc(unsigned Opc) {
+X86::CondCode X86::getCondFromSETOpc(unsigned Opc) {
switch (Opc) {
default: return X86::COND_INVALID;
case X86::SETAr: case X86::SETAm: return X86::COND_A;
@@ -6130,7 +6130,7 @@ void X86InstrInfo::replaceBranchWithTailCall(
if (!I->isBranch())
assert(0 && "Can't find the branch to replace!");
- X86::CondCode CC = getCondFromBranchOpc(I->getOpcode());
+ X86::CondCode CC = X86::getCondFromBranchOpc(I->getOpcode());
assert(BranchCond.size() == 1);
if (CC != BranchCond[0].getImm())
continue;
@@ -6237,7 +6237,7 @@ bool X86InstrInfo::AnalyzeBranchImpl(
}
// Handle conditional branches.
- X86::CondCode BranchCode = getCondFromBranchOpc(I->getOpcode());
+ X86::CondCode BranchCode = X86::getCondFromBranchOpc(I->getOpcode());
if (BranchCode == X86::COND_INVALID)
return true; // Can't handle indirect branch.
@@ -6433,7 +6433,7 @@ unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB,
if (I->isDebugValue())
continue;
if (I->getOpcode() != X86::JMP_1 &&
- getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
+ X86::getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
break;
// Remove the branch.
I->eraseFromParent();
@@ -6710,102 +6710,12 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- bool FromEFLAGS = SrcReg == X86::EFLAGS;
- bool ToEFLAGS = DestReg == X86::EFLAGS;
- int Reg = FromEFLAGS ? DestReg : SrcReg;
- bool is32 = X86::GR32RegClass.contains(Reg);
- bool is64 = X86::GR64RegClass.contains(Reg);
-
- if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) {
- int Mov = is64 ? X86::MOV64rr : X86::MOV32rr;
- int Push = is64 ? X86::PUSH64r : X86::PUSH32r;
- int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32;
- int Pop = is64 ? X86::POP64r : X86::POP32r;
- int PopF = is64 ? X86::POPF64 : X86::POPF32;
- int AX = is64 ? X86::RAX : X86::EAX;
-
- if (!Subtarget.hasLAHFSAHF()) {
- assert(Subtarget.is64Bit() &&
- "Not having LAHF/SAHF only happens on 64-bit.");
- // Moving EFLAGS to / from another register requires a push and a pop.
- // Notice that we have to adjust the stack if we don't want to clobber the
- // first frame index. See X86FrameLowering.cpp - usesTheStack.
- if (FromEFLAGS) {
- BuildMI(MBB, MI, DL, get(PushF));
- BuildMI(MBB, MI, DL, get(Pop), DestReg);
- }
- if (ToEFLAGS) {
- BuildMI(MBB, MI, DL, get(Push))
- .addReg(SrcReg, getKillRegState(KillSrc));
- BuildMI(MBB, MI, DL, get(PopF));
- }
- return;
- }
-
- // The flags need to be saved, but saving EFLAGS with PUSHF/POPF is
- // inefficient. Instead:
- // - Save the overflow flag OF into AL using SETO, and restore it using a
- // signed 8-bit addition of AL and INT8_MAX.
- // - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH
- // using LAHF/SAHF.
- // - When RAX/EAX is live and isn't the destination register, make sure it
- // isn't clobbered by PUSH/POP'ing it before and after saving/restoring
- // the flags.
- // This approach is ~2.25x faster than using PUSHF/POPF.
- //
- // This is still somewhat inefficient because we don't know which flags are
- // actually live inside EFLAGS. Were we able to do a single SETcc instead of
- // SETO+LAHF / ADDB+SAHF the code could be 1.02x faster.
- //
- // PUSHF/POPF is also potentially incorrect because it affects other flags
- // such as TF/IF/DF, which LLVM doesn't model.
- //
- // Notice that we have to adjust the stack if we don't want to clobber the
- // first frame index.
- // See X86ISelLowering.cpp - X86::hasCopyImplyingStackAdjustment.
-
- const TargetRegisterInfo &TRI = getRegisterInfo();
- MachineBasicBlock::LivenessQueryResult LQR =
- MBB.computeRegisterLiveness(&TRI, AX, MI);
- // We do not want to save and restore AX if we do not have to.
- // Moreover, if we do so whereas AX is dead, we would need to set
- // an undef flag on the use of AX, otherwise the verifier will
- // complain that we read an undef value.
- // We do not want to change the behavior of the machine verifier
- // as this is usually wrong to read an undef value.
- if (MachineBasicBlock::LQR_Unknown == LQR) {
- LivePhysRegs LPR(TRI);
- LPR.addLiveOuts(MBB);
- MachineBasicBlock::iterator I = MBB.end();
- while (I != MI) {
- --I;
- LPR.stepBackward(*I);
- }
- // AX contains the top most register in the aliasing hierarchy.
- // It may not be live, but one of its aliases may be.
- for (MCRegAliasIterator AI(AX, &TRI, true);
- AI.isValid() && LQR != MachineBasicBlock::LQR_Live; ++AI)
- LQR = LPR.contains(*AI) ? MachineBasicBlock::LQR_Live
- : MachineBasicBlock::LQR_Dead;
- }
- bool AXDead = (Reg == AX) || (MachineBasicBlock::LQR_Dead == LQR);
- if (!AXDead)
- BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true));
- if (FromEFLAGS) {
- BuildMI(MBB, MI, DL, get(X86::SETOr), X86::AL);
- BuildMI(MBB, MI, DL, get(X86::LAHF));
- BuildMI(MBB, MI, DL, get(Mov), Reg).addReg(AX);
- }
- if (ToEFLAGS) {
- BuildMI(MBB, MI, DL, get(Mov), AX).addReg(Reg, getKillRegState(KillSrc));
- BuildMI(MBB, MI, DL, get(X86::ADD8ri), X86::AL)
- .addReg(X86::AL)
- .addImm(INT8_MAX);
- BuildMI(MBB, MI, DL, get(X86::SAHF));
- }
- if (!AXDead)
- BuildMI(MBB, MI, DL, get(Pop), AX);
- return;
+ if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) {
+ // FIXME: We use a fatal error here because historically LLVM has tried
+ // to lower some of these physreg copies and we want to ensure we get
+ // reasonable bug reports if someone encounters a case no other testing
+ // found. This path should be removed after the LLVM 7 release.
+ report_fatal_error("Unable to copy EFLAGS physical register!");
}
DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
@@ -7465,9 +7375,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
if (IsCmpZero || IsSwapped) {
// We decode the condition code from opcode.
if (Instr.isBranch())
- OldCC = getCondFromBranchOpc(Instr.getOpcode());
+ OldCC = X86::getCondFromBranchOpc(Instr.getOpcode());
else {
- OldCC = getCondFromSETOpc(Instr.getOpcode());
+ OldCC = X86::getCondFromSETOpc(Instr.getOpcode());
if (OldCC != X86::COND_INVALID)
OpcIsSET = true;
else
@@ -9413,8 +9323,9 @@ bool X86InstrInfo::
isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
// FIXME: Return false for x87 stack register classes for now. We can't
// allow any loads of these registers before FpGet_ST0_80.
- return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
- RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
+ return !(RC == &X86::CCRRegClass || RC == &X86::DFCCRRegClass ||
+ RC == &X86::RFP32RegClass || RC == &X86::RFP64RegClass ||
+ RC == &X86::RFP80RegClass);
}
/// Return a virtual register initialized with the
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 02a09c340cef..2b5ad934f9b1 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -77,6 +77,12 @@ unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false);
unsigned getCMovFromCond(CondCode CC, unsigned RegBytes,
bool HasMemoryOperand = false);
+// Turn jCC opcode into condition code.
+CondCode getCondFromBranchOpc(unsigned Opc);
+
+// Turn setCC opcode into condition code.
+CondCode getCondFromSETOpc(unsigned Opc);
+
// Turn CMov opcode into condition code.
CondCode getCondFromCMovOpc(unsigned Opc);
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index a657b19c08c9..68f40c28d527 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1235,18 +1235,18 @@ let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
SchedRW = [WriteRMW] in {
- let Defs = [ESP, EFLAGS], Uses = [ESP] in
+ let Defs = [ESP, EFLAGS, DF], Uses = [ESP] in
def WRFLAGS32 : PseudoI<(outs), (ins GR32:$src),
[(int_x86_flags_write_u32 GR32:$src)]>,
Requires<[Not64BitMode]>;
- let Defs = [RSP, EFLAGS], Uses = [RSP] in
+ let Defs = [RSP, EFLAGS, DF], Uses = [RSP] in
def WRFLAGS64 : PseudoI<(outs), (ins GR64:$src),
[(int_x86_flags_write_u64 GR64:$src)]>,
Requires<[In64BitMode]>;
}
-let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0,
+let Defs = [ESP, EFLAGS, DF], Uses = [ESP], mayLoad = 1, hasSideEffects=0,
SchedRW = [WriteLoad] in {
def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>,
OpSize16;
@@ -1254,7 +1254,7 @@ def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>,
OpSize32, Requires<[Not64BitMode]>;
}
-let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, hasSideEffects=0,
+let Defs = [ESP], Uses = [ESP, EFLAGS, DF], mayStore = 1, hasSideEffects=0,
SchedRW = [WriteStore] in {
def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>,
OpSize16;
@@ -1294,10 +1294,10 @@ def PUSH64i32 : Ii32S<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
Requires<[In64BitMode]>;
}
-let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, hasSideEffects=0 in
+let Defs = [RSP, EFLAGS, DF], Uses = [RSP], mayLoad = 1, hasSideEffects=0 in
def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>,
OpSize32, Requires<[In64BitMode]>, Sched<[WriteLoad]>;
-let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, hasSideEffects=0 in
+let Defs = [RSP], Uses = [RSP, EFLAGS, DF], mayStore = 1, hasSideEffects=0 in
def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>,
OpSize32, Requires<[In64BitMode]>, Sched<[WriteStore]>;
@@ -1382,8 +1382,7 @@ def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
} // Defs = [EFLAGS]
let SchedRW = [WriteMicrocoded] in {
-// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
-let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
+let Defs = [EDI,ESI], Uses = [EDI,ESI,DF] in {
def MOVSB : I<0xA4, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src),
"movsb\t{$src, $dst|$dst, $src}", [], IIC_MOVS>;
def MOVSW : I<0xA5, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src),
@@ -1394,36 +1393,33 @@ def MOVSQ : RI<0xA5, RawFrmDstSrc, (outs), (ins dstidx64:$dst, srcidx64:$src),
"movsq\t{$src, $dst|$dst, $src}", [], IIC_MOVS>;
}
-// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
-let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
+let Defs = [EDI], Uses = [AL,EDI,DF] in
def STOSB : I<0xAA, RawFrmDst, (outs), (ins dstidx8:$dst),
"stosb\t{%al, $dst|$dst, al}", [], IIC_STOS>;
-let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in
+let Defs = [EDI], Uses = [AX,EDI,DF] in
def STOSW : I<0xAB, RawFrmDst, (outs), (ins dstidx16:$dst),
"stosw\t{%ax, $dst|$dst, ax}", [], IIC_STOS>, OpSize16;
-let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
+let Defs = [EDI], Uses = [EAX,EDI,DF] in
def STOSL : I<0xAB, RawFrmDst, (outs), (ins dstidx32:$dst),
"stos{l|d}\t{%eax, $dst|$dst, eax}", [], IIC_STOS>, OpSize32;
-let Defs = [RDI], Uses = [RAX,RDI,EFLAGS] in
+let Defs = [RDI], Uses = [RAX,RDI,DF] in
def STOSQ : RI<0xAB, RawFrmDst, (outs), (ins dstidx64:$dst),
"stosq\t{%rax, $dst|$dst, rax}", [], IIC_STOS>;
-// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
-let Defs = [EDI,EFLAGS], Uses = [AL,EDI,EFLAGS] in
+let Defs = [EDI,EFLAGS], Uses = [AL,EDI,DF] in
def SCASB : I<0xAE, RawFrmDst, (outs), (ins dstidx8:$dst),
"scasb\t{$dst, %al|al, $dst}", [], IIC_SCAS>;
-let Defs = [EDI,EFLAGS], Uses = [AX,EDI,EFLAGS] in
+let Defs = [EDI,EFLAGS], Uses = [AX,EDI,DF] in
def SCASW : I<0xAF, RawFrmDst, (outs), (ins dstidx16:$dst),
"scasw\t{$dst, %ax|ax, $dst}", [], IIC_SCAS>, OpSize16;
-let Defs = [EDI,EFLAGS], Uses = [EAX,EDI,EFLAGS] in
+let Defs = [EDI,EFLAGS], Uses = [EAX,EDI,DF] in
def SCASL : I<0xAF, RawFrmDst, (outs), (ins dstidx32:$dst),
"scas{l|d}\t{$dst, %eax|eax, $dst}", [], IIC_SCAS>, OpSize32;
-let Defs = [EDI,EFLAGS], Uses = [RAX,EDI,EFLAGS] in
+let Defs = [EDI,EFLAGS], Uses = [RAX,EDI,DF] in
def SCASQ : RI<0xAF, RawFrmDst, (outs), (ins dstidx64:$dst),
"scasq\t{$dst, %rax|rax, $dst}", [], IIC_SCAS>;
-// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
-let Defs = [EDI,ESI,EFLAGS], Uses = [EDI,ESI,EFLAGS] in {
+let Defs = [EDI,ESI,EFLAGS], Uses = [EDI,ESI,DF] in {
def CMPSB : I<0xA6, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src),
"cmpsb\t{$dst, $src|$src, $dst}", [], IIC_CMPS>;
def CMPSW : I<0xA7, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src),
@@ -2070,8 +2066,7 @@ def DATA32_PREFIX : I<0x66, RawFrm, (outs), (ins), "data32", [], IIC_NOP>,
} // SchedRW
// Repeat string operation instruction prefixes
-// These use the DF flag in the EFLAGS register to inc or dec ECX
-let Defs = [ECX], Uses = [ECX,EFLAGS], SchedRW = [WriteMicrocoded] in {
+let Defs = [ECX], Uses = [ECX,DF], SchedRW = [WriteMicrocoded] in {
// Repeat (used with INS, OUTS, MOVS, LODS and STOS)
def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>;
// Repeat while not equal (used with CMPS and SCAS)
@@ -2080,24 +2075,22 @@ def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>;
// String manipulation instructions
let SchedRW = [WriteMicrocoded] in {
-// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
-let Defs = [AL,ESI], Uses = [ESI,EFLAGS] in
+let Defs = [AL,ESI], Uses = [ESI,DF] in
def LODSB : I<0xAC, RawFrmSrc, (outs), (ins srcidx8:$src),
"lodsb\t{$src, %al|al, $src}", [], IIC_LODS>;
-let Defs = [AX,ESI], Uses = [ESI,EFLAGS] in
+let Defs = [AX,ESI], Uses = [ESI,DF] in
def LODSW : I<0xAD, RawFrmSrc, (outs), (ins srcidx16:$src),
"lodsw\t{$src, %ax|ax, $src}", [], IIC_LODS>, OpSize16;
-let Defs = [EAX,ESI], Uses = [ESI,EFLAGS] in
+let Defs = [EAX,ESI], Uses = [ESI,DF] in
def LODSL : I<0xAD, RawFrmSrc, (outs), (ins srcidx32:$src),
"lods{l|d}\t{$src, %eax|eax, $src}", [], IIC_LODS>, OpSize32;
-let Defs = [RAX,ESI], Uses = [ESI,EFLAGS] in
+let Defs = [RAX,ESI], Uses = [ESI,DF] in
def LODSQ : RI<0xAD, RawFrmSrc, (outs), (ins srcidx64:$src),
"lodsq\t{$src, %rax|rax, $src}", [], IIC_LODS>;
}
let SchedRW = [WriteSystem] in {
-// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
-let Defs = [ESI], Uses = [DX,ESI,EFLAGS] in {
+let Defs = [ESI], Uses = [DX,ESI,DF] in {
def OUTSB : I<0x6E, RawFrmSrc, (outs), (ins srcidx8:$src),
"outsb\t{$src, %dx|dx, $src}", [], IIC_OUTS>;
def OUTSW : I<0x6F, RawFrmSrc, (outs), (ins srcidx16:$src),
@@ -2106,8 +2099,7 @@ def OUTSL : I<0x6F, RawFrmSrc, (outs), (ins srcidx32:$src),
"outs{l|d}\t{$src, %dx|dx, $src}", [], IIC_OUTS>, OpSize32;
}
-// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
-let Defs = [EDI], Uses = [DX,EDI,EFLAGS] in {
+let Defs = [EDI], Uses = [DX,EDI,DF] in {
def INSB : I<0x6C, RawFrmDst, (outs), (ins dstidx8:$dst),
"insb\t{%dx, $dst|$dst, dx}", [], IIC_INS>;
def INSW : I<0x6D, RawFrmDst, (outs), (ins dstidx16:$dst),
@@ -2117,19 +2109,22 @@ def INSL : I<0x6D, RawFrmDst, (outs), (ins dstidx32:$dst),
}
}
-// Flag instructions
-let SchedRW = [WriteALU] in {
-def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC>;
-def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_STC>;
-def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>;
-def STI : I<0xFB, RawFrm, (outs), (ins), "sti", [], IIC_STI>;
+// EFLAGS management instructions.
+let SchedRW = [WriteALU], Defs = [EFLAGS], Uses = [EFLAGS] in {
+def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC_CMC_STC>;
+def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_CLC_CMC_STC>;
+def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CLC_CMC_STC>;
+}
+
+// DF management instructions.
+// FIXME: These are a bit more expensive than CLC and STC. We should consider
+// adjusting their schedule bucket.
+let SchedRW = [WriteALU], Defs = [DF] in {
def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", [], IIC_CLD>;
def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>;
-def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CMC>;
-
-def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB;
}
+
// Table lookup instructions
let Uses = [AL,EBX], Defs = [AL], hasSideEffects = 0, mayLoad = 1 in
def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>,
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 40d2dca4f9ec..576f87b13ab4 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -693,6 +693,19 @@ let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in {
} // SchedRW
//===----------------------------------------------------------------------===//
+// TS flag control instruction.
+let SchedRW = [WriteSystem] in {
+def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB;
+}
+
+//===----------------------------------------------------------------------===//
+// IF (inside EFLAGS) management instructions.
+let SchedRW = [WriteSystem], Uses = [EFLAGS], Defs = [EFLAGS] in {
+def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>;
+def STI : I<0xFB, RawFrm, (outs), (ins), "sti", [], IIC_STI>;
+}
+
+//===----------------------------------------------------------------------===//
// RDPID Instruction
let SchedRW = [WriteSystem] in {
def RDPID32 : I<0xC7, MRM7r, (outs GR32:$src), (ins),
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 2341e1fb0fac..1a776dcd04eb 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -251,9 +251,19 @@ def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>;
// Floating-point status word
def FPSW : X86Reg<"fpsw", 0>;
-// Status flags register
+// Status flags register.
+//
+// Note that some flags that are commonly thought of as part of the status
+// flags register are modeled separately. Typically this is due to instructions
+// reading and updating those flags independently of all the others. We don't
+// want to create false dependencies between these instructions and so we use
+// a separate register to model them.
def EFLAGS : X86Reg<"flags", 0>;
+// The direction flag.
+def DF : X86Reg<"DF", 0>;
+
+
// Segment registers
def CS : X86Reg<"cs", 1>;
def DS : X86Reg<"ds", 3>;
@@ -497,6 +507,10 @@ def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
+def DFCCR : RegisterClass<"X86", [i32], 32, (add DF)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+ let isAllocatable = 0;
+}
// AVX-512 vector/mask registers.
def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 2e21a97541b2..078d459634ce 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -608,12 +608,10 @@ def IIC_CMPXCHG_8B : InstrItinClass;
def IIC_CMPXCHG_16B : InstrItinClass;
def IIC_LODS : InstrItinClass;
def IIC_OUTS : InstrItinClass;
-def IIC_CLC : InstrItinClass;
+def IIC_CLC_CMC_STC : InstrItinClass;
def IIC_CLD : InstrItinClass;
def IIC_CLI : InstrItinClass;
-def IIC_CMC : InstrItinClass;
def IIC_CLTS : InstrItinClass;
-def IIC_STC : InstrItinClass;
def IIC_STI : InstrItinClass;
def IIC_STD : InstrItinClass;
def IIC_XLAT : InstrItinClass;
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index e052ad98104c..460b9823a7e7 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -514,12 +514,10 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_CMPXCHG_16B, [InstrStage<22, [Port0, Port1]>] >,
InstrItinData<IIC_LODS, [InstrStage<2, [Port0, Port1]>] >,
InstrItinData<IIC_OUTS, [InstrStage<74, [Port0, Port1]>] >,
- InstrItinData<IIC_CLC, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_CLC_CMC_STC, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_CLD, [InstrStage<3, [Port0, Port1]>] >,
InstrItinData<IIC_CLI, [InstrStage<14, [Port0, Port1]>] >,
- InstrItinData<IIC_CMC, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_CLTS, [InstrStage<33, [Port0, Port1]>] >,
- InstrItinData<IIC_STC, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_STI, [InstrStage<17, [Port0, Port1]>] >,
InstrItinData<IIC_STD, [InstrStage<21, [Port0, Port1]>] >,
InstrItinData<IIC_XLAT, [InstrStage<6, [Port0, Port1]>] >,
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index ac242e1c00e0..e41e16d82d83 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -62,6 +62,7 @@ void initializeX86CallFrameOptimizationPass(PassRegistry &);
void initializeX86CmovConverterPassPass(PassRegistry &);
void initializeX86ExecutionDepsFixPass(PassRegistry &);
void initializeX86DomainReassignmentPass(PassRegistry &);
+void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
} // end namespace llvm
@@ -80,6 +81,7 @@ extern "C" void LLVMInitializeX86Target() {
initializeX86CmovConverterPassPass(PR);
initializeX86ExecutionDepsFixPass(PR);
initializeX86DomainReassignmentPass(PR);
+ initializeX86FlagsCopyLoweringPassPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -415,6 +417,7 @@ void X86PassConfig::addPreRegAlloc() {
addPass(createX86CallFrameOptimization());
}
+ addPass(createX86FlagsCopyLoweringPass());
addPass(createX86WinAllocaExpander());
}
void X86PassConfig::addMachineSSAOptimization() {
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index b25cbcad3b9d..76c4a8fbc16e 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -847,10 +847,20 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
if (CS.getInstruction() == nullptr || !CS.isCallee(&U))
return nullptr;
+ // Can't change signature of musttail callee
+ if (CS.isMustTailCall())
+ return nullptr;
+
if (CS.getInstruction()->getParent()->getParent() == F)
isSelfRecursive = true;
}
+ // Can't change signature of musttail caller
+ // FIXME: Support promoting whole chain of musttail functions
+ for (BasicBlock &BB : *F)
+ if (BB.getTerminatingMustTailCall())
+ return nullptr;
+
const DataLayout &DL = F->getParent()->getDataLayout();
AAResults &AAR = AARGetter(*F);
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 5446541550e5..b2afa6f2c9cd 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -507,14 +507,28 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
// MaybeLive. Initialized to a list of RetCount empty lists.
RetUses MaybeLiveRetUses(RetCount);
- for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ bool HasMustTailCalls = false;
+
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType()
!= F.getFunctionType()->getReturnType()) {
// We don't support old style multiple return values.
MarkLive(F);
return;
}
+ }
+
+ // If we have any returns of `musttail` results - the signature can't
+ // change
+ if (BB->getTerminatingMustTailCall() != nullptr)
+ HasMustTailCalls = true;
+ }
+
+ if (HasMustTailCalls) {
+ DEBUG(dbgs() << "DeadArgumentEliminationPass - " << F.getName()
+ << " has musttail calls\n");
+ }
if (!F.hasLocalLinkage() && (!ShouldHackArguments || F.isIntrinsic())) {
MarkLive(F);
@@ -526,6 +540,9 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
// Keep track of the number of live retvals, so we can skip checks once all
// of them turn out to be live.
unsigned NumLiveRetVals = 0;
+
+ bool HasMustTailCallers = false;
+
// Loop all uses of the function.
for (const Use &U : F.uses()) {
// If the function is PASSED IN as an argument, its address has been
@@ -536,6 +553,11 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
return;
}
+ // The number of arguments for `musttail` call must match the number of
+ // arguments of the caller
+ if (CS.isMustTailCall())
+ HasMustTailCallers = true;
+
// If this use is anything other than a call site, the function is alive.
const Instruction *TheCall = CS.getInstruction();
if (!TheCall) { // Not a direct call site?
@@ -580,6 +602,11 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
}
}
+ if (HasMustTailCallers) {
+ DEBUG(dbgs() << "DeadArgumentEliminationPass - " << F.getName()
+ << " has musttail callers\n");
+ }
+
// Now we've inspected all callers, record the liveness of our return values.
for (unsigned i = 0; i != RetCount; ++i)
MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
@@ -593,12 +620,19 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
for (Function::const_arg_iterator AI = F.arg_begin(),
E = F.arg_end(); AI != E; ++AI, ++i) {
Liveness Result;
- if (F.getFunctionType()->isVarArg()) {
+ if (F.getFunctionType()->isVarArg() || HasMustTailCallers ||
+ HasMustTailCalls) {
// Variadic functions will already have a va_arg function expanded inside
// them, making them potentially very sensitive to ABI changes resulting
// from removing arguments entirely, so don't. For example AArch64 handles
// register and stack HFAs very differently, and this is reflected in the
// IR which has already been generated.
+ //
+ // `musttail` calls to this function restrict argument removal attempts.
+ // The signature of the caller must match the signature of the function.
+ //
+ // `musttail` calls in this function prevent us from changing its
+ // signature
Result = Live;
} else {
// See what the effect of this use is (recording any uses that cause
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 4bb2984e3b47..e0bbf45d316a 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2099,8 +2099,31 @@ static void RemoveNestAttribute(Function *F) {
/// GHC, or anyregcc.
static bool isProfitableToMakeFastCC(Function *F) {
CallingConv::ID CC = F->getCallingConv();
+
// FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
- return CC == CallingConv::C || CC == CallingConv::X86_ThisCall;
+ if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
+ return false;
+
+ // FIXME: Change CC for the whole chain of musttail calls when possible.
+ //
+ // Can't change CC of the function that either has musttail calls, or is a
+ // musttail callee itself
+ for (User *U : F->users()) {
+ if (isa<BlockAddress>(U))
+ continue;
+ CallInst* CI = dyn_cast<CallInst>(U);
+ if (!CI)
+ continue;
+
+ if (CI->isMustTailCall())
+ return false;
+ }
+
+ for (BasicBlock &BB : *F)
+ if (BB.getTerminatingMustTailCall())
+ return false;
+
+ return true;
}
static bool
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 76b90391fbb1..8886af90ba65 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -638,6 +638,19 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
DEBUG(dbgs() << " }\n");
}
+// Don't merge tiny functions using a thunk, since it can just end up
+// making the function larger.
+static bool isThunkProfitable(Function * F) {
+ if (F->size() == 1) {
+ if (F->front().size() <= 2) {
+ DEBUG(dbgs() << "isThunkProfitable: " << F->getName()
+ << " is too small to bother creating a thunk for\n");
+ return false;
+ }
+ }
+ return true;
+}
+
// Replace G with a simple tail call to bitcast(F). Also (unless
// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F),
// delete G. Under MergeFunctionsPDI, we use G itself for creating
@@ -647,39 +660,6 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
// For better debugability, under MergeFunctionsPDI, we do not modify G's
// call sites to point to F even when within the same translation unit.
void MergeFunctions::writeThunk(Function *F, Function *G) {
- if (!G->isInterposable() && !MergeFunctionsPDI) {
- if (G->hasGlobalUnnamedAddr()) {
- // G might have been a key in our GlobalNumberState, and it's illegal
- // to replace a key in ValueMap<GlobalValue *> with a non-global.
- GlobalNumbers.erase(G);
- // If G's address is not significant, replace it entirely.
- Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
- G->replaceAllUsesWith(BitcastF);
- } else {
- // Redirect direct callers of G to F. (See note on MergeFunctionsPDI
- // above).
- replaceDirectCallers(G, F);
- }
- }
-
- // If G was internal then we may have replaced all uses of G with F. If so,
- // stop here and delete G. There's no need for a thunk. (See note on
- // MergeFunctionsPDI above).
- if (G->hasLocalLinkage() && G->use_empty() && !MergeFunctionsPDI) {
- G->eraseFromParent();
- return;
- }
-
- // Don't merge tiny functions using a thunk, since it can just end up
- // making the function larger.
- if (F->size() == 1) {
- if (F->front().size() <= 2) {
- DEBUG(dbgs() << "writeThunk: " << F->getName()
- << " is too small to bother creating a thunk for\n");
- return;
- }
- }
-
BasicBlock *GEntryBlock = nullptr;
std::vector<Instruction *> PDIUnrelatedWL;
BasicBlock *BB = nullptr;
@@ -754,6 +734,10 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
if (F->isInterposable()) {
assert(G->isInterposable());
+ if (!isThunkProfitable(F)) {
+ return;
+ }
+
// Make them both thunks to the same internal function.
Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
F->getParent());
@@ -770,11 +754,41 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
F->setAlignment(MaxAlignment);
F->setLinkage(GlobalValue::PrivateLinkage);
++NumDoubleWeak;
+ ++NumFunctionsMerged;
} else {
+ // For better debugability, under MergeFunctionsPDI, we do not modify G's
+ // call sites to point to F even when within the same translation unit.
+ if (!G->isInterposable() && !MergeFunctionsPDI) {
+ if (G->hasGlobalUnnamedAddr()) {
+ // G might have been a key in our GlobalNumberState, and it's illegal
+ // to replace a key in ValueMap<GlobalValue *> with a non-global.
+ GlobalNumbers.erase(G);
+ // If G's address is not significant, replace it entirely.
+ Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+ G->replaceAllUsesWith(BitcastF);
+ } else {
+ // Redirect direct callers of G to F. (See note on MergeFunctionsPDI
+ // above).
+ replaceDirectCallers(G, F);
+ }
+ }
+
+ // If G was internal then we may have replaced all uses of G with F. If so,
+ // stop here and delete G. There's no need for a thunk. (See note on
+ // MergeFunctionsPDI above).
+ if (G->hasLocalLinkage() && G->use_empty() && !MergeFunctionsPDI) {
+ G->eraseFromParent();
+ ++NumFunctionsMerged;
+ return;
+ }
+
+ if (!isThunkProfitable(F)) {
+ return;
+ }
+
writeThunk(F, G);
+ ++NumFunctionsMerged;
}
-
- ++NumFunctionsMerged;
}
/// Replace function F by function G.
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index b332e75c7feb..8fa7d0684b94 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -34,6 +34,7 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
+#include "llvm-c/Initialization.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -1946,13 +1947,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// addrspacecast between types is canonicalized as a bitcast, then an
// addrspacecast. To take advantage of the below bitcast + struct GEP, look
// through the addrspacecast.
+ Value *ASCStrippedPtrOp = PtrOp;
if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
// X = bitcast A addrspace(1)* to B addrspace(1)*
// Y = addrspacecast A addrspace(1)* to B addrspace(2)*
// Z = gep Y, <...constant indices...>
// Into an addrspacecasted GEP of the struct.
if (BitCastInst *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
- PtrOp = BC;
+ ASCStrippedPtrOp = BC;
}
/// See if we can simplify:
@@ -1960,7 +1962,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
/// Y = gep X, <...constant indices...>
/// into a gep of the original struct. This is important for SROA and alias
/// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp)) {
Value *Operand = BCI->getOperand(0);
PointerType *OpType = cast<PointerType>(Operand->getType());
unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType());
diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 4edea7cc3c82..7488cd5af8be 100644
--- a/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -201,6 +201,46 @@ static bool canSplitCallSite(CallSite CS) {
return CallSiteBB->canSplitPredecessors();
}
+static Instruction *cloneInstForMustTail(Instruction *I, Instruction *Before,
+ Value *V) {
+ Instruction *Copy = I->clone();
+ Copy->setName(I->getName());
+ Copy->insertBefore(Before);
+ if (V)
+ Copy->setOperand(0, V);
+ return Copy;
+}
+
+/// Copy mandatory `musttail` return sequence that follows original `CI`, and
+/// link it up to `NewCI` value instead:
+///
+/// * (optional) `bitcast NewCI to ...`
+/// * `ret bitcast or NewCI`
+///
+/// Insert this sequence right before `SplitBB`'s terminator, which will be
+/// cleaned up later in `splitCallSite` below.
+static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI,
+ Instruction *NewCI) {
+ bool IsVoid = SplitBB->getParent()->getReturnType()->isVoidTy();
+ auto II = std::next(CI->getIterator());
+
+ BitCastInst *BCI = dyn_cast<BitCastInst>(&*II);
+ if (BCI)
+ ++II;
+
+ ReturnInst *RI = dyn_cast<ReturnInst>(&*II);
+ assert(RI && "`musttail` call must be followed by `ret` instruction");
+
+ TerminatorInst *TI = SplitBB->getTerminator();
+ Value *V = NewCI;
+ if (BCI)
+ V = cloneInstForMustTail(BCI, TI, V);
+ cloneInstForMustTail(RI, TI, IsVoid ? nullptr : V);
+
+ // FIXME: remove TI here, `DuplicateInstructionsInSplitBetween` has a bug
+ // that prevents doing this now.
+}
+
/// Return true if the CS is split into its new predecessors which are directly
/// hooked to each of its original predecessors pointed by PredBB1 and PredBB2.
/// CallInst1 and CallInst2 will be the new call-sites placed in the new
@@ -245,6 +285,7 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2,
Instruction *CallInst1, Instruction *CallInst2) {
Instruction *Instr = CS.getInstruction();
BasicBlock *TailBB = Instr->getParent();
+ bool IsMustTailCall = CS.isMustTailCall();
assert(Instr == (TailBB->getFirstNonPHIOrDbg()) && "Unexpected call-site");
BasicBlock *SplitBlock1 =
@@ -276,9 +317,14 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2,
++ArgNo;
}
}
+ // Clone and place bitcast and return instructions before `TI`
+ if (IsMustTailCall) {
+ copyMustTailReturn(SplitBlock1, CS.getInstruction(), CallInst1);
+ copyMustTailReturn(SplitBlock2, CS.getInstruction(), CallInst2);
+ }
// Replace users of the original call with a PHI mering call-sites split.
- if (Instr->getNumUses()) {
+ if (!IsMustTailCall && Instr->getNumUses()) {
PHINode *PN = PHINode::Create(Instr->getType(), 2, "phi.call",
TailBB->getFirstNonPHI());
PN->addIncoming(CallInst1, SplitBlock1);
@@ -290,8 +336,25 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2,
<< "\n");
DEBUG(dbgs() << " " << *CallInst2 << " in " << SplitBlock2->getName()
<< "\n");
- Instr->eraseFromParent();
+
NumCallSiteSplit++;
+
+ // FIXME: remove TI in `copyMustTailReturn`
+ if (IsMustTailCall) {
+ // Remove superfluous `br` terminators from the end of the Split blocks
+ // NOTE: Removing terminator removes the SplitBlock from the TailBB's
+ // predecessors. Therefore we must get complete list of Splits before
+ // attempting removal.
+ SmallVector<BasicBlock *, 2> Splits(predecessors((TailBB)));
+ assert(Splits.size() == 2 && "Expected exactly 2 splits!");
+ for (unsigned i = 0; i < Splits.size(); i++)
+ Splits[i]->getTerminator()->eraseFromParent();
+
+ // Erase the tail block once done with musttail patching
+ TailBB->eraseFromParent();
+ return;
+ }
+ Instr->eraseFromParent();
}
// Return true if the call-site has an argument which is a PHI with only
@@ -369,7 +432,17 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) {
Function *Callee = CS.getCalledFunction();
if (!Callee || Callee->isDeclaration())
continue;
+
+ // Successful musttail call-site splits result in erased CI and erased BB.
+ // Check if such a path is possible before attempting the splitting.
+ bool IsMustTail = CS.isMustTailCall();
+
Changed |= tryToSplitCallSite(CS);
+
+ // There are no interesting instructions after this. The call site
+ // itself might have been erased on splitting.
+ if (IsMustTail)
+ break;
}
}
return Changed;
diff --git a/lib/Transforms/Scalar/DivRemPairs.cpp b/lib/Transforms/Scalar/DivRemPairs.cpp
index e383af89a384..e1bc590c5c9a 100644
--- a/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -13,6 +13,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/DivRemPairs.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -48,7 +50,10 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
// Insert all divide and remainder instructions into maps keyed by their
// operands and opcode (signed or unsigned).
- DenseMap<DivRemMapKey, Instruction *> DivMap, RemMap;
+ DenseMap<DivRemMapKey, Instruction *> DivMap;
+ // Use a MapVector for RemMap so that instructions are moved/inserted in a
+ // deterministic order.
+ MapVector<DivRemMapKey, Instruction *> RemMap;
for (auto &BB : F) {
for (auto &I : BB) {
if (I.getOpcode() == Instruction::SDiv)
@@ -67,14 +72,14 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
// rare than division.
for (auto &RemPair : RemMap) {
// Find the matching division instruction from the division map.
- Instruction *DivInst = DivMap[RemPair.getFirst()];
+ Instruction *DivInst = DivMap[RemPair.first];
if (!DivInst)
continue;
// We have a matching pair of div/rem instructions. If one dominates the
// other, hoist and/or replace one.
NumPairs++;
- Instruction *RemInst = RemPair.getSecond();
+ Instruction *RemInst = RemPair.second;
bool IsSigned = DivInst->getOpcode() == Instruction::SDiv;
bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned);
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 141c9938bf8b..2f1645433fb8 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1454,6 +1454,9 @@ FindMostPopularDest(BasicBlock *BB,
if (PredToDest.second)
DestPopularity[PredToDest.second]++;
+ if (DestPopularity.empty())
+ return nullptr;
+
// Find the most popular dest.
DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
BasicBlock *MostPopularDest = DPI->first;
@@ -1629,8 +1632,20 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// threadable destination (the common case) we can avoid this.
BasicBlock *MostPopularDest = OnlyDest;
- if (MostPopularDest == MultipleDestSentinel)
+ if (MostPopularDest == MultipleDestSentinel) {
+ // Remove any loop headers from the Dest list. ThreadEdge conservatively
+ // won't process them, but we might have other destinations that are
+ // eligible and that we still want to process.
+ erase_if(PredToDestList,
+ [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
+ return LoopHeaders.count(PredToDest.second) != 0;
+ });
+
+ if (PredToDestList.empty())
+ return false;
+
MostPopularDest = FindMostPopularDest(BB, PredToDestList);
+ }
// Now that we know what the most popular destination is, factor all
// predecessors that will jump to it into a single predecessor.
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 9dc550ceaeca..3e12649ddedc 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -223,6 +223,10 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
/// represented here for efficient lookup.
SmallPtrSet<Function *, 16> MRVFunctionsTracked;
+ /// MustTailCallees - Each function here is a callee of a non-removable
+ /// musttail call site.
+ SmallPtrSet<Function *, 16> MustTailCallees;
+
/// TrackingIncomingArguments - This is the set of functions for whose
/// arguments we make optimistic assumptions about and try to prove as
/// constants.
@@ -289,6 +293,18 @@ public:
TrackedRetVals.insert(std::make_pair(F, LatticeVal()));
}
+ /// AddMustTailCallee - Record that this function is called from a
+ /// non-removable musttail call site.
+ void AddMustTailCallee(Function *F) {
+ MustTailCallees.insert(F);
+ }
+
+ /// Returns true if the given function is called from non-removable musttail
+ /// call site.
+ bool isMustTailCallee(Function *F) {
+ return MustTailCallees.count(F);
+ }
+
void AddArgumentTrackedFunction(Function *F) {
TrackingIncomingArguments.insert(F);
}
@@ -358,6 +374,12 @@ public:
return MRVFunctionsTracked;
}
+ /// getMustTailCallees - Get the set of functions which are called from
+ /// non-removable musttail call sites (by reference; the set is not copied).
+ const SmallPtrSet<Function *, 16> &getMustTailCallees() {
+ return MustTailCallees;
+ }
+
/// markOverdefined - Mark the specified value overdefined. This
/// works with both scalars and structs.
void markOverdefined(Value *V) {
@@ -1672,6 +1694,23 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) {
IV.isConstant() ? IV.getConstant() : UndefValue::get(V->getType());
}
assert(Const && "Constant is nullptr here!");
+
+ // Replacing `musttail` instructions with constant breaks `musttail` invariant
+ // unless the call itself can be removed
+ CallInst *CI = dyn_cast<CallInst>(V);
+ if (CI && CI->isMustTailCall() && !isInstructionTriviallyDead(CI)) {
+ CallSite CS(CI);
+ Function *F = CS.getCalledFunction();
+
+ // Don't zap returns of the callee
+ if (F)
+ Solver.AddMustTailCallee(F);
+
+ DEBUG(dbgs() << " Can\'t treat the result of musttail call : " << *CI
+ << " as a constant\n");
+ return false;
+ }
+
DEBUG(dbgs() << " Constant: " << *Const << " = " << *V << '\n');
// Replaces all of the uses of a variable with uses of the constant.
@@ -1802,10 +1841,26 @@ static void findReturnsToZap(Function &F,
if (!Solver.isArgumentTrackedFunction(&F))
return;
- for (BasicBlock &BB : F)
+ // There is a non-removable musttail call site of this function. Zapping
+ // returns is not allowed.
+ if (Solver.isMustTailCallee(&F)) {
+ DEBUG(dbgs() << "Can't zap returns of the function : " << F.getName()
+ << " due to present musttail call of it\n");
+ return;
+ }
+
+ for (BasicBlock &BB : F) {
+ if (CallInst *CI = BB.getTerminatingMustTailCall()) {
+ DEBUG(dbgs() << "Can't zap return of the block due to present "
+ << "musttail call : " << *CI << "\n");
+ (void)CI;
+ return;
+ }
+
if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
if (!isa<UndefValue>(RI->getOperand(0)))
ReturnsToZap.push_back(RI);
+ }
}
static bool runIPSCCP(Module &M, const DataLayout &DL,
diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp
index bddcbd86e914..75539428b688 100644
--- a/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/lib/Transforms/Utils/FunctionComparator.cpp
@@ -710,7 +710,7 @@ int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
return Res;
if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
return Res;
- llvm_unreachable("InlineAsm blocks were not uniqued.");
+ assert(L->getFunctionType() != R->getFunctionType());
return 0;
}
diff --git a/test/Analysis/MemorySSA/pr36883.ll b/test/Analysis/MemorySSA/pr36883.ll
new file mode 100644
index 000000000000..8411b0c228b8
--- /dev/null
+++ b/test/Analysis/MemorySSA/pr36883.ll
@@ -0,0 +1,38 @@
+; RUN: opt -basicaa -memoryssa -analyze < %s 2>&1 -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -S < %s 2>&1 | FileCheck %s
+;
+; We weren't properly considering the args in callsites in equality or hashing.
+
+target triple = "armv7-dcg-linux-gnueabi"
+
+; CHECK-LABEL: define <8 x i16> @vpx_idct32_32_neon
+define <8 x i16> @vpx_idct32_32_neon(i8* %p, <8 x i16> %v) {
+entry:
+; CHECK: MemoryUse(liveOnEntry)
+ %load1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 2) #4 ; load CSE replacement
+
+; CHECK: 1 = MemoryDef(liveOnEntry)
+ call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %p, <8 x i16> %v, i32 2) #4 ; clobber
+
+ %p_next = getelementptr inbounds i8, i8* %p, i32 16
+; CHECK: MemoryUse(liveOnEntry)
+ %load2 = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p_next, i32 2) #4 ; non-aliasing load needed to trigger bug
+
+; CHECK: MemoryUse(1)
+ %load3 = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 2) #4 ; load CSE removed
+
+ %add = add <8 x i16> %load1, %load2
+ %ret = add <8 x i16> %add, %load3
+ ret <8 x i16> %ret
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) #2
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) #1
+
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { argmemonly nounwind readonly }
+attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind }
diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
index 14beb1ae9c36..1032a6d620ba 100644
--- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
+++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
@@ -28,6 +28,28 @@ return: ; preds = %if.then172, %cond.e
ret void
}
+; Avoid an assert/bad codegen in LD1LANEPOST lowering by not forming
+; LD1LANEPOST ISD nodes with a non-constant lane index.
+define <4 x i32> @f2(i32 *%p, <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2, i32 %idx) {
+ %L0 = load i32, i32* %p
+ %p1 = getelementptr i32, i32* %p, i64 1
+ %L1 = load i32, i32* %p1
+ %v = select <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2
+ %vret = insertelement <4 x i32> %v, i32 %L0, i32 %idx
+ store i32 %L1, i32 *%p
+ ret <4 x i32> %vret
+}
+
+; Check that a cycle is avoided during isel between the LD1LANEPOST instruction and the load of %L1.
+define <4 x i32> @f3(i32 *%p, <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2) {
+ %L0 = load i32, i32* %p
+ %p1 = getelementptr i32, i32* %p, i64 1
+ %L1 = load i32, i32* %p1
+ %v = select <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2
+ %vret = insertelement <4 x i32> %v, i32 %L0, i32 %L1
+ ret <4 x i32> %vret
+}
+
; Function Attrs: nounwind readnone
declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #1
diff --git a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
index 2fb9d3b2d030..664078fb7e94 100644
--- a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
+++ b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
@@ -1,27 +1,31 @@
-; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s -check-prefix=CYCLONE --check-prefix=ALL
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s -check-prefix=KRYO --check-prefix=ALL
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=falkor < %s | FileCheck %s -check-prefix=FALKOR --check-prefix=ALL
+; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s -check-prefixes=ALL,CYCLONE
+; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 < %s | FileCheck %s -check-prefixes=CYCLONE-FULLFP16
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m1 < %s | FileCheck %s -check-prefixes=ALL,OTHERS
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m3 < %s | FileCheck %s -check-prefixes=ALL,OTHERS
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s -check-prefixes=ALL,OTHERS
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=falkor < %s | FileCheck %s -check-prefixes=ALL,OTHERS
-; rdar://11481771
-; rdar://13713797
+declare void @bar(half, float, double, <2 x double>)
+declare void @bari(i32, i32)
+declare void @barl(i64, i64)
+declare void @barf(float, float)
define void @t1() nounwind ssp {
entry:
; ALL-LABEL: t1:
; ALL-NOT: fmov
-; CYCLONE: fmov d0, xzr
-; CYCLONE: fmov d1, xzr
+; ALL: ldr h0,{{.*}}
+; CYCLONE: fmov s1, wzr
; CYCLONE: fmov d2, xzr
-; CYCLONE: fmov d3, xzr
-; KRYO: movi v0.2d, #0000000000000000
-; KRYO: movi v1.2d, #0000000000000000
-; KRYO: movi v2.2d, #0000000000000000
-; KRYO: movi v3.2d, #0000000000000000
-; FALKOR: movi v0.2d, #0000000000000000
-; FALKOR: movi v1.2d, #0000000000000000
-; FALKOR: movi v2.2d, #0000000000000000
-; FALKOR: movi v3.2d, #0000000000000000
- tail call void @bar(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00) nounwind
+; CYCLONE: movi.16b v3, #0
+; CYCLONE-FULLFP16: fmov h0, wzr
+; CYCLONE-FULLFP16: fmov s1, wzr
+; CYCLONE-FULLFP16: fmov d2, xzr
+; CYCLONE-FULLFP16: movi.16b v3, #0
+; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
+; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
+; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
+ tail call void @bar(half 0.000000e+00, float 0.000000e+00, double 0.000000e+00, <2 x double> <double 0.000000e+00, double 0.000000e+00>) nounwind
ret void
}
@@ -29,8 +33,8 @@ define void @t2() nounwind ssp {
entry:
; ALL-LABEL: t2:
; ALL-NOT: mov w0, wzr
-; ALL: mov w0, #0
-; ALL: mov w1, #0
+; ALL: mov w{{[0-3]+}}, #0
+; ALL: mov w{{[0-3]+}}, #0
tail call void @bari(i32 0, i32 0) nounwind
ret void
}
@@ -39,8 +43,8 @@ define void @t3() nounwind ssp {
entry:
; ALL-LABEL: t3:
; ALL-NOT: mov x0, xzr
-; ALL: mov x0, #0
-; ALL: mov x1, #0
+; ALL: mov x{{[0-3]+}}, #0
+; ALL: mov x{{[0-3]+}}, #0
tail call void @barl(i64 0, i64 0) nounwind
ret void
}
@@ -48,26 +52,21 @@ entry:
define void @t4() nounwind ssp {
; ALL-LABEL: t4:
; ALL-NOT: fmov
-; CYCLONE: fmov s0, wzr
-; CYCLONE: fmov s1, wzr
-; KRYO: movi v0.2d, #0000000000000000
-; KRYO: movi v1.2d, #0000000000000000
-; FALKOR: movi v0.2d, #0000000000000000
-; FALKOR: movi v1.2d, #0000000000000000
+; CYCLONE: fmov s{{[0-3]+}}, wzr
+; CYCLONE: fmov s{{[0-3]+}}, wzr
+; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr
+; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr
+; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
+; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind
ret void
}
-declare void @bar(double, double, double, double)
-declare void @bari(i32, i32)
-declare void @barl(i64, i64)
-declare void @barf(float, float)
-
; We used to produce spills+reloads for a Q register with zero cycle zeroing
; enabled.
; ALL-LABEL: foo:
-; ALL-NOT: str {{q[0-9]+}}
-; ALL-NOT: ldr {{q[0-9]+}}
+; ALL-NOT: str q{{[0-9]+}}
+; ALL-NOT: ldr q{{[0-9]+}}
define double @foo(i32 %n) {
entry:
br label %for.body
@@ -90,8 +89,7 @@ for.end:
define <2 x i64> @t6() {
; ALL-LABEL: t6:
; CYCLONE: movi.16b v0, #0
-; KRYO: movi v0.2d, #0000000000000000
-; FALKOR: movi v0.2d, #0000000000000000
+; OTHERS: movi v0.2d, #0000000000000000
ret <2 x i64> zeroinitializer
}
diff --git a/test/CodeGen/AArch64/falkor-hwpf-fix.mir b/test/CodeGen/AArch64/falkor-hwpf-fix.mir
index 38622ae0e49a..28b19f877685 100644
--- a/test/CodeGen/AArch64/falkor-hwpf-fix.mir
+++ b/test/CodeGen/AArch64/falkor-hwpf-fix.mir
@@ -353,3 +353,28 @@ body: |
bb.1:
RET_ReallyLR
...
+---
+# Check that non-base registers are considered live when finding a
+# scratch register by making sure we don't use %x2 for the scratch
+# register for the inserted ORRXrs.
+# CHECK-LABEL: name: hwpf_offreg
+# CHECK: %x3 = ORRXrs %xzr, %x1, 0
+# CHECK: %w10 = LDRWroX %x3, %x2, 0, 0
+name: hwpf_offreg
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %x2, %x17, %x18
+
+ %w10 = LDRWroX %x1, %x2, 0, 0 :: ("aarch64-strided-access" load 4)
+
+ %x2 = ORRXrs %xzr, %x10, 0
+ %w26 = LDRWroX %x1, %x2, 0, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
diff --git a/test/CodeGen/AArch64/inlineasm-S-constraint.ll b/test/CodeGen/AArch64/inlineasm-S-constraint.ll
new file mode 100644
index 000000000000..3fb2a3f32cea
--- /dev/null
+++ b/test/CodeGen/AArch64/inlineasm-S-constraint.ll
@@ -0,0 +1,20 @@
+;RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+@var = global i32 0
+define void @test_inline_constraint_S() {
+; CHECK-LABEL: test_inline_constraint_S:
+ call void asm sideeffect "adrp x0, $0", "S"(i32* @var)
+ call void asm sideeffect "add x0, x0, :lo12:$0", "S"(i32* @var)
+; CHECK: adrp x0, var
+; CHECK: add x0, x0, :lo12:var
+ ret void
+}
+define i32 @test_inline_constraint_S_label(i1 %in) {
+; CHECK-LABEL: test_inline_constraint_S_label:
+ call void asm sideeffect "adr x0, $0", "S"(i8* blockaddress(@test_inline_constraint_S_label, %loc))
+; CHECK: adr x0, .Ltmp{{[0-9]+}}
+br i1 %in, label %loc, label %loc2
+loc:
+ ret i32 0
+loc2:
+ ret i32 42
+}
diff --git a/test/CodeGen/AArch64/spill-stack-realignment.mir b/test/CodeGen/AArch64/spill-stack-realignment.mir
new file mode 100644
index 000000000000..fe85f4b64027
--- /dev/null
+++ b/test/CodeGen/AArch64/spill-stack-realignment.mir
@@ -0,0 +1,35 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
+
+# Ensure references to scavenged stack slots in the CSR area use the
+# FP as a base when the stack pointer must be aligned to something
+# larger than required by the target. This is necessary because the
+# alignment padding area is between the CSR area and the SP, so the SP
+# cannot be used to reference the CSR area.
+name: test
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 64
+# CHECK: stack:
+# CHECK: id: 0, name: '', type: default, offset: -64, size: 4, alignment: 64
+# CHECK-NEXT: stack-id: 0
+# CHECK-NEXT: local-offset: -64
+# CHECK: id: 1, name: '', type: default, offset: -20, size: 4, alignment: 4
+# CHECK-NEXT: stack-id: 0
+# CHECK-NEXT: local-offset: -68
+stack:
+ - { id: 0, size: 4, alignment: 64, local-offset: -64 }
+ - { id: 1, size: 4, alignment: 4, local-offset: -68 }
+
+# CHECK: body:
+# CHECK: %sp = ANDXri killed %{{x[0-9]+}}, 7865
+# CHECK: STRSui %s0, %sp, 0
+# CHECK: STURSi %s0, %fp, -4
+body: |
+ bb.0.entry:
+ liveins: %s0
+
+ STRSui %s0, %stack.0, 0
+ STRSui %s0, %stack.1, 0
+ ; Force preserve a CSR to create a hole in the CSR stack region.
+ %x28 = IMPLICIT_DEF
+ RET_ReallyLR
diff --git a/test/CodeGen/AMDGPU/ctpop16.ll b/test/CodeGen/AMDGPU/ctpop16.ll
new file mode 100644
index 000000000000..8236ac07a680
--- /dev/null
+++ b/test/CodeGen/AMDGPU/ctpop16.ll
@@ -0,0 +1,334 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
+
+declare i16 @llvm.ctpop.i16(i16) nounwind readnone
+declare <2 x i16> @llvm.ctpop.v2i16(<2 x i16>) nounwind readnone
+declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone
+declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) nounwind readnone
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: {{^}}s_ctpop_i16:
+; GCN: s_load_dword [[SVAL:s[0-9]+]],
+; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
+; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; GCN: buffer_store_short [[VRESULT]],
+; GCN: s_endpgm
+
+; EG: BCNT_INT
+define amdgpu_kernel void @s_ctpop_i16(i16 addrspace(1)* noalias %out, i16 %val) nounwind {
+ %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
+ store i16 %ctpop, i16 addrspace(1)* %out, align 4
+ ret void
+}
+
+; XXX - Why 0 in register?
+; FUNC-LABEL: {{^}}v_ctpop_i16:
+; GCN: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]],
+; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 0
+; GCN: buffer_store_short [[RESULT]],
+; GCN: s_endpgm
+
+; EG: BCNT_INT
+define amdgpu_kernel void @v_ctpop_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
+ %val = load i16, i16 addrspace(1)* %in.gep, align 4
+ %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
+ store i16 %ctpop, i16 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctpop_add_chain_i16:
+; SI: buffer_load_ushort [[VAL0:v[0-9]+]],
+; SI: buffer_load_ushort [[VAL1:v[0-9]+]],
+; VI: flat_load_ushort [[VAL0:v[0-9]+]],
+; VI: flat_load_ushort [[VAL1:v[0-9]+]],
+; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
+; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
+; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
+; GCN: buffer_store_short [[RESULT]],
+; GCN: s_endpgm
+
+; EG: BCNT_INT
+; EG: BCNT_INT
+define amdgpu_kernel void @v_ctpop_add_chain_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in0, i16 addrspace(1)* noalias %in1) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in0.gep = getelementptr i16, i16 addrspace(1)* %in0, i32 %tid
+ %in1.gep = getelementptr i16, i16 addrspace(1)* %in1, i32 %tid
+ %val0 = load volatile i16, i16 addrspace(1)* %in0.gep, align 4
+ %val1 = load volatile i16, i16 addrspace(1)* %in1.gep, align 4
+ %ctpop0 = call i16 @llvm.ctpop.i16(i16 %val0) nounwind readnone
+ %ctpop1 = call i16 @llvm.ctpop.i16(i16 %val1) nounwind readnone
+ %add = add i16 %ctpop0, %ctpop1
+ store i16 %add, i16 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i16:
+; GCN: {{buffer|flat}}_load_ushort [[VAL0:v[0-9]+]],
+; GCN: s_waitcnt
+; GCN-NEXT: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
+; GCN: buffer_store_short [[RESULT]],
+; GCN: s_endpgm
+define amdgpu_kernel void @v_ctpop_add_sgpr_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %sval) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
+ %val = load i16, i16 addrspace(1)* %in.gep, align 4
+ %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
+ %add = add i16 %ctpop, %sval
+ store i16 %add, i16 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctpop_v2i16:
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: s_endpgm
+
+; EG: BCNT_INT
+; EG: BCNT_INT
+define amdgpu_kernel void @v_ctpop_v2i16(<2 x i16> addrspace(1)* noalias %out, <2 x i16> addrspace(1)* noalias %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
+ %val = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep, align 8
+ %ctpop = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %val) nounwind readnone
+ store <2 x i16> %ctpop, <2 x i16> addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctpop_v4i16:
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: s_endpgm
+
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+define amdgpu_kernel void @v_ctpop_v4i16(<4 x i16> addrspace(1)* noalias %out, <4 x i16> addrspace(1)* noalias %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
+ %val = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep, align 16
+ %ctpop = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %val) nounwind readnone
+ store <4 x i16> %ctpop, <4 x i16> addrspace(1)* %out, align 16
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctpop_v8i16:
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: s_endpgm
+
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+define amdgpu_kernel void @v_ctpop_v8i16(<8 x i16> addrspace(1)* noalias %out, <8 x i16> addrspace(1)* noalias %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr <8 x i16>, <8 x i16> addrspace(1)* %in, i32 %tid
+ %val = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep, align 32
+ %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val) nounwind readnone
+ store <8 x i16> %ctpop, <8 x i16> addrspace(1)* %out, align 32
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctpop_v16i16:
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: v_bcnt_u32_b32{{(_e64)*}}
+; GCN: s_endpgm
+
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+; EG: BCNT_INT
+define amdgpu_kernel void @v_ctpop_v16i16(<16 x i16> addrspace(1)* noalias %out, <16 x i16> addrspace(1)* noalias %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr <16 x i16>, <16 x i16> addrspace(1)* %in, i32 %tid
+ %val = load <16 x i16>, <16 x i16> addrspace(1)* %in.gep, align 32
+ %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %val) nounwind readnone
+ store <16 x i16> %ctpop, <16 x i16> addrspace(1)* %out, align 32
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctpop_i16_add_inline_constant:
+; GCN: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]],
+; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4
+; GCN: buffer_store_short [[RESULT]],
+; GCN: s_endpgm
+
+; EG: BCNT_INT
+define amdgpu_kernel void @v_ctpop_i16_add_inline_constant(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
+ %val = load i16, i16 addrspace(1)* %in.gep, align 4
+ %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
+ %add = add i16 %ctpop, 4
+ store i16 %add, i16 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctpop_i16_add_inline_constant_inv:
+; GCN: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]],
+; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4
+; GCN: buffer_store_short [[RESULT]],
+; GCN: s_endpgm
+
+; EG: BCNT_INT
+define amdgpu_kernel void @v_ctpop_i16_add_inline_constant_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
+ %val = load i16, i16 addrspace(1)* %in.gep, align 4
+ %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
+ %add = add i16 4, %ctpop
+ store i16 %add, i16 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctpop_i16_add_literal:
+; GCN-DAG: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]],
+; SI-DAG: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x3e7
+; VI-DAG: s_movk_i32 [[LIT:s[0-9]+]], 0x3e7
+; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
+; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
+; GCN: buffer_store_short [[RESULT]],
+; GCN: s_endpgm
+define amdgpu_kernel void @v_ctpop_i16_add_literal(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
+ %val = load i16, i16 addrspace(1)* %in.gep, align 4
+ %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
+ %add = add i16 %ctpop, 999
+ store i16 %add, i16 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctpop_i16_add_var:
+; GCN-DAG: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]],
+; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
+; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; GCN: buffer_store_short [[RESULT]],
+; GCN: s_endpgm
+
+; EG: BCNT_INT
+define amdgpu_kernel void @v_ctpop_i16_add_var(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %const) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
+ %val = load i16, i16 addrspace(1)* %in.gep, align 4
+ %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
+ %add = add i16 %ctpop, %const
+ store i16 %add, i16 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctpop_i16_add_var_inv:
+; GCN-DAG: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]],
+; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
+; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; GCN: buffer_store_short [[RESULT]],
+; GCN: s_endpgm
+
+; EG: BCNT_INT
+define amdgpu_kernel void @v_ctpop_i16_add_var_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %const) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
+ %val = load i16, i16 addrspace(1)* %in.gep, align 4
+ %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
+ %add = add i16 %const, %ctpop
+ store i16 %add, i16 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctpop_i16_add_vvar_inv:
+; SI: buffer_load_ushort [[VAR:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
+; SI: buffer_load_ushort [[VAL:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
+; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAR]], [[VAL]]
+; VI: flat_load_ushort [[VAR:v[0-9]+]], v[{{[0-9]+:[0-9]+}}]
+; VI: flat_load_ushort [[VAL:v[0-9]+]], v[{{[0-9]+:[0-9]+}}]
+; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; GCN: buffer_store_short [[RESULT]],
+; GCN: s_endpgm
+
+; EG: BCNT_INT
+define amdgpu_kernel void @v_ctpop_i16_add_vvar_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 addrspace(1)* noalias %constptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
+ %val = load i16, i16 addrspace(1)* %in.gep, align 4
+ %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
+ %gep = getelementptr i16, i16 addrspace(1)* %constptr, i32 %tid
+ %const = load i16, i16 addrspace(1)* %gep, align 4
+ %add = add i16 %const, %ctpop
+ store i16 %add, i16 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FIXME: We currently disallow SALU instructions in all branches,
+; but there are some cases when they should be allowed.
+
+; FUNC-LABEL: {{^}}ctpop_i16_in_br:
+; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
+; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34
+; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
+; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], [[SRESULT]]
+; GCN: buffer_store_short [[RESULT]],
+; GCN: s_endpgm
+; EG: BCNT_INT
+define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %ctpop_arg, i16 %cond) {
+entry:
+ %tmp0 = icmp eq i16 %cond, 0
+ br i1 %tmp0, label %if, label %else
+
+if:
+ %tmp2 = call i16 @llvm.ctpop.i16(i16 %ctpop_arg)
+ br label %endif
+
+else:
+ %tmp3 = getelementptr i16, i16 addrspace(1)* %in, i16 1
+ %tmp4 = load i16, i16 addrspace(1)* %tmp3
+ br label %endif
+
+endif:
+ %tmp5 = phi i16 [%tmp2, %if], [%tmp4, %else]
+ store i16 %tmp5, i16 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/ARM/peephole-phi.mir b/test/CodeGen/ARM/peephole-phi.mir
index 30343654dea1..54ae0115840b 100644
--- a/test/CodeGen/ARM/peephole-phi.mir
+++ b/test/CodeGen/ARM/peephole-phi.mir
@@ -65,3 +65,39 @@ body: |
%4:gpr = PHI %0, %bb.1, %2, %bb.2
%5:spr = VMOVSR %4, 14, %noreg
...
+
+# The current implementation doesn't perform any transformations if undef
+# operands are involved.
+# CHECK-LABEL: name: func-undefops
+# CHECK: body: |
+# CHECK: bb.0:
+# CHECK: Bcc %bb.2, 1, undef %cpsr
+#
+# CHECK: bb.1:
+# CHECK: %0:gpr = VMOVRS undef %1:spr, 14, %noreg
+# CHECK: B %bb.3
+#
+# CHECK: bb.2:
+# CHECK: %2:gpr = VMOVRS undef %3:spr, 14, %noreg
+#
+# CHECK: bb.3:
+# CHECK: %4:gpr = PHI %0, %bb.1, %2, %bb.2
+# CHECK: %5:spr = VMOVSR %4, 14, %noreg
+---
+name: func-undefops
+tracksRegLiveness: true
+body: |
+ bb.0:
+ Bcc %bb.2, 1, undef %cpsr
+
+ bb.1:
+ %0:gpr = VMOVRS undef %1:spr, 14, %noreg
+ B %bb.3
+
+ bb.2:
+ %2:gpr = VMOVRS undef %3:spr, 14, %noreg
+
+ bb.3:
+ %4:gpr = PHI %0, %bb.1, %2, %bb.2
+ %5:spr = VMOVSR %4, 14, %noreg
+...
diff --git a/test/CodeGen/Hexagon/ifcvt-diamond-ret.mir b/test/CodeGen/Hexagon/ifcvt-diamond-ret.mir
new file mode 100644
index 000000000000..e896d9aaa9a4
--- /dev/null
+++ b/test/CodeGen/Hexagon/ifcvt-diamond-ret.mir
@@ -0,0 +1,25 @@
+# RUN: llc -march=hexagon -run-pass if-converter %s -o - | FileCheck %s
+
+# Make sure this gets if-converted and it doesn't crash.
+# CHECK-LABEL: bb.0
+# CHECK: PS_jmpret %r31
+# CHECK-NOT: bb.{{[1-9]+}}:
+
+---
+name: fred
+tracksRegLiveness: true
+body: |
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: %r0
+ renamable %p0 = C2_cmpeqi killed renamable %r0, 0
+ J2_jumpf killed renamable %p0, %bb.2, implicit-def dead %pc
+
+ bb.1:
+ S4_storeiri_io undef renamable %r0, 0, 32768 :: (store 4 into `i32* undef`)
+ PS_jmpret %r31, implicit-def dead %pc
+
+ bb.2:
+ S4_storeiri_io undef renamable %r0, 0, 32768 :: (store 4 into `i32* undef`)
+ PS_jmpret %r31, implicit-def dead %pc
+...
diff --git a/test/CodeGen/MIR/PowerPC/ifcvt-diamond-ret.mir b/test/CodeGen/MIR/PowerPC/ifcvt-diamond-ret.mir
new file mode 100644
index 000000000000..c63c055c3b31
--- /dev/null
+++ b/test/CodeGen/MIR/PowerPC/ifcvt-diamond-ret.mir
@@ -0,0 +1,34 @@
+# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -run-pass=if-converter %s -o - | FileCheck %s
+---
+name: foo
+body: |
+ bb.0:
+ liveins: %x0, %x3
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+
+ dead renamable %x3 = ANDIo8 killed renamable %x3, 1, implicit-def dead %cr0, implicit-def %cr0gt
+ %cr2lt = CROR %cr0gt, %cr0gt
+ BCn killed renamable %cr2lt, %bb.2
+ B %bb.1
+
+ bb.1:
+ renamable %x3 = LIS8 4096
+ MTLR8 %x0, implicit-def %lr8
+ BLR8 implicit %lr8, implicit %rm, implicit %x3
+
+ bb.2:
+ renamable %x3 = LIS8 4096
+ MTLR8 %x0, implicit-def %lr8
+ BLR8 implicit %lr8, implicit %rm, implicit %x3
+...
+
+# Diamond testcase with equivalent branches terminating in returns.
+
+# CHECK: body: |
+# CHECK: bb.0:
+# CHECK: dead renamable %x3 = ANDIo8 killed renamable %x3, 1, implicit-def dead %cr0, implicit-def %cr0gt
+# CHECK: %cr2lt = CROR %cr0gt, %cr0gt
+# CHECK: renamable %x3 = LIS8 4096
+# CHECK: MTLR8 %x0, implicit-def %lr8
+# CHECK: BLR8 implicit %lr8, implicit %rm, implicit %x3
+
diff --git a/test/CodeGen/Mips/const-mult.ll b/test/CodeGen/Mips/const-mult.ll
index 459aad61828c..dc4f2f9c862b 100644
--- a/test/CodeGen/Mips/const-mult.ll
+++ b/test/CodeGen/Mips/const-mult.ll
@@ -1,93 +1,626 @@
-; RUN: llc -march=mipsel < %s | FileCheck %s
-; RUN: llc -march=mips64el < %s | FileCheck %s -check-prefixes=CHECK,CHECK64
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=mipsel-mti-linux-gnu < %s | FileCheck %s -check-prefix=MIPS32
+; RUN: llc -mtriple=mips64el-mti-linux-gnu < %s | FileCheck %s -check-prefix=MIPS64
-; CHECK-LABEL: mul5_32:
-; CHECK: sll $[[R0:[0-9]+]], $4, 2
-; CHECK: addu ${{[0-9]+}}, $[[R0]], $4
define i32 @mul5_32(i32 signext %a) {
+; MIPS32-LABEL: mul5_32:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: sll $1, $4, 2
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addu $2, $1, $4
+;
+; MIPS64-LABEL: mul5_32:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: sll $1, $4, 2
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: addu $2, $1, $4
entry:
%mul = mul nsw i32 %a, 5
ret i32 %mul
}
-; CHECK-LABEL: mul27_32:
-; CHECK-DAG: sll $[[R0:[0-9]+]], $4, 2
-; CHECK-DAG: addu $[[R1:[0-9]+]], $[[R0]], $4
-; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 5
-; CHECK: subu ${{[0-9]+}}, $[[R2]], $[[R1]]
-
define i32 @mul27_32(i32 signext %a) {
+; MIPS32-LABEL: mul27_32:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: sll $1, $4, 2
+; MIPS32-NEXT: addu $1, $1, $4
+; MIPS32-NEXT: sll $2, $4, 5
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: subu $2, $2, $1
+;
+; MIPS64-LABEL: mul27_32:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: sll $1, $4, 2
+; MIPS64-NEXT: addu $1, $1, $4
+; MIPS64-NEXT: sll $2, $4, 5
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: subu $2, $2, $1
entry:
%mul = mul nsw i32 %a, 27
ret i32 %mul
}
-; CHECK-LABEL: muln2147483643_32:
-; CHECK-DAG: sll $[[R0:[0-9]+]], $4, 2
-; CHECK-DAG: addu $[[R1:[0-9]+]], $[[R0]], $4
-; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 31
-; CHECK: addu ${{[0-9]+}}, $[[R2]], $[[R1]]
-
define i32 @muln2147483643_32(i32 signext %a) {
+; MIPS32-LABEL: muln2147483643_32:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: sll $1, $4, 2
+; MIPS32-NEXT: addu $1, $1, $4
+; MIPS32-NEXT: sll $2, $4, 31
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addu $2, $2, $1
+;
+; MIPS64-LABEL: muln2147483643_32:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: sll $1, $4, 2
+; MIPS64-NEXT: addu $1, $1, $4
+; MIPS64-NEXT: sll $2, $4, 31
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: addu $2, $2, $1
entry:
%mul = mul nsw i32 %a, -2147483643
ret i32 %mul
}
-; CHECK64-LABEL: muln9223372036854775805_64:
-; CHECK64-DAG: dsll $[[R0:[0-9]+]], $4, 1
-; CHECK64-DAG: daddu $[[R1:[0-9]+]], $[[R0]], $4
-; CHECK64-DAG: dsll $[[R2:[0-9]+]], $4, 63
-; CHECK64: daddu ${{[0-9]+}}, $[[R2]], $[[R1]]
-
define i64 @muln9223372036854775805_64(i64 signext %a) {
+; MIPS32-LABEL: muln9223372036854775805_64:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: sll $1, $4, 1
+; MIPS32-NEXT: addu $2, $1, $4
+; MIPS32-NEXT: sltu $1, $2, $1
+; MIPS32-NEXT: srl $3, $4, 31
+; MIPS32-NEXT: sll $6, $5, 1
+; MIPS32-NEXT: or $3, $6, $3
+; MIPS32-NEXT: addu $3, $3, $5
+; MIPS32-NEXT: addu $1, $3, $1
+; MIPS32-NEXT: sll $3, $4, 31
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addu $3, $3, $1
+;
+; MIPS64-LABEL: muln9223372036854775805_64:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: dsll $1, $4, 1
+; MIPS64-NEXT: daddu $1, $1, $4
+; MIPS64-NEXT: dsll $2, $4, 63
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddu $2, $2, $1
entry:
%mul = mul nsw i64 %a, -9223372036854775805
ret i64 %mul
}
-; CHECK64-LABEL: muln170141183460469231731687303715884105725_128:
-; CHECK64-DAG: dsrl $[[R0:[0-9]+]], $4, 63
-; CHECK64-DAG: dsll $[[R1:[0-9]+]], $5, 1
-; CHECK64-DAG: or $[[R2:[0-9]+]], $[[R1]], $[[R0]]
-; CHECK64-DAG: daddu $[[R3:[0-9]+]], $[[R2]], $5
-; CHECK64-DAG: dsll $[[R4:[0-9]+]], $4, 1
-; CHECK64-DAG: daddu $[[R5:[0-9]+]], $[[R4]], $4
-; CHECK64-DAG: sltu $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK64-DAG: dsll $[[R7:[0-9]+]], $[[R6]], 32
-; CHECK64-DAG: dsrl $[[R8:[0-9]+]], $[[R7]], 32
-; CHECK64-DAG: daddu $[[R9:[0-9]+]], $[[R3]], $[[R8]]
-; CHECK64-DAG: dsll $[[R10:[0-9]+]], $4, 63
-; CHECK64: daddu ${{[0-9]+}}, $[[R10]], $[[R9]]
-
define i128 @muln170141183460469231731687303715884105725_128(i128 signext %a) {
+; MIPS32-LABEL: muln170141183460469231731687303715884105725_128:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: sll $1, $4, 1
+; MIPS32-NEXT: addu $2, $1, $4
+; MIPS32-NEXT: sltu $1, $2, $1
+; MIPS32-NEXT: srl $3, $4, 31
+; MIPS32-NEXT: sll $8, $5, 1
+; MIPS32-NEXT: or $8, $8, $3
+; MIPS32-NEXT: addu $3, $8, $5
+; MIPS32-NEXT: addu $3, $3, $1
+; MIPS32-NEXT: sltu $9, $3, $8
+; MIPS32-NEXT: xor $8, $3, $8
+; MIPS32-NEXT: movz $9, $1, $8
+; MIPS32-NEXT: srl $1, $5, 31
+; MIPS32-NEXT: sll $5, $6, 1
+; MIPS32-NEXT: or $5, $5, $1
+; MIPS32-NEXT: addu $8, $5, $6
+; MIPS32-NEXT: addu $1, $8, $9
+; MIPS32-NEXT: sltu $5, $8, $5
+; MIPS32-NEXT: srl $6, $6, 31
+; MIPS32-NEXT: sll $9, $7, 1
+; MIPS32-NEXT: or $6, $9, $6
+; MIPS32-NEXT: addu $6, $6, $7
+; MIPS32-NEXT: addu $5, $6, $5
+; MIPS32-NEXT: sll $4, $4, 31
+; MIPS32-NEXT: sltu $6, $1, $8
+; MIPS32-NEXT: addu $5, $5, $6
+; MIPS32-NEXT: addu $5, $4, $5
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: move $4, $1
+;
+; MIPS64-LABEL: muln170141183460469231731687303715884105725_128:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: dsrl $1, $4, 63
+; MIPS64-NEXT: dsll $2, $5, 1
+; MIPS64-NEXT: or $1, $2, $1
+; MIPS64-NEXT: daddu $1, $1, $5
+; MIPS64-NEXT: dsll $3, $4, 1
+; MIPS64-NEXT: daddu $2, $3, $4
+; MIPS64-NEXT: sltu $3, $2, $3
+; MIPS64-NEXT: dsll $3, $3, 32
+; MIPS64-NEXT: dsrl $3, $3, 32
+; MIPS64-NEXT: daddu $1, $1, $3
+; MIPS64-NEXT: dsll $3, $4, 63
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddu $3, $3, $1
entry:
%mul = mul nsw i128 %a, -170141183460469231731687303715884105725
ret i128 %mul
}
-; CHECK64-LABEL: mul170141183460469231731687303715884105723_128:
-; CHECK64-DAG: dsrl $[[R0:[0-9]+]], $4, 62
-; CHECK64-DAG: dsll $[[R1:[0-9]+]], $5, 2
-; CHECK64-DAG: or $[[R2:[0-9]+]], $[[R1]], $[[R0]]
-; CHECK64-DAG: daddu $[[R3:[0-9]+]], $[[R2]], $5
-; CHECK64-DAG: dsll $[[R4:[0-9]+]], $4, 2
-; CHECK64-DAG: daddu $[[R5:[0-9]+]], $[[R4]], $4
-; CHECK64-DAG: sltu $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK64-DAG: dsll $[[R7:[0-9]+]], $[[R6]], 32
-; CHECK64-DAG: dsrl $[[R8:[0-9]+]], $[[R7]], 32
-; CHECK64-DAG: daddu $[[R9:[0-9]+]], $[[R3]], $[[R8]]
-; CHECK64-DAG: dsll $[[R10:[0-9]+]], $4, 63
-; CHECK64-DAG: dsubu $[[R11:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK64-DAG: sltu $[[R12:[0-9]+]], $zero, $[[R5]]
-; CHECK64-DAG: dsll $[[R13:[0-9]+]], $[[R12]], 32
-; CHECK64-DAG: dsrl $[[R14:[0-9]+]], $[[R13]], 32
-; CHECK64-DAG: dsubu $[[R15:[0-9]+]], $[[R11]], $[[R14]]
-; CHECK64: dnegu ${{[0-9]+}}, $[[R5]]
-
define i128 @mul170141183460469231731687303715884105723_128(i128 signext %a) {
+; MIPS32-LABEL: mul170141183460469231731687303715884105723_128:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: sll $1, $4, 2
+; MIPS32-NEXT: addu $2, $1, $4
+; MIPS32-NEXT: sltu $1, $2, $1
+; MIPS32-NEXT: srl $3, $4, 30
+; MIPS32-NEXT: sll $8, $5, 2
+; MIPS32-NEXT: or $3, $8, $3
+; MIPS32-NEXT: addu $8, $3, $5
+; MIPS32-NEXT: addu $8, $8, $1
+; MIPS32-NEXT: sltu $9, $8, $3
+; MIPS32-NEXT: xor $3, $8, $3
+; MIPS32-NEXT: sltu $10, $zero, $8
+; MIPS32-NEXT: sltu $11, $zero, $2
+; MIPS32-NEXT: movz $10, $11, $8
+; MIPS32-NEXT: movz $9, $1, $3
+; MIPS32-NEXT: srl $1, $5, 30
+; MIPS32-NEXT: sll $3, $6, 2
+; MIPS32-NEXT: or $1, $3, $1
+; MIPS32-NEXT: addu $3, $1, $6
+; MIPS32-NEXT: addu $5, $3, $9
+; MIPS32-NEXT: sll $4, $4, 31
+; MIPS32-NEXT: negu $9, $5
+; MIPS32-NEXT: sltu $12, $9, $10
+; MIPS32-NEXT: sltu $13, $5, $3
+; MIPS32-NEXT: sltu $1, $3, $1
+; MIPS32-NEXT: srl $3, $6, 30
+; MIPS32-NEXT: sll $6, $7, 2
+; MIPS32-NEXT: or $3, $6, $3
+; MIPS32-NEXT: addu $3, $3, $7
+; MIPS32-NEXT: addu $1, $3, $1
+; MIPS32-NEXT: addu $1, $1, $13
+; MIPS32-NEXT: subu $1, $4, $1
+; MIPS32-NEXT: sltu $3, $zero, $5
+; MIPS32-NEXT: subu $1, $1, $3
+; MIPS32-NEXT: subu $5, $1, $12
+; MIPS32-NEXT: subu $4, $9, $10
+; MIPS32-NEXT: negu $1, $8
+; MIPS32-NEXT: subu $3, $1, $11
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: negu $2, $2
+;
+; MIPS64-LABEL: mul170141183460469231731687303715884105723_128:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: dsrl $1, $4, 62
+; MIPS64-NEXT: dsll $2, $5, 2
+; MIPS64-NEXT: or $1, $2, $1
+; MIPS64-NEXT: daddu $1, $1, $5
+; MIPS64-NEXT: dsll $2, $4, 2
+; MIPS64-NEXT: daddu $5, $2, $4
+; MIPS64-NEXT: sltu $2, $5, $2
+; MIPS64-NEXT: dsll $2, $2, 32
+; MIPS64-NEXT: dsrl $2, $2, 32
+; MIPS64-NEXT: daddu $1, $1, $2
+; MIPS64-NEXT: dsll $2, $4, 63
+; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: sltu $2, $zero, $5
+; MIPS64-NEXT: dsll $2, $2, 32
+; MIPS64-NEXT: dsrl $2, $2, 32
+; MIPS64-NEXT: dsubu $3, $1, $2
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: dnegu $2, $5
entry:
%mul = mul nsw i128 %a, 170141183460469231731687303715884105723
ret i128 %mul
}
+
+define i32 @mul42949673_32(i32 %a) {
+; MIPS32-LABEL: mul42949673_32:
+; MIPS32: # %bb.0:
+; MIPS32-NEXT: sll $1, $4, 3
+; MIPS32-NEXT: addu $1, $1, $4
+; MIPS32-NEXT: sll $2, $4, 5
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 10
+; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 13
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 15
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 20
+; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 25
+; MIPS32-NEXT: sll $3, $4, 23
+; MIPS32-NEXT: addu $1, $3, $1
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addu $2, $2, $1
+;
+; MIPS64-LABEL: mul42949673_32:
+; MIPS64: # %bb.0:
+; MIPS64-NEXT: sll $1, $4, 0
+; MIPS64-NEXT: sll $2, $1, 3
+; MIPS64-NEXT: addu $2, $2, $1
+; MIPS64-NEXT: sll $3, $1, 5
+; MIPS64-NEXT: addu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 10
+; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 13
+; MIPS64-NEXT: addu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 15
+; MIPS64-NEXT: addu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 20
+; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 25
+; MIPS64-NEXT: sll $1, $1, 23
+; MIPS64-NEXT: addu $1, $1, $2
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: addu $2, $3, $1
+ %b = mul i32 %a, 42949673
+ ret i32 %b
+}
+
+define i64 @mul42949673_64(i64 %a) {
+; MIPS32-LABEL: mul42949673_64:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: lui $1, 655
+; MIPS32-NEXT: ori $1, $1, 23593
+; MIPS32-NEXT: multu $4, $1
+; MIPS32-NEXT: mflo $2
+; MIPS32-NEXT: mfhi $1
+; MIPS32-NEXT: sll $3, $5, 3
+; MIPS32-NEXT: addu $3, $3, $5
+; MIPS32-NEXT: sll $4, $5, 5
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 10
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 13
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 15
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 20
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 25
+; MIPS32-NEXT: sll $5, $5, 23
+; MIPS32-NEXT: addu $3, $5, $3
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addu $3, $1, $3
+;
+; MIPS64-LABEL: mul42949673_64:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: dsll $1, $4, 3
+; MIPS64-NEXT: daddu $1, $1, $4
+; MIPS64-NEXT: dsll $2, $4, 5
+; MIPS64-NEXT: daddu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 10
+; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 13
+; MIPS64-NEXT: daddu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 15
+; MIPS64-NEXT: daddu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 20
+; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 25
+; MIPS64-NEXT: dsll $3, $4, 23
+; MIPS64-NEXT: daddu $1, $3, $1
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddu $2, $2, $1
+entry:
+ %b = mul i64 %a, 42949673
+ ret i64 %b
+}
+
+define i32 @mul22224078_32(i32 %a) {
+; MIPS32-LABEL: mul22224078_32:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: sll $1, $4, 1
+; MIPS32-NEXT: sll $2, $4, 4
+; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 6
+; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 8
+; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 10
+; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 13
+; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 16
+; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 24
+; MIPS32-NEXT: sll $3, $4, 22
+; MIPS32-NEXT: sll $5, $4, 20
+; MIPS32-NEXT: sll $4, $4, 18
+; MIPS32-NEXT: subu $1, $4, $1
+; MIPS32-NEXT: addu $1, $5, $1
+; MIPS32-NEXT: addu $1, $3, $1
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addu $2, $2, $1
+;
+; MIPS64-LABEL: mul22224078_32:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: sll $1, $4, 0
+; MIPS64-NEXT: sll $2, $1, 1
+; MIPS64-NEXT: sll $3, $1, 4
+; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 6
+; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 8
+; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 10
+; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 13
+; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 16
+; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 24
+; MIPS64-NEXT: sll $4, $1, 22
+; MIPS64-NEXT: sll $5, $1, 20
+; MIPS64-NEXT: sll $1, $1, 18
+; MIPS64-NEXT: subu $1, $1, $2
+; MIPS64-NEXT: addu $1, $5, $1
+; MIPS64-NEXT: addu $1, $4, $1
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: addu $2, $3, $1
+entry:
+ %b = mul i32 %a, 22224078
+ ret i32 %b
+}
+
+define i64 @mul22224078_64(i64 %a) {
+; MIPS32-LABEL: mul22224078_64:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: lui $1, 339
+; MIPS32-NEXT: ori $1, $1, 7374
+; MIPS32-NEXT: multu $4, $1
+; MIPS32-NEXT: mflo $2
+; MIPS32-NEXT: mfhi $1
+; MIPS32-NEXT: sll $3, $5, 1
+; MIPS32-NEXT: sll $4, $5, 4
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 6
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 8
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 10
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 13
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 16
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 24
+; MIPS32-NEXT: sll $6, $5, 22
+; MIPS32-NEXT: sll $7, $5, 20
+; MIPS32-NEXT: sll $5, $5, 18
+; MIPS32-NEXT: subu $3, $5, $3
+; MIPS32-NEXT: addu $3, $7, $3
+; MIPS32-NEXT: addu $3, $6, $3
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addu $3, $1, $3
+;
+; MIPS64-LABEL: mul22224078_64:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: dsll $1, $4, 1
+; MIPS64-NEXT: dsll $2, $4, 4
+; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 6
+; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 8
+; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 10
+; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 13
+; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 16
+; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 24
+; MIPS64-NEXT: dsll $3, $4, 22
+; MIPS64-NEXT: dsll $5, $4, 20
+; MIPS64-NEXT: dsll $4, $4, 18
+; MIPS64-NEXT: dsubu $1, $4, $1
+; MIPS64-NEXT: daddu $1, $5, $1
+; MIPS64-NEXT: daddu $1, $3, $1
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddu $2, $2, $1
+entry:
+ %b = mul i64 %a, 22224078
+ ret i64 %b
+}
+
+define i32 @mul22245375_32(i32 %a) {
+; MIPS32-LABEL: mul22245375_32:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: sll $1, $4, 12
+; MIPS32-NEXT: addu $1, $1, $4
+; MIPS32-NEXT: sll $2, $4, 15
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 18
+; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 20
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 22
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 24
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addu $2, $2, $1
+;
+; MIPS64-LABEL: mul22245375_32:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: sll $1, $4, 0
+; MIPS64-NEXT: sll $2, $1, 12
+; MIPS64-NEXT: addu $2, $2, $1
+; MIPS64-NEXT: sll $3, $1, 15
+; MIPS64-NEXT: addu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 18
+; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 20
+; MIPS64-NEXT: addu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 22
+; MIPS64-NEXT: addu $2, $3, $2
+; MIPS64-NEXT: sll $1, $1, 24
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: addu $2, $1, $2
+entry:
+ %b = mul i32 %a, 22245375
+ ret i32 %b
+}
+
+define i64 @mul22245375_64(i64 %a) {
+; MIPS32-LABEL: mul22245375_64:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: lui $1, 339
+; MIPS32-NEXT: ori $1, $1, 28671
+; MIPS32-NEXT: multu $4, $1
+; MIPS32-NEXT: mflo $2
+; MIPS32-NEXT: mfhi $1
+; MIPS32-NEXT: sll $3, $5, 12
+; MIPS32-NEXT: addu $3, $3, $5
+; MIPS32-NEXT: sll $4, $5, 15
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 18
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 20
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 22
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 24
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addu $3, $1, $3
+;
+; MIPS64-LABEL: mul22245375_64:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: dsll $1, $4, 12
+; MIPS64-NEXT: daddu $1, $1, $4
+; MIPS64-NEXT: dsll $2, $4, 15
+; MIPS64-NEXT: daddu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 18
+; MIPS64-NEXT: dsubu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 20
+; MIPS64-NEXT: daddu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 22
+; MIPS64-NEXT: daddu $1, $2, $1
+; MIPS64-NEXT: dsll $2, $4, 24
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddu $2, $2, $1
+entry:
+ %b = mul i64 %a, 22245375
+ ret i64 %b
+}
+
+define i32 @mul25165824_32(i32 %a) {
+; MIPS32-LABEL: mul25165824_32:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: sll $1, $4, 12
+; MIPS32-NEXT: addu $1, $1, $4
+; MIPS32-NEXT: sll $2, $4, 15
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 18
+; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 20
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 22
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 24
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addu $2, $2, $1
+;
+; MIPS64-LABEL: mul25165824_32:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: sll $1, $4, 0
+; MIPS64-NEXT: sll $2, $1, 12
+; MIPS64-NEXT: addu $2, $2, $1
+; MIPS64-NEXT: sll $3, $1, 15
+; MIPS64-NEXT: addu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 18
+; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 20
+; MIPS64-NEXT: addu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 22
+; MIPS64-NEXT: addu $2, $3, $2
+; MIPS64-NEXT: sll $1, $1, 24
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: addu $2, $1, $2
+entry:
+ %b = mul i32 %a, 22245375
+ ret i32 %b
+}
+
+define i64 @mul25165824_64(i64 %a) {
+; MIPS32-LABEL: mul25165824_64:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: srl $1, $4, 9
+; MIPS32-NEXT: sll $2, $5, 23
+; MIPS32-NEXT: or $1, $2, $1
+; MIPS32-NEXT: srl $2, $4, 8
+; MIPS32-NEXT: sll $3, $5, 24
+; MIPS32-NEXT: or $2, $3, $2
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 23
+; MIPS32-NEXT: sll $3, $4, 24
+; MIPS32-NEXT: addu $2, $3, $2
+; MIPS32-NEXT: sltu $3, $2, $3
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addu $3, $1, $3
+;
+; MIPS64-LABEL: mul25165824_64:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: dsll $1, $4, 23
+; MIPS64-NEXT: dsll $2, $4, 24
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: daddu $2, $2, $1
+entry:
+ %b = mul i64 %a, 25165824
+ ret i64 %b
+}
+
+define i32 @mul33554432_32(i32 %a) {
+; MIPS32-LABEL: mul33554432_32:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: sll $1, $4, 12
+; MIPS32-NEXT: addu $1, $1, $4
+; MIPS32-NEXT: sll $2, $4, 15
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 18
+; MIPS32-NEXT: subu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 20
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 22
+; MIPS32-NEXT: addu $1, $2, $1
+; MIPS32-NEXT: sll $2, $4, 24
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: addu $2, $2, $1
+;
+; MIPS64-LABEL: mul33554432_32:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: sll $1, $4, 0
+; MIPS64-NEXT: sll $2, $1, 12
+; MIPS64-NEXT: addu $2, $2, $1
+; MIPS64-NEXT: sll $3, $1, 15
+; MIPS64-NEXT: addu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 18
+; MIPS64-NEXT: subu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 20
+; MIPS64-NEXT: addu $2, $3, $2
+; MIPS64-NEXT: sll $3, $1, 22
+; MIPS64-NEXT: addu $2, $3, $2
+; MIPS64-NEXT: sll $1, $1, 24
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: addu $2, $1, $2
+entry:
+ %b = mul i32 %a, 22245375
+ ret i32 %b
+}
+
+define i64 @mul33554432_64(i64 %a) {
+; MIPS32-LABEL: mul33554432_64:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: srl $1, $4, 7
+; MIPS32-NEXT: sll $2, $5, 25
+; MIPS32-NEXT: or $3, $2, $1
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sll $2, $4, 25
+;
+; MIPS64-LABEL: mul33554432_64:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: dsll $2, $4, 25
+entry:
+ %b = mul i64 %a, 33554432
+ ret i64 %b
+}
diff --git a/test/CodeGen/Mips/indirect-jump-hazard/calls.ll b/test/CodeGen/Mips/indirect-jump-hazard/calls.ll
new file mode 100644
index 000000000000..20e89136d87c
--- /dev/null
+++ b/test/CodeGen/Mips/indirect-jump-hazard/calls.ll
@@ -0,0 +1,188 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=mips-mti-linux-gnu -relocation-model=static \
+; RUN: -mips-tail-calls=1 -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS32R2
+; RUN: llc < %s -mtriple=mips-img-linux-gnu -relocation-model=static \
+; RUN: -mips-tail-calls=1 -mcpu=mips32r6 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS32R6
+; RUN: llc < %s -mtriple=mips64-mti-linux-gnu -relocation-model=static \
+; RUN: -mips-tail-calls=1 -mcpu=mips64r2 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS64R2
+; RUN: llc < %s -mtriple=mips64-img-linux-gnu -relocation-model=static \
+; RUN: -mips-tail-calls=1 -mcpu=mips64r6 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS64R6
+
+; RUN: llc < %s -mtriple=mips-mti-linux-gnu -relocation-model=pic \
+; RUN: -mips-tail-calls=1 -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS32R2
+; RUN: llc < %s -mtriple=mips-img-linux-gnu -relocation-model=pic \
+; RUN: -mips-tail-calls=1 -mcpu=mips32r6 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS32R6
+; RUN: llc < %s -mtriple=mips64-mti-linux-gnu -relocation-model=pic \
+; RUN: -mips-tail-calls=1 -mcpu=mips64r2 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS64R2
+; RUN: llc < %s -mtriple=mips64-img-linux-gnu -relocation-model=pic \
+; RUN: -mips-tail-calls=1 -mcpu=mips64r6 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS64R6
+
+define void @fooNonTail(void (i32)* nocapture %f1) nounwind {
+; MIPS32R2-LABEL: fooNonTail:
+; MIPS32R2: # %bb.0: # %entry
+; MIPS32R2-NEXT: addiu $sp, $sp, -24
+; MIPS32R2-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32R2-NEXT: move $1, $4
+; MIPS32R2-NEXT: move $25, $1
+; MIPS32R2-NEXT: jalr.hb $25
+; MIPS32R2-NEXT: addiu $4, $zero, 13
+; MIPS32R2-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R2-NEXT: jr $ra
+; MIPS32R2-NEXT: addiu $sp, $sp, 24
+;
+; MIPS32R6-LABEL: fooNonTail:
+; MIPS32R6: # %bb.0: # %entry
+; MIPS32R6-NEXT: addiu $sp, $sp, -24
+; MIPS32R6-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32R6-NEXT: move $1, $4
+; MIPS32R6-NEXT: move $25, $1
+; MIPS32R6-NEXT: jalr.hb $25
+; MIPS32R6-NEXT: addiu $4, $zero, 13
+; MIPS32R6-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R6-NEXT: jr $ra
+; MIPS32R6-NEXT: addiu $sp, $sp, 24
+;
+; MIPS64R2-LABEL: fooNonTail:
+; MIPS64R2: # %bb.0: # %entry
+; MIPS64R2-NEXT: daddiu $sp, $sp, -16
+; MIPS64R2-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R2-NEXT: move $1, $4
+; MIPS64R2-NEXT: move $25, $1
+; MIPS64R2-NEXT: jalr.hb $25
+; MIPS64R2-NEXT: daddiu $4, $zero, 13
+; MIPS64R2-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R2-NEXT: jr $ra
+; MIPS64R2-NEXT: daddiu $sp, $sp, 16
+;
+; MIPS64R6-LABEL: fooNonTail:
+; MIPS64R6: # %bb.0: # %entry
+; MIPS64R6-NEXT: daddiu $sp, $sp, -16
+; MIPS64R6-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R6-NEXT: move $1, $4
+; MIPS64R6-NEXT: move $25, $1
+; MIPS64R6-NEXT: jalr.hb $25
+; MIPS64R6-NEXT: daddiu $4, $zero, 13
+; MIPS64R6-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R6-NEXT: jr $ra
+; MIPS64R6-NEXT: daddiu $sp, $sp, 16
+;
+; PIC-MIPS32R2-LABEL: fooNonTail:
+; PIC-MIPS32R2: # %bb.0: # %entry
+; PIC-MIPS32R2-NEXT: addiu $sp, $sp, -24
+; PIC-MIPS32R2-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
+; PIC-MIPS32R2-NEXT: move $1, $4
+; PIC-MIPS32R2-NEXT: move $25, $1
+; PIC-MIPS32R2-NEXT: jalr.hb $25
+; PIC-MIPS32R2-NEXT: addiu $4, $zero, 13
+; PIC-MIPS32R2-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; PIC-MIPS32R2-NEXT: jr $ra
+; PIC-MIPS32R2-NEXT: addiu $sp, $sp, 24
+;
+; PIC-MIPS32R6-LABEL: fooNonTail:
+; PIC-MIPS32R6: # %bb.0: # %entry
+; PIC-MIPS32R6-NEXT: addiu $sp, $sp, -24
+; PIC-MIPS32R6-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
+; PIC-MIPS32R6-NEXT: move $1, $4
+; PIC-MIPS32R6-NEXT: move $25, $1
+; PIC-MIPS32R6-NEXT: jalr.hb $25
+; PIC-MIPS32R6-NEXT: addiu $4, $zero, 13
+; PIC-MIPS32R6-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; PIC-MIPS32R6-NEXT: jr $ra
+; PIC-MIPS32R6-NEXT: addiu $sp, $sp, 24
+;
+; PIC-MIPS64R2-LABEL: fooNonTail:
+; PIC-MIPS64R2: # %bb.0: # %entry
+; PIC-MIPS64R2-NEXT: daddiu $sp, $sp, -16
+; PIC-MIPS64R2-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; PIC-MIPS64R2-NEXT: move $1, $4
+; PIC-MIPS64R2-NEXT: move $25, $1
+; PIC-MIPS64R2-NEXT: jalr.hb $25
+; PIC-MIPS64R2-NEXT: daddiu $4, $zero, 13
+; PIC-MIPS64R2-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; PIC-MIPS64R2-NEXT: jr $ra
+; PIC-MIPS64R2-NEXT: daddiu $sp, $sp, 16
+;
+; PIC-MIPS64R6-LABEL: fooNonTail:
+; PIC-MIPS64R6: # %bb.0: # %entry
+; PIC-MIPS64R6-NEXT: daddiu $sp, $sp, -16
+; PIC-MIPS64R6-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; PIC-MIPS64R6-NEXT: move $1, $4
+; PIC-MIPS64R6-NEXT: move $25, $1
+; PIC-MIPS64R6-NEXT: jalr.hb $25
+; PIC-MIPS64R6-NEXT: daddiu $4, $zero, 13
+; PIC-MIPS64R6-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; PIC-MIPS64R6-NEXT: jr $ra
+; PIC-MIPS64R6-NEXT: daddiu $sp, $sp, 16
+entry:
+ call void %f1(i32 13) nounwind
+ ret void
+}
+
+define i32 @fooTail(i32 (i32)* nocapture %f1) nounwind {
+; MIPS32R2-LABEL: fooTail:
+; MIPS32R2: # %bb.0: # %entry
+; MIPS32R2-NEXT: move $1, $4
+; MIPS32R2-NEXT: move $25, $1
+; MIPS32R2-NEXT: jr.hb $25
+; MIPS32R2-NEXT: addiu $4, $zero, 14
+;
+; MIPS32R6-LABEL: fooTail:
+; MIPS32R6: # %bb.0: # %entry
+; MIPS32R6-NEXT: move $1, $4
+; MIPS32R6-NEXT: move $25, $1
+; MIPS32R6-NEXT: jr.hb $25
+; MIPS32R6-NEXT: addiu $4, $zero, 14
+;
+; MIPS64R2-LABEL: fooTail:
+; MIPS64R2: # %bb.0: # %entry
+; MIPS64R2-NEXT: move $1, $4
+; MIPS64R2-NEXT: move $25, $1
+; MIPS64R2-NEXT: jr.hb $25
+; MIPS64R2-NEXT: daddiu $4, $zero, 14
+;
+; MIPS64R6-LABEL: fooTail:
+; MIPS64R6: # %bb.0: # %entry
+; MIPS64R6-NEXT: move $1, $4
+; MIPS64R6-NEXT: move $25, $1
+; MIPS64R6-NEXT: jr.hb $25
+; MIPS64R6-NEXT: daddiu $4, $zero, 14
+;
+; PIC-MIPS32R2-LABEL: fooTail:
+; PIC-MIPS32R2: # %bb.0: # %entry
+; PIC-MIPS32R2-NEXT: move $1, $4
+; PIC-MIPS32R2-NEXT: move $25, $1
+; PIC-MIPS32R2-NEXT: jr.hb $25
+; PIC-MIPS32R2-NEXT: addiu $4, $zero, 14
+;
+; PIC-MIPS32R6-LABEL: fooTail:
+; PIC-MIPS32R6: # %bb.0: # %entry
+; PIC-MIPS32R6-NEXT: move $1, $4
+; PIC-MIPS32R6-NEXT: move $25, $1
+; PIC-MIPS32R6-NEXT: jr.hb $25
+; PIC-MIPS32R6-NEXT: addiu $4, $zero, 14
+;
+; PIC-MIPS64R2-LABEL: fooTail:
+; PIC-MIPS64R2: # %bb.0: # %entry
+; PIC-MIPS64R2-NEXT: move $1, $4
+; PIC-MIPS64R2-NEXT: move $25, $1
+; PIC-MIPS64R2-NEXT: jr.hb $25
+; PIC-MIPS64R2-NEXT: daddiu $4, $zero, 14
+;
+; PIC-MIPS64R6-LABEL: fooTail:
+; PIC-MIPS64R6: # %bb.0: # %entry
+; PIC-MIPS64R6-NEXT: move $1, $4
+; PIC-MIPS64R6-NEXT: move $25, $1
+; PIC-MIPS64R6-NEXT: jr.hb $25
+; PIC-MIPS64R6-NEXT: daddiu $4, $zero, 14
+entry:
+ %0 = tail call i32 %f1(i32 14) nounwind
+ ret i32 %0
+}
diff --git a/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-call.mir b/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-call.mir
new file mode 100644
index 000000000000..1c11d700b53e
--- /dev/null
+++ b/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-call.mir
@@ -0,0 +1,58 @@
+# RUN: not llc -mtriple=mips-mti-linux-gnu -mcpu=mips32r2 %s \
+# RUN: -start-after=expand-isel-pseudos -stop-after=expand-isel-pseudos \
+# RUN: -verify-machineinstrs -mattr=+use-indirect-jump-hazard -o - 2>&1 \
+# RUN: | FileCheck %s
+
+# Test that calls are checked when using indirect jump guards (hazard variant).
+
+# CHECK: Bad machine code: invalid instruction when using jump guards!
+--- |
+ define i32 @fooTail(i32 (i32)* nocapture %f1) {
+ entry:
+ %0 = tail call i32 %f1(i32 14)
+ ret i32 %0
+ }
+...
+---
+name: fooTail
+alignment: 2
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32, preferred-register: '' }
+ - { id: 1, class: gpr32, preferred-register: '' }
+liveins:
+ - { reg: '%a0', virtual-reg: '%0' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 4294967295
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ savePoint: ''
+ restorePoint: ''
+fixedStack:
+stack:
+constants:
+body: |
+ bb.0.entry:
+ liveins: %a0
+
+ %0:gpr32 = COPY %a0
+ %1:gpr32 = ADDiu $zero, 14
+ %a0 = COPY %1
+ TAILCALLREG %0, csr_o32, implicit-def dead %at, implicit %a0
+
+...
diff --git a/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-tailcall.mir b/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-tailcall.mir
new file mode 100644
index 000000000000..00e22b934bbc
--- /dev/null
+++ b/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-tailcall.mir
@@ -0,0 +1,59 @@
+# RUN: not llc -mtriple=mips-mti-linux-gnu -mcpu=mips32r2 %s \
+# RUN: -start-after=expand-isel-pseudos -stop-after=expand-isel-pseudos \
+# RUN: -verify-machineinstrs -mattr=+use-indirect-jump-hazard -o - 2>&1 \
+# RUN: | FileCheck %s
+
+# Test that tail calls are checked when using indirect jump guards (hazard variant).
+
+# CHECK: Bad machine code: invalid instruction when using jump guards!
+--- |
+ define i32 @fooTail(i32 (i32)* nocapture %f1) {
+ entry:
+ %0 = tail call i32 %f1(i32 14)
+ ret i32 %0
+ }
+
+...
+---
+name: fooTail
+alignment: 2
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32, preferred-register: '' }
+ - { id: 1, class: gpr32, preferred-register: '' }
+liveins:
+ - { reg: '%a0', virtual-reg: '%0' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 4294967295
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ savePoint: ''
+ restorePoint: ''
+fixedStack:
+stack:
+constants:
+body: |
+ bb.0.entry:
+ liveins: %a0
+
+ %0:gpr32 = COPY %a0
+ %1:gpr32 = ADDiu $zero, 14
+ %a0 = COPY %1
+ TAILCALLREG %0, csr_o32, implicit-def dead %at, implicit %a0
+
+...
diff --git a/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll b/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll
new file mode 100644
index 000000000000..c530dd614ef8
--- /dev/null
+++ b/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll
@@ -0,0 +1,649 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=mips-mti-linux-gnu -relocation-model=static \
+; RUN: -mips-tail-calls=1 -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS32R2
+; RUN: llc < %s -mtriple=mips-img-linux-gnu -relocation-model=static \
+; RUN: -mips-tail-calls=1 -mcpu=mips32r6 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS32R6
+; RUN: llc < %s -mtriple=mips64-mti-linux-gnu -relocation-model=static \
+; RUN: -mips-tail-calls=1 -mcpu=mips64r2 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS64R2
+; RUN: llc < %s -mtriple=mips64-img-linux-gnu -relocation-model=static \
+; RUN: -mips-tail-calls=1 -mcpu=mips64r6 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS64R6
+
+; RUN: llc < %s -mtriple=mips-mti-linux-gnu -relocation-model=pic \
+; RUN: -mips-tail-calls=1 -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS32R2
+; RUN: llc < %s -mtriple=mips-img-linux-gnu -relocation-model=pic \
+; RUN: -mips-tail-calls=1 -mcpu=mips32r6 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS32R6
+; RUN: llc < %s -mtriple=mips64-mti-linux-gnu -relocation-model=pic \
+; RUN: -mips-tail-calls=1 -mcpu=mips64r2 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS64R2
+; RUN: llc < %s -mtriple=mips64-img-linux-gnu -relocation-model=pic \
+; RUN: -mips-tail-calls=1 -mcpu=mips64r6 -mattr=+use-indirect-jump-hazard \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS64R6
+
+@.str = private unnamed_addr constant [2 x i8] c"A\00", align 1
+@.str.1 = private unnamed_addr constant [2 x i8] c"B\00", align 1
+@.str.2 = private unnamed_addr constant [2 x i8] c"C\00", align 1
+@.str.3 = private unnamed_addr constant [2 x i8] c"D\00", align 1
+@.str.4 = private unnamed_addr constant [2 x i8] c"E\00", align 1
+@.str.5 = private unnamed_addr constant [2 x i8] c"F\00", align 1
+@.str.6 = private unnamed_addr constant [2 x i8] c"G\00", align 1
+@.str.7 = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+define i8* @_Z3fooi(i32 signext %Letter) {
+; MIPS32R2-LABEL: _Z3fooi:
+; MIPS32R2: # %bb.0: # %entry
+; MIPS32R2-NEXT: addiu $sp, $sp, -16
+; MIPS32R2-NEXT: .cfi_def_cfa_offset 16
+; MIPS32R2-NEXT: sltiu $1, $4, 7
+; MIPS32R2-NEXT: beqz $1, $BB0_3
+; MIPS32R2-NEXT: sw $4, 4($sp)
+; MIPS32R2-NEXT: $BB0_1: # %entry
+; MIPS32R2-NEXT: sll $1, $4, 2
+; MIPS32R2-NEXT: lui $2, %hi($JTI0_0)
+; MIPS32R2-NEXT: addu $1, $1, $2
+; MIPS32R2-NEXT: lw $1, %lo($JTI0_0)($1)
+; MIPS32R2-NEXT: jr.hb $1
+; MIPS32R2-NEXT: nop
+; MIPS32R2-NEXT: $BB0_2: # %sw.bb
+; MIPS32R2-NEXT: lui $1, %hi($.str)
+; MIPS32R2-NEXT: addiu $1, $1, %lo($.str)
+; MIPS32R2-NEXT: j $BB0_10
+; MIPS32R2-NEXT: sw $1, 8($sp)
+; MIPS32R2-NEXT: $BB0_3: # %sw.epilog
+; MIPS32R2-NEXT: lui $1, %hi($.str.7)
+; MIPS32R2-NEXT: addiu $1, $1, %lo($.str.7)
+; MIPS32R2-NEXT: j $BB0_10
+; MIPS32R2-NEXT: sw $1, 8($sp)
+; MIPS32R2-NEXT: $BB0_4: # %sw.bb1
+; MIPS32R2-NEXT: lui $1, %hi($.str.1)
+; MIPS32R2-NEXT: addiu $1, $1, %lo($.str.1)
+; MIPS32R2-NEXT: j $BB0_10
+; MIPS32R2-NEXT: sw $1, 8($sp)
+; MIPS32R2-NEXT: $BB0_5: # %sw.bb2
+; MIPS32R2-NEXT: lui $1, %hi($.str.2)
+; MIPS32R2-NEXT: addiu $1, $1, %lo($.str.2)
+; MIPS32R2-NEXT: j $BB0_10
+; MIPS32R2-NEXT: sw $1, 8($sp)
+; MIPS32R2-NEXT: $BB0_6: # %sw.bb3
+; MIPS32R2-NEXT: lui $1, %hi($.str.3)
+; MIPS32R2-NEXT: addiu $1, $1, %lo($.str.3)
+; MIPS32R2-NEXT: j $BB0_10
+; MIPS32R2-NEXT: sw $1, 8($sp)
+; MIPS32R2-NEXT: $BB0_7: # %sw.bb4
+; MIPS32R2-NEXT: lui $1, %hi($.str.4)
+; MIPS32R2-NEXT: addiu $1, $1, %lo($.str.4)
+; MIPS32R2-NEXT: j $BB0_10
+; MIPS32R2-NEXT: sw $1, 8($sp)
+; MIPS32R2-NEXT: $BB0_8: # %sw.bb5
+; MIPS32R2-NEXT: lui $1, %hi($.str.5)
+; MIPS32R2-NEXT: addiu $1, $1, %lo($.str.5)
+; MIPS32R2-NEXT: j $BB0_10
+; MIPS32R2-NEXT: sw $1, 8($sp)
+; MIPS32R2-NEXT: $BB0_9: # %sw.bb6
+; MIPS32R2-NEXT: lui $1, %hi($.str.6)
+; MIPS32R2-NEXT: addiu $1, $1, %lo($.str.6)
+; MIPS32R2-NEXT: sw $1, 8($sp)
+; MIPS32R2-NEXT: $BB0_10: # %return
+; MIPS32R2-NEXT: lw $2, 8($sp)
+; MIPS32R2-NEXT: jr $ra
+; MIPS32R2-NEXT: addiu $sp, $sp, 16
+;
+; MIPS32R6-LABEL: _Z3fooi:
+; MIPS32R6: # %bb.0: # %entry
+; MIPS32R6-NEXT: addiu $sp, $sp, -16
+; MIPS32R6-NEXT: .cfi_def_cfa_offset 16
+; MIPS32R6-NEXT: sltiu $1, $4, 7
+; MIPS32R6-NEXT: beqz $1, $BB0_3
+; MIPS32R6-NEXT: sw $4, 4($sp)
+; MIPS32R6-NEXT: $BB0_1: # %entry
+; MIPS32R6-NEXT: sll $1, $4, 2
+; MIPS32R6-NEXT: lui $2, %hi($JTI0_0)
+; MIPS32R6-NEXT: addu $1, $1, $2
+; MIPS32R6-NEXT: lw $1, %lo($JTI0_0)($1)
+; MIPS32R6-NEXT: jr.hb $1
+; MIPS32R6-NEXT: nop
+; MIPS32R6-NEXT: $BB0_2: # %sw.bb
+; MIPS32R6-NEXT: lui $1, %hi($.str)
+; MIPS32R6-NEXT: addiu $1, $1, %lo($.str)
+; MIPS32R6-NEXT: j $BB0_10
+; MIPS32R6-NEXT: sw $1, 8($sp)
+; MIPS32R6-NEXT: $BB0_3: # %sw.epilog
+; MIPS32R6-NEXT: lui $1, %hi($.str.7)
+; MIPS32R6-NEXT: addiu $1, $1, %lo($.str.7)
+; MIPS32R6-NEXT: j $BB0_10
+; MIPS32R6-NEXT: sw $1, 8($sp)
+; MIPS32R6-NEXT: $BB0_4: # %sw.bb1
+; MIPS32R6-NEXT: lui $1, %hi($.str.1)
+; MIPS32R6-NEXT: addiu $1, $1, %lo($.str.1)
+; MIPS32R6-NEXT: j $BB0_10
+; MIPS32R6-NEXT: sw $1, 8($sp)
+; MIPS32R6-NEXT: $BB0_5: # %sw.bb2
+; MIPS32R6-NEXT: lui $1, %hi($.str.2)
+; MIPS32R6-NEXT: addiu $1, $1, %lo($.str.2)
+; MIPS32R6-NEXT: j $BB0_10
+; MIPS32R6-NEXT: sw $1, 8($sp)
+; MIPS32R6-NEXT: $BB0_6: # %sw.bb3
+; MIPS32R6-NEXT: lui $1, %hi($.str.3)
+; MIPS32R6-NEXT: addiu $1, $1, %lo($.str.3)
+; MIPS32R6-NEXT: j $BB0_10
+; MIPS32R6-NEXT: sw $1, 8($sp)
+; MIPS32R6-NEXT: $BB0_7: # %sw.bb4
+; MIPS32R6-NEXT: lui $1, %hi($.str.4)
+; MIPS32R6-NEXT: addiu $1, $1, %lo($.str.4)
+; MIPS32R6-NEXT: j $BB0_10
+; MIPS32R6-NEXT: sw $1, 8($sp)
+; MIPS32R6-NEXT: $BB0_8: # %sw.bb5
+; MIPS32R6-NEXT: lui $1, %hi($.str.5)
+; MIPS32R6-NEXT: addiu $1, $1, %lo($.str.5)
+; MIPS32R6-NEXT: j $BB0_10
+; MIPS32R6-NEXT: sw $1, 8($sp)
+; MIPS32R6-NEXT: $BB0_9: # %sw.bb6
+; MIPS32R6-NEXT: lui $1, %hi($.str.6)
+; MIPS32R6-NEXT: addiu $1, $1, %lo($.str.6)
+; MIPS32R6-NEXT: sw $1, 8($sp)
+; MIPS32R6-NEXT: $BB0_10: # %return
+; MIPS32R6-NEXT: lw $2, 8($sp)
+; MIPS32R6-NEXT: jr $ra
+; MIPS32R6-NEXT: addiu $sp, $sp, 16
+;
+; MIPS64R2-LABEL: _Z3fooi:
+; MIPS64R2: # %bb.0: # %entry
+; MIPS64R2-NEXT: daddiu $sp, $sp, -16
+; MIPS64R2-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R2-NEXT: sw $4, 4($sp)
+; MIPS64R2-NEXT: lwu $2, 4($sp)
+; MIPS64R2-NEXT: sltiu $1, $2, 7
+; MIPS64R2-NEXT: beqz $1, .LBB0_3
+; MIPS64R2-NEXT: nop
+; MIPS64R2-NEXT: .LBB0_1: # %entry
+; MIPS64R2-NEXT: daddiu $1, $zero, 8
+; MIPS64R2-NEXT: dmult $2, $1
+; MIPS64R2-NEXT: mflo $1
+; MIPS64R2-NEXT: lui $2, %highest(.LJTI0_0)
+; MIPS64R2-NEXT: daddiu $2, $2, %higher(.LJTI0_0)
+; MIPS64R2-NEXT: dsll $2, $2, 16
+; MIPS64R2-NEXT: daddiu $2, $2, %hi(.LJTI0_0)
+; MIPS64R2-NEXT: dsll $2, $2, 16
+; MIPS64R2-NEXT: daddu $1, $1, $2
+; MIPS64R2-NEXT: ld $1, %lo(.LJTI0_0)($1)
+; MIPS64R2-NEXT: jr.hb $1
+; MIPS64R2-NEXT: nop
+; MIPS64R2-NEXT: .LBB0_2: # %sw.bb
+; MIPS64R2-NEXT: lui $1, %highest(.L.str)
+; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str)
+; MIPS64R2-NEXT: j .LBB0_10
+; MIPS64R2-NEXT: sd $1, 8($sp)
+; MIPS64R2-NEXT: .LBB0_3: # %sw.epilog
+; MIPS64R2-NEXT: lui $1, %highest(.L.str.7)
+; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str.7)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str.7)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str.7)
+; MIPS64R2-NEXT: j .LBB0_10
+; MIPS64R2-NEXT: sd $1, 8($sp)
+; MIPS64R2-NEXT: .LBB0_4: # %sw.bb1
+; MIPS64R2-NEXT: lui $1, %highest(.L.str.1)
+; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str.1)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str.1)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str.1)
+; MIPS64R2-NEXT: j .LBB0_10
+; MIPS64R2-NEXT: sd $1, 8($sp)
+; MIPS64R2-NEXT: .LBB0_5: # %sw.bb2
+; MIPS64R2-NEXT: lui $1, %highest(.L.str.2)
+; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str.2)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str.2)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str.2)
+; MIPS64R2-NEXT: j .LBB0_10
+; MIPS64R2-NEXT: sd $1, 8($sp)
+; MIPS64R2-NEXT: .LBB0_6: # %sw.bb3
+; MIPS64R2-NEXT: lui $1, %highest(.L.str.3)
+; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str.3)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str.3)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str.3)
+; MIPS64R2-NEXT: j .LBB0_10
+; MIPS64R2-NEXT: sd $1, 8($sp)
+; MIPS64R2-NEXT: .LBB0_7: # %sw.bb4
+; MIPS64R2-NEXT: lui $1, %highest(.L.str.4)
+; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str.4)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str.4)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str.4)
+; MIPS64R2-NEXT: j .LBB0_10
+; MIPS64R2-NEXT: sd $1, 8($sp)
+; MIPS64R2-NEXT: .LBB0_8: # %sw.bb5
+; MIPS64R2-NEXT: lui $1, %highest(.L.str.5)
+; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str.5)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str.5)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str.5)
+; MIPS64R2-NEXT: j .LBB0_10
+; MIPS64R2-NEXT: sd $1, 8($sp)
+; MIPS64R2-NEXT: .LBB0_9: # %sw.bb6
+; MIPS64R2-NEXT: lui $1, %highest(.L.str.6)
+; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str.6)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str.6)
+; MIPS64R2-NEXT: dsll $1, $1, 16
+; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str.6)
+; MIPS64R2-NEXT: sd $1, 8($sp)
+; MIPS64R2-NEXT: .LBB0_10: # %return
+; MIPS64R2-NEXT: ld $2, 8($sp)
+; MIPS64R2-NEXT: jr $ra
+; MIPS64R2-NEXT: daddiu $sp, $sp, 16
+;
+; MIPS64R6-LABEL: _Z3fooi:
+; MIPS64R6: # %bb.0: # %entry
+; MIPS64R6-NEXT: daddiu $sp, $sp, -16
+; MIPS64R6-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R6-NEXT: sw $4, 4($sp)
+; MIPS64R6-NEXT: lwu $2, 4($sp)
+; MIPS64R6-NEXT: sltiu $1, $2, 7
+; MIPS64R6-NEXT: beqzc $1, .LBB0_3
+; MIPS64R6-NEXT: .LBB0_1: # %entry
+; MIPS64R6-NEXT: dsll $1, $2, 3
+; MIPS64R6-NEXT: lui $2, %highest(.LJTI0_0)
+; MIPS64R6-NEXT: daddiu $2, $2, %higher(.LJTI0_0)
+; MIPS64R6-NEXT: dsll $2, $2, 16
+; MIPS64R6-NEXT: daddiu $2, $2, %hi(.LJTI0_0)
+; MIPS64R6-NEXT: dsll $2, $2, 16
+; MIPS64R6-NEXT: daddu $1, $1, $2
+; MIPS64R6-NEXT: ld $1, %lo(.LJTI0_0)($1)
+; MIPS64R6-NEXT: jr.hb $1
+; MIPS64R6-NEXT: nop
+; MIPS64R6-NEXT: .LBB0_2: # %sw.bb
+; MIPS64R6-NEXT: lui $1, %highest(.L.str)
+; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str)
+; MIPS64R6-NEXT: j .LBB0_10
+; MIPS64R6-NEXT: sd $1, 8($sp)
+; MIPS64R6-NEXT: .LBB0_3: # %sw.epilog
+; MIPS64R6-NEXT: lui $1, %highest(.L.str.7)
+; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str.7)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str.7)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str.7)
+; MIPS64R6-NEXT: j .LBB0_10
+; MIPS64R6-NEXT: sd $1, 8($sp)
+; MIPS64R6-NEXT: .LBB0_4: # %sw.bb1
+; MIPS64R6-NEXT: lui $1, %highest(.L.str.1)
+; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str.1)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str.1)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str.1)
+; MIPS64R6-NEXT: j .LBB0_10
+; MIPS64R6-NEXT: sd $1, 8($sp)
+; MIPS64R6-NEXT: .LBB0_5: # %sw.bb2
+; MIPS64R6-NEXT: lui $1, %highest(.L.str.2)
+; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str.2)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str.2)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str.2)
+; MIPS64R6-NEXT: j .LBB0_10
+; MIPS64R6-NEXT: sd $1, 8($sp)
+; MIPS64R6-NEXT: .LBB0_6: # %sw.bb3
+; MIPS64R6-NEXT: lui $1, %highest(.L.str.3)
+; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str.3)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str.3)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str.3)
+; MIPS64R6-NEXT: j .LBB0_10
+; MIPS64R6-NEXT: sd $1, 8($sp)
+; MIPS64R6-NEXT: .LBB0_7: # %sw.bb4
+; MIPS64R6-NEXT: lui $1, %highest(.L.str.4)
+; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str.4)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str.4)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str.4)
+; MIPS64R6-NEXT: j .LBB0_10
+; MIPS64R6-NEXT: sd $1, 8($sp)
+; MIPS64R6-NEXT: .LBB0_8: # %sw.bb5
+; MIPS64R6-NEXT: lui $1, %highest(.L.str.5)
+; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str.5)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str.5)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str.5)
+; MIPS64R6-NEXT: j .LBB0_10
+; MIPS64R6-NEXT: sd $1, 8($sp)
+; MIPS64R6-NEXT: .LBB0_9: # %sw.bb6
+; MIPS64R6-NEXT: lui $1, %highest(.L.str.6)
+; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str.6)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str.6)
+; MIPS64R6-NEXT: dsll $1, $1, 16
+; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str.6)
+; MIPS64R6-NEXT: sd $1, 8($sp)
+; MIPS64R6-NEXT: .LBB0_10: # %return
+; MIPS64R6-NEXT: ld $2, 8($sp)
+; MIPS64R6-NEXT: jr $ra
+; MIPS64R6-NEXT: daddiu $sp, $sp, 16
+;
+; PIC-MIPS32R2-LABEL: _Z3fooi:
+; PIC-MIPS32R2: # %bb.0: # %entry
+; PIC-MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
+; PIC-MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
+; PIC-MIPS32R2-NEXT: addiu $sp, $sp, -16
+; PIC-MIPS32R2-NEXT: .cfi_def_cfa_offset 16
+; PIC-MIPS32R2-NEXT: addu $2, $2, $25
+; PIC-MIPS32R2-NEXT: sltiu $1, $4, 7
+; PIC-MIPS32R2-NEXT: beqz $1, $BB0_3
+; PIC-MIPS32R2-NEXT: sw $4, 4($sp)
+; PIC-MIPS32R2-NEXT: $BB0_1: # %entry
+; PIC-MIPS32R2-NEXT: sll $1, $4, 2
+; PIC-MIPS32R2-NEXT: lw $3, %got($JTI0_0)($2)
+; PIC-MIPS32R2-NEXT: addu $1, $1, $3
+; PIC-MIPS32R2-NEXT: lw $1, %lo($JTI0_0)($1)
+; PIC-MIPS32R2-NEXT: addu $1, $1, $2
+; PIC-MIPS32R2-NEXT: jr.hb $1
+; PIC-MIPS32R2-NEXT: nop
+; PIC-MIPS32R2-NEXT: $BB0_2: # %sw.bb
+; PIC-MIPS32R2-NEXT: lw $1, %got($.str)($2)
+; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str)
+; PIC-MIPS32R2-NEXT: b $BB0_10
+; PIC-MIPS32R2-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT: $BB0_3: # %sw.epilog
+; PIC-MIPS32R2-NEXT: lw $1, %got($.str.7)($2)
+; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str.7)
+; PIC-MIPS32R2-NEXT: b $BB0_10
+; PIC-MIPS32R2-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT: $BB0_4: # %sw.bb1
+; PIC-MIPS32R2-NEXT: lw $1, %got($.str.1)($2)
+; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str.1)
+; PIC-MIPS32R2-NEXT: b $BB0_10
+; PIC-MIPS32R2-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT: $BB0_5: # %sw.bb2
+; PIC-MIPS32R2-NEXT: lw $1, %got($.str.2)($2)
+; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str.2)
+; PIC-MIPS32R2-NEXT: b $BB0_10
+; PIC-MIPS32R2-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT: $BB0_6: # %sw.bb3
+; PIC-MIPS32R2-NEXT: lw $1, %got($.str.3)($2)
+; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str.3)
+; PIC-MIPS32R2-NEXT: b $BB0_10
+; PIC-MIPS32R2-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT: $BB0_7: # %sw.bb4
+; PIC-MIPS32R2-NEXT: lw $1, %got($.str.4)($2)
+; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str.4)
+; PIC-MIPS32R2-NEXT: b $BB0_10
+; PIC-MIPS32R2-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT: $BB0_8: # %sw.bb5
+; PIC-MIPS32R2-NEXT: lw $1, %got($.str.5)($2)
+; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str.5)
+; PIC-MIPS32R2-NEXT: b $BB0_10
+; PIC-MIPS32R2-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT: $BB0_9: # %sw.bb6
+; PIC-MIPS32R2-NEXT: lw $1, %got($.str.6)($2)
+; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str.6)
+; PIC-MIPS32R2-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT: $BB0_10: # %return
+; PIC-MIPS32R2-NEXT: lw $2, 8($sp)
+; PIC-MIPS32R2-NEXT: jr $ra
+; PIC-MIPS32R2-NEXT: addiu $sp, $sp, 16
+;
+; PIC-MIPS32R6-LABEL: _Z3fooi:
+; PIC-MIPS32R6: # %bb.0: # %entry
+; PIC-MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
+; PIC-MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
+; PIC-MIPS32R6-NEXT: addiu $sp, $sp, -16
+; PIC-MIPS32R6-NEXT: .cfi_def_cfa_offset 16
+; PIC-MIPS32R6-NEXT: addu $2, $2, $25
+; PIC-MIPS32R6-NEXT: sltiu $1, $4, 7
+; PIC-MIPS32R6-NEXT: beqz $1, $BB0_3
+; PIC-MIPS32R6-NEXT: sw $4, 4($sp)
+; PIC-MIPS32R6-NEXT: $BB0_1: # %entry
+; PIC-MIPS32R6-NEXT: sll $1, $4, 2
+; PIC-MIPS32R6-NEXT: lw $3, %got($JTI0_0)($2)
+; PIC-MIPS32R6-NEXT: addu $1, $1, $3
+; PIC-MIPS32R6-NEXT: lw $1, %lo($JTI0_0)($1)
+; PIC-MIPS32R6-NEXT: addu $1, $1, $2
+; PIC-MIPS32R6-NEXT: jr.hb $1
+; PIC-MIPS32R6-NEXT: nop
+; PIC-MIPS32R6-NEXT: $BB0_2: # %sw.bb
+; PIC-MIPS32R6-NEXT: lw $1, %got($.str)($2)
+; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str)
+; PIC-MIPS32R6-NEXT: b $BB0_10
+; PIC-MIPS32R6-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT: $BB0_3: # %sw.epilog
+; PIC-MIPS32R6-NEXT: lw $1, %got($.str.7)($2)
+; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str.7)
+; PIC-MIPS32R6-NEXT: b $BB0_10
+; PIC-MIPS32R6-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT: $BB0_4: # %sw.bb1
+; PIC-MIPS32R6-NEXT: lw $1, %got($.str.1)($2)
+; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str.1)
+; PIC-MIPS32R6-NEXT: b $BB0_10
+; PIC-MIPS32R6-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT: $BB0_5: # %sw.bb2
+; PIC-MIPS32R6-NEXT: lw $1, %got($.str.2)($2)
+; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str.2)
+; PIC-MIPS32R6-NEXT: b $BB0_10
+; PIC-MIPS32R6-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT: $BB0_6: # %sw.bb3
+; PIC-MIPS32R6-NEXT: lw $1, %got($.str.3)($2)
+; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str.3)
+; PIC-MIPS32R6-NEXT: b $BB0_10
+; PIC-MIPS32R6-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT: $BB0_7: # %sw.bb4
+; PIC-MIPS32R6-NEXT: lw $1, %got($.str.4)($2)
+; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str.4)
+; PIC-MIPS32R6-NEXT: b $BB0_10
+; PIC-MIPS32R6-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT: $BB0_8: # %sw.bb5
+; PIC-MIPS32R6-NEXT: lw $1, %got($.str.5)($2)
+; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str.5)
+; PIC-MIPS32R6-NEXT: b $BB0_10
+; PIC-MIPS32R6-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT: $BB0_9: # %sw.bb6
+; PIC-MIPS32R6-NEXT: lw $1, %got($.str.6)($2)
+; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str.6)
+; PIC-MIPS32R6-NEXT: sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT: $BB0_10: # %return
+; PIC-MIPS32R6-NEXT: lw $2, 8($sp)
+; PIC-MIPS32R6-NEXT: jr $ra
+; PIC-MIPS32R6-NEXT: addiu $sp, $sp, 16
+;
+; PIC-MIPS64R2-LABEL: _Z3fooi:
+; PIC-MIPS64R2: # %bb.0: # %entry
+; PIC-MIPS64R2-NEXT: daddiu $sp, $sp, -16
+; PIC-MIPS64R2-NEXT: .cfi_def_cfa_offset 16
+; PIC-MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(_Z3fooi)))
+; PIC-MIPS64R2-NEXT: daddu $1, $1, $25
+; PIC-MIPS64R2-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(_Z3fooi)))
+; PIC-MIPS64R2-NEXT: sw $4, 4($sp)
+; PIC-MIPS64R2-NEXT: lwu $3, 4($sp)
+; PIC-MIPS64R2-NEXT: sltiu $1, $3, 7
+; PIC-MIPS64R2-NEXT: beqz $1, .LBB0_3
+; PIC-MIPS64R2-NEXT: nop
+; PIC-MIPS64R2-NEXT: .LBB0_1: # %entry
+; PIC-MIPS64R2-NEXT: daddiu $1, $zero, 8
+; PIC-MIPS64R2-NEXT: dmult $3, $1
+; PIC-MIPS64R2-NEXT: mflo $1
+; PIC-MIPS64R2-NEXT: ld $3, %got_page(.LJTI0_0)($2)
+; PIC-MIPS64R2-NEXT: daddu $1, $1, $3
+; PIC-MIPS64R2-NEXT: ld $1, %got_ofst(.LJTI0_0)($1)
+; PIC-MIPS64R2-NEXT: daddu $1, $1, $2
+; PIC-MIPS64R2-NEXT: jr.hb $1
+; PIC-MIPS64R2-NEXT: nop
+; PIC-MIPS64R2-NEXT: .LBB0_2: # %sw.bb
+; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str)($2)
+; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str)
+; PIC-MIPS64R2-NEXT: b .LBB0_10
+; PIC-MIPS64R2-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT: .LBB0_3: # %sw.epilog
+; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str.7)($2)
+; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str.7)
+; PIC-MIPS64R2-NEXT: b .LBB0_10
+; PIC-MIPS64R2-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT: .LBB0_4: # %sw.bb1
+; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str.1)($2)
+; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str.1)
+; PIC-MIPS64R2-NEXT: b .LBB0_10
+; PIC-MIPS64R2-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT: .LBB0_5: # %sw.bb2
+; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str.2)($2)
+; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str.2)
+; PIC-MIPS64R2-NEXT: b .LBB0_10
+; PIC-MIPS64R2-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT: .LBB0_6: # %sw.bb3
+; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str.3)($2)
+; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str.3)
+; PIC-MIPS64R2-NEXT: b .LBB0_10
+; PIC-MIPS64R2-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT: .LBB0_7: # %sw.bb4
+; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str.4)($2)
+; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str.4)
+; PIC-MIPS64R2-NEXT: b .LBB0_10
+; PIC-MIPS64R2-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT: .LBB0_8: # %sw.bb5
+; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str.5)($2)
+; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str.5)
+; PIC-MIPS64R2-NEXT: b .LBB0_10
+; PIC-MIPS64R2-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT: .LBB0_9: # %sw.bb6
+; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str.6)($2)
+; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str.6)
+; PIC-MIPS64R2-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT: .LBB0_10: # %return
+; PIC-MIPS64R2-NEXT: ld $2, 8($sp)
+; PIC-MIPS64R2-NEXT: jr $ra
+; PIC-MIPS64R2-NEXT: daddiu $sp, $sp, 16
+;
+; PIC-MIPS64R6-LABEL: _Z3fooi:
+; PIC-MIPS64R6: # %bb.0: # %entry
+; PIC-MIPS64R6-NEXT: daddiu $sp, $sp, -16
+; PIC-MIPS64R6-NEXT: .cfi_def_cfa_offset 16
+; PIC-MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(_Z3fooi)))
+; PIC-MIPS64R6-NEXT: daddu $1, $1, $25
+; PIC-MIPS64R6-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(_Z3fooi)))
+; PIC-MIPS64R6-NEXT: sw $4, 4($sp)
+; PIC-MIPS64R6-NEXT: lwu $3, 4($sp)
+; PIC-MIPS64R6-NEXT: sltiu $1, $3, 7
+; PIC-MIPS64R6-NEXT: beqzc $1, .LBB0_3
+; PIC-MIPS64R6-NEXT: .LBB0_1: # %entry
+; PIC-MIPS64R6-NEXT: dsll $1, $3, 3
+; PIC-MIPS64R6-NEXT: ld $3, %got_page(.LJTI0_0)($2)
+; PIC-MIPS64R6-NEXT: daddu $1, $1, $3
+; PIC-MIPS64R6-NEXT: ld $1, %got_ofst(.LJTI0_0)($1)
+; PIC-MIPS64R6-NEXT: daddu $1, $1, $2
+; PIC-MIPS64R6-NEXT: jr.hb $1
+; PIC-MIPS64R6-NEXT: nop
+; PIC-MIPS64R6-NEXT: .LBB0_2: # %sw.bb
+; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str)($2)
+; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str)
+; PIC-MIPS64R6-NEXT: b .LBB0_10
+; PIC-MIPS64R6-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT: .LBB0_3: # %sw.epilog
+; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str.7)($2)
+; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str.7)
+; PIC-MIPS64R6-NEXT: b .LBB0_10
+; PIC-MIPS64R6-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT: .LBB0_4: # %sw.bb1
+; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str.1)($2)
+; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str.1)
+; PIC-MIPS64R6-NEXT: b .LBB0_10
+; PIC-MIPS64R6-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT: .LBB0_5: # %sw.bb2
+; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str.2)($2)
+; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str.2)
+; PIC-MIPS64R6-NEXT: b .LBB0_10
+; PIC-MIPS64R6-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT: .LBB0_6: # %sw.bb3
+; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str.3)($2)
+; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str.3)
+; PIC-MIPS64R6-NEXT: b .LBB0_10
+; PIC-MIPS64R6-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT: .LBB0_7: # %sw.bb4
+; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str.4)($2)
+; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str.4)
+; PIC-MIPS64R6-NEXT: b .LBB0_10
+; PIC-MIPS64R6-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT: .LBB0_8: # %sw.bb5
+; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str.5)($2)
+; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str.5)
+; PIC-MIPS64R6-NEXT: b .LBB0_10
+; PIC-MIPS64R6-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT: .LBB0_9: # %sw.bb6
+; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str.6)($2)
+; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str.6)
+; PIC-MIPS64R6-NEXT: sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT: .LBB0_10: # %return
+; PIC-MIPS64R6-NEXT: ld $2, 8($sp)
+; PIC-MIPS64R6-NEXT: jr $ra
+; PIC-MIPS64R6-NEXT: daddiu $sp, $sp, 16
+entry:
+ %retval = alloca i8*, align 8
+ %Letter.addr = alloca i32, align 4
+ store i32 %Letter, i32* %Letter.addr, align 4
+ %0 = load i32, i32* %Letter.addr, align 4
+ switch i32 %0, label %sw.epilog [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ i32 4, label %sw.bb4
+ i32 5, label %sw.bb5
+ i32 6, label %sw.bb6
+ ]
+
+sw.bb:
+ store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+sw.bb1:
+ store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+sw.bb2:
+ store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.2, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+sw.bb3:
+ store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+sw.bb4:
+ store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.4, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+sw.bb5:
+ store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.5, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+sw.bb6:
+ store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.6, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+sw.epilog:
+ store i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str.7, i32 0, i32 0), i8** %retval, align 8
+ br label %return
+
+return:
+ %1 = load i8*, i8** %retval, align 8
+ ret i8* %1
+}
diff --git a/test/CodeGen/Mips/indirect-jump-hazard/long-branch.ll b/test/CodeGen/Mips/indirect-jump-hazard/long-branch.ll
new file mode 100644
index 000000000000..fffda991ae4b
--- /dev/null
+++ b/test/CodeGen/Mips/indirect-jump-hazard/long-branch.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; Except for the NACL version which isn't parsed by update_llc_test_checks.py
+
+; RUN: llc -mtriple=mipsel-unknown-linux-gnu -force-mips-long-branch -O3 \
+; RUN: -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard -relocation-model=pic \
+; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=O32-PIC
+
+; RUN: llc -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r6 \
+; RUN: -force-mips-long-branch -O3 -mattr=+use-indirect-jump-hazard \
+; RUN: -relocation-model=pic -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=O32-R6-PIC
+
+; RUN: llc -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r2 -target-abi=n64 \
+; RUN: -force-mips-long-branch -O3 -relocation-model=pic \
+; RUN: -mattr=+use-indirect-jump-hazard -verify-machineinstrs \
+; RUN: < %s | FileCheck %s -check-prefix=MIPS64
+
+; RUN: llc -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r6 -target-abi=n64 \
+; RUN: -force-mips-long-branch -O3 -mattr=+use-indirect-jump-hazard \
+; RUN: -relocation-model=pic -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=N64-R6
+
+; Test that the long branches also get changed to their hazard variants.
+
+@x = external global i32
+
+define void @test1(i32 signext %s) {
+; O32-PIC-LABEL: test1:
+; O32-PIC: # %bb.0: # %entry
+; O32-PIC-NEXT: lui $2, %hi(_gp_disp)
+; O32-PIC-NEXT: addiu $2, $2, %lo(_gp_disp)
+; O32-PIC-NEXT: bnez $4, $BB0_3
+; O32-PIC-NEXT: addu $2, $2, $25
+; O32-PIC-NEXT: # %bb.1: # %entry
+; O32-PIC-NEXT: addiu $sp, $sp, -8
+; O32-PIC-NEXT: sw $ra, 0($sp)
+; O32-PIC-NEXT: lui $1, %hi(($BB0_4)-($BB0_2))
+; O32-PIC-NEXT: bal $BB0_2
+; O32-PIC-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2))
+; O32-PIC-NEXT: $BB0_2: # %entry
+; O32-PIC-NEXT: addu $1, $ra, $1
+; O32-PIC-NEXT: lw $ra, 0($sp)
+; O32-PIC-NEXT: jr.hb $1
+; O32-PIC-NEXT: addiu $sp, $sp, 8
+; O32-PIC-NEXT: $BB0_3: # %then
+; O32-PIC-NEXT: lw $1, %got(x)($2)
+; O32-PIC-NEXT: addiu $2, $zero, 1
+; O32-PIC-NEXT: sw $2, 0($1)
+; O32-PIC-NEXT: $BB0_4: # %end
+; O32-PIC-NEXT: jr $ra
+; O32-PIC-NEXT: nop
+;
+; O32-R6-PIC-LABEL: test1:
+; O32-R6-PIC: # %bb.0: # %entry
+; O32-R6-PIC-NEXT: lui $2, %hi(_gp_disp)
+; O32-R6-PIC-NEXT: addiu $2, $2, %lo(_gp_disp)
+; O32-R6-PIC-NEXT: bnez $4, $BB0_3
+; O32-R6-PIC-NEXT: addu $2, $2, $25
+; O32-R6-PIC-NEXT: # %bb.1: # %entry
+; O32-R6-PIC-NEXT: addiu $sp, $sp, -8
+; O32-R6-PIC-NEXT: sw $ra, 0($sp)
+; O32-R6-PIC-NEXT: lui $1, %hi(($BB0_4)-($BB0_2))
+; O32-R6-PIC-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2))
+; O32-R6-PIC-NEXT: balc $BB0_2
+; O32-R6-PIC-NEXT: $BB0_2: # %entry
+; O32-R6-PIC-NEXT: addu $1, $ra, $1
+; O32-R6-PIC-NEXT: lw $ra, 0($sp)
+; O32-R6-PIC-NEXT: jr.hb $1
+; O32-R6-PIC-NEXT: addiu $sp, $sp, 8
+; O32-R6-PIC-NEXT: $BB0_3: # %then
+; O32-R6-PIC-NEXT: lw $1, %got(x)($2)
+; O32-R6-PIC-NEXT: addiu $2, $zero, 1
+; O32-R6-PIC-NEXT: sw $2, 0($1)
+; O32-R6-PIC-NEXT: $BB0_4: # %end
+; O32-R6-PIC-NEXT: jrc $ra
+;
+; MIPS64-LABEL: test1:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(test1)))
+; MIPS64-NEXT: bnez $4, .LBB0_3
+; MIPS64-NEXT: daddu $2, $1, $25
+; MIPS64-NEXT: # %bb.1: # %entry
+; MIPS64-NEXT: daddiu $sp, $sp, -16
+; MIPS64-NEXT: sd $ra, 0($sp)
+; MIPS64-NEXT: daddiu $1, $zero, %hi(.LBB0_4-.LBB0_2)
+; MIPS64-NEXT: dsll $1, $1, 16
+; MIPS64-NEXT: bal .LBB0_2
+; MIPS64-NEXT: daddiu $1, $1, %lo(.LBB0_4-.LBB0_2)
+; MIPS64-NEXT: .LBB0_2: # %entry
+; MIPS64-NEXT: daddu $1, $ra, $1
+; MIPS64-NEXT: ld $ra, 0($sp)
+; MIPS64-NEXT: jr.hb $1
+; MIPS64-NEXT: daddiu $sp, $sp, 16
+; MIPS64-NEXT: .LBB0_3: # %then
+; MIPS64-NEXT: daddiu $1, $2, %lo(%neg(%gp_rel(test1)))
+; MIPS64-NEXT: addiu $2, $zero, 1
+; MIPS64-NEXT: ld $1, %got_disp(x)($1)
+; MIPS64-NEXT: sw $2, 0($1)
+; MIPS64-NEXT: .LBB0_4: # %end
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: nop
+;
+; N64-R6-LABEL: test1:
+; N64-R6: # %bb.0: # %entry
+; N64-R6-NEXT: lui $1, %hi(%neg(%gp_rel(test1)))
+; N64-R6-NEXT: bnez $4, .LBB0_3
+; N64-R6-NEXT: daddu $2, $1, $25
+; N64-R6-NEXT: # %bb.1: # %entry
+; N64-R6-NEXT: daddiu $sp, $sp, -16
+; N64-R6-NEXT: sd $ra, 0($sp)
+; N64-R6-NEXT: daddiu $1, $zero, %hi(.LBB0_4-.LBB0_2)
+; N64-R6-NEXT: dsll $1, $1, 16
+; N64-R6-NEXT: daddiu $1, $1, %lo(.LBB0_4-.LBB0_2)
+; N64-R6-NEXT: balc .LBB0_2
+; N64-R6-NEXT: .LBB0_2: # %entry
+; N64-R6-NEXT: daddu $1, $ra, $1
+; N64-R6-NEXT: ld $ra, 0($sp)
+; N64-R6-NEXT: jr.hb $1
+; N64-R6-NEXT: daddiu $sp, $sp, 16
+; N64-R6-NEXT: .LBB0_3: # %then
+; N64-R6-NEXT: daddiu $1, $2, %lo(%neg(%gp_rel(test1)))
+; N64-R6-NEXT: addiu $2, $zero, 1
+; N64-R6-NEXT: ld $1, %got_disp(x)($1)
+; N64-R6-NEXT: sw $2, 0($1)
+; N64-R6-NEXT: .LBB0_4: # %end
+; N64-R6-NEXT: jrc $ra
+entry:
+ %cmp = icmp eq i32 %s, 0
+ br i1 %cmp, label %end, label %then
+
+then:
+ store i32 1, i32* @x, align 4
+ br label %end
+
+end:
+ ret void
+
+}
diff --git a/test/CodeGen/Mips/indirect-jump-hazard/long-calls.ll b/test/CodeGen/Mips/indirect-jump-hazard/long-calls.ll
new file mode 100644
index 000000000000..88886e13f326
--- /dev/null
+++ b/test/CodeGen/Mips/indirect-jump-hazard/long-calls.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=mips-unknown-linux-gnu -mcpu=mips32r2 \
+; RUN: -mattr=+use-indirect-jump-hazard,+long-calls,+noabicalls %s -o - \
+; RUN: -verify-machineinstrs | FileCheck -check-prefix=O32 %s
+
+; RUN: llc -mtriple=mips64-unknown-linux-gnu -mcpu=mips64r2 -target-abi n32 \
+; RUN: -mattr=+use-indirect-jump-hazard,+long-calls,+noabicalls %s -o - \
+; RUN: -verify-machineinstrs | FileCheck -check-prefix=N32 %s
+
+; RUN: llc -mtriple=mips64-unknown-linux-gnu -mcpu=mips64r2 -target-abi n64 \
+; RUN: -mattr=+use-indirect-jump-hazard,+long-calls,+noabicalls %s -o - \
+; RUN: -verify-machineinstrs | FileCheck -check-prefix=N64 %s
+
+declare void @callee()
+declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1)
+
+@val = internal unnamed_addr global [20 x i32] zeroinitializer, align 4
+
+; Test that the long call sequence uses the hazard barrier variant of the call instruction (jalr.hb).
+define void @caller() {
+; O32-LABEL: caller:
+; O32: # %bb.0:
+; O32-NEXT: addiu $sp, $sp, -24
+; O32-NEXT: .cfi_def_cfa_offset 24
+; O32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
+; O32-NEXT: .cfi_offset 31, -4
+; O32-NEXT: lui $1, %hi(callee)
+; O32-NEXT: addiu $25, $1, %lo(callee)
+; O32-NEXT: jalr.hb $25
+; O32-NEXT: nop
+; O32-NEXT: lui $1, %hi(val)
+; O32-NEXT: addiu $1, $1, %lo(val)
+; O32-NEXT: lui $2, 20560
+; O32-NEXT: ori $2, $2, 20560
+; O32-NEXT: sw $2, 96($1)
+; O32-NEXT: sw $2, 92($1)
+; O32-NEXT: sw $2, 88($1)
+; O32-NEXT: sw $2, 84($1)
+; O32-NEXT: sw $2, 80($1)
+; O32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; O32-NEXT: jr $ra
+; O32-NEXT: addiu $sp, $sp, 24
+;
+; N32-LABEL: caller:
+; N32: # %bb.0:
+; N32-NEXT: addiu $sp, $sp, -16
+; N32-NEXT: .cfi_def_cfa_offset 16
+; N32-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; N32-NEXT: .cfi_offset 31, -8
+; N32-NEXT: lui $1, %hi(callee)
+; N32-NEXT: addiu $25, $1, %lo(callee)
+; N32-NEXT: jalr.hb $25
+; N32-NEXT: nop
+; N32-NEXT: lui $1, %hi(val)
+; N32-NEXT: addiu $1, $1, %lo(val)
+; N32-NEXT: lui $2, 1285
+; N32-NEXT: daddiu $2, $2, 1285
+; N32-NEXT: dsll $2, $2, 16
+; N32-NEXT: daddiu $2, $2, 1285
+; N32-NEXT: dsll $2, $2, 20
+; N32-NEXT: daddiu $2, $2, 20560
+; N32-NEXT: sdl $2, 88($1)
+; N32-NEXT: sdl $2, 80($1)
+; N32-NEXT: lui $3, 20560
+; N32-NEXT: ori $3, $3, 20560
+; N32-NEXT: sw $3, 96($1)
+; N32-NEXT: sdr $2, 95($1)
+; N32-NEXT: sdr $2, 87($1)
+; N32-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; N32-NEXT: jr $ra
+; N32-NEXT: addiu $sp, $sp, 16
+;
+; N64-LABEL: caller:
+; N64: # %bb.0:
+; N64-NEXT: daddiu $sp, $sp, -16
+; N64-NEXT: .cfi_def_cfa_offset 16
+; N64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; N64-NEXT: .cfi_offset 31, -8
+; N64-NEXT: lui $1, %highest(callee)
+; N64-NEXT: daddiu $1, $1, %higher(callee)
+; N64-NEXT: dsll $1, $1, 16
+; N64-NEXT: daddiu $1, $1, %hi(callee)
+; N64-NEXT: dsll $1, $1, 16
+; N64-NEXT: daddiu $25, $1, %lo(callee)
+; N64-NEXT: jalr.hb $25
+; N64-NEXT: nop
+; N64-NEXT: lui $1, %highest(val)
+; N64-NEXT: daddiu $1, $1, %higher(val)
+; N64-NEXT: dsll $1, $1, 16
+; N64-NEXT: daddiu $1, $1, %hi(val)
+; N64-NEXT: dsll $1, $1, 16
+; N64-NEXT: daddiu $1, $1, %lo(val)
+; N64-NEXT: lui $2, 1285
+; N64-NEXT: daddiu $2, $2, 1285
+; N64-NEXT: dsll $2, $2, 16
+; N64-NEXT: daddiu $2, $2, 1285
+; N64-NEXT: dsll $2, $2, 20
+; N64-NEXT: daddiu $2, $2, 20560
+; N64-NEXT: lui $3, 20560
+; N64-NEXT: sdl $2, 88($1)
+; N64-NEXT: sdl $2, 80($1)
+; N64-NEXT: ori $3, $3, 20560
+; N64-NEXT: sw $3, 96($1)
+; N64-NEXT: sdr $2, 95($1)
+; N64-NEXT: sdr $2, 87($1)
+; N64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; N64-NEXT: jr $ra
+; N64-NEXT: daddiu $sp, $sp, 16
+ call void @callee()
+ call void @llvm.memset.p0i8.i32(i8* bitcast (i32* getelementptr inbounds ([20 x i32], [20 x i32]* @val, i64 1, i32 0) to i8*), i8 80, i32 20, i32 4, i1 false)
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/indirect-jump-hazard/unsupported-micromips.ll b/test/CodeGen/Mips/indirect-jump-hazard/unsupported-micromips.ll
new file mode 100644
index 000000000000..99612525ae3c
--- /dev/null
+++ b/test/CodeGen/Mips/indirect-jump-hazard/unsupported-micromips.ll
@@ -0,0 +1,5 @@
+; RUN: not llc -mtriple=mips-unknown-linux -mcpu=mips32r2 -mattr=+micromips,+use-indirect-jump-hazard %s 2>&1 | FileCheck %s
+
+; Test that combining microMIPS with indirect jumps with hazard barriers is rejected.
+
+; CHECK: LLVM ERROR: cannot combine indirect jumps with hazard barriers and microMIPS
diff --git a/test/CodeGen/Mips/indirect-jump-hazard/unsupported-mips32.ll b/test/CodeGen/Mips/indirect-jump-hazard/unsupported-mips32.ll
new file mode 100644
index 000000000000..48baedf53eaa
--- /dev/null
+++ b/test/CodeGen/Mips/indirect-jump-hazard/unsupported-mips32.ll
@@ -0,0 +1,5 @@
+; RUN: not llc -mtriple=mips-unknown-linux -mcpu=mips32 -mattr=+use-indirect-jump-hazard %s 2>&1 | FileCheck %s
+
+; Test that indirect jumps with hazard barriers are rejected on MIPS32 (they require MIPS32R2 or later).
+
+; CHECK: LLVM ERROR: indirect jumps with hazard barriers requires MIPS32R2 or later
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-l1.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-l1.ll
new file mode 100644
index 000000000000..1cd86d617a24
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-l1.ll
@@ -0,0 +1,13 @@
+; Negative test. The constraint 'l' represents the register 'lo'.
+; Check error message in case of invalid usage.
+;
+; RUN: not llc -march=mips -filetype=obj < %s 2>&1 | FileCheck %s
+
+define void @constraint_l() nounwind {
+entry:
+
+; CHECK: error: invalid operand for instruction
+
+ tail call i16 asm sideeffect "addiu $0,$1,$2", "=l,r,r,~{$1}"(i16 0, i16 0)
+ ret void
+}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
index 63ee42c0c7cd..b4c1587a8fbf 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
@@ -41,5 +41,15 @@ entry:
call i32 asm sideeffect "\09mtlo $3 \0A\09\09madd $1, $2 ", "=l,r,r,r"(i32 7, i32 6, i32 44) nounwind
store volatile i32 %4, i32* %bosco, align 4
+; Check the 'l' constraint for a 16-bit type.
+; CHECK: #APP
+; CHECK: mtlo ${{[0-9]+}}
+; CHECK-NEXT: madd ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: #NO_APP
+; CHECK-NEXT: mflo ${{[0-9]+}}
+ %bosco16 = alloca i16, align 4
+ call i16 asm sideeffect "\09mtlo $3 \0A\09\09madd $1, $2 ", "=l,r,r,r"(i32 7, i32 6, i32 44) nounwind
+ store volatile i16 %5, i16* %bosco16, align 4
+
ret i32 0
}
diff --git a/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir b/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
index 67733795ed5d..f2ca07367b99 100644
--- a/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
+++ b/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
@@ -561,6 +561,25 @@
}
; Function Attrs: norecurse nounwind readnone
+ define i64 @testRLDICLo2(i64 %a, i64 %b) local_unnamed_addr #0 {
+ entry:
+ %shr = lshr i64 %a, 11
+ %and = and i64 %shr, 16777215
+ %tobool = icmp eq i64 %and, 0
+ %cond = select i1 %tobool, i64 %b, i64 %and
+ ret i64 %cond
+ }
+
+ define i64 @testRLDICLo3(i64 %a, i64 %b) local_unnamed_addr #0 {
+ entry:
+ %shr = lshr i64 %a, 11
+ %and = and i64 %shr, 16777215
+ %tobool = icmp eq i64 %and, 0
+ %cond = select i1 %tobool, i64 %b, i64 %and
+ ret i64 %cond
+ }
+
+ ; Function Attrs: norecurse nounwind readnone
define zeroext i32 @testRLWINM(i32 zeroext %a) local_unnamed_addr #0 {
entry:
%shl = shl i32 %a, 4
@@ -602,6 +621,15 @@
}
; Function Attrs: norecurse nounwind readnone
+ define zeroext i32 @testRLWINMo2(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr #0 {
+ entry:
+ %and = and i32 %a, 255
+ %tobool = icmp eq i32 %and, 0
+ %cond = select i1 %tobool, i32 %b, i32 %a
+ ret i32 %cond
+ }
+
+ ; Function Attrs: norecurse nounwind readnone
define i64 @testRLWINM8o(i64 %a, i64 %b) local_unnamed_addr #0 {
entry:
%a.tr = trunc i64 %a to i32
@@ -3904,6 +3932,113 @@ body: |
...
---
+name: testRLDICLo2
+# CHECK-ALL: name: testRLDICLo2
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: g8rc, preferred-register: '' }
+ - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+ - { id: 2, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+ - { id: 3, class: crrc, preferred-register: '' }
+ - { id: 4, class: g8rc, preferred-register: '' }
+liveins:
+ - { reg: '%x3', virtual-reg: '%0' }
+ - { reg: '%x4', virtual-reg: '%1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 4294967295
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ savePoint: ''
+ restorePoint: ''
+fixedStack:
+stack:
+constants:
+body: |
+ bb.0.entry:
+ liveins: %x3, %x4
+
+ %1 = COPY %x4
+ %0 = LI8 200
+ %2 = RLDICLo %0, 61, 3, implicit-def %cr0
+ ; CHECK-NOT: ANDI
+ ; CHECK-LATE-NOT: andi.
+ %3 = COPY killed %cr0
+ %4 = ISEL8 %1, %2, %3.sub_eq
+ %x3 = COPY %4
+ BLR8 implicit %lr8, implicit %rm, implicit %x3
+
+...
+---
+name: testRLDICLo3
+# CHECK-ALL: name: testRLDICLo3
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: g8rc, preferred-register: '' }
+ - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+ - { id: 2, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+ - { id: 3, class: crrc, preferred-register: '' }
+ - { id: 4, class: g8rc, preferred-register: '' }
+liveins:
+ - { reg: '%x3', virtual-reg: '%0' }
+ - { reg: '%x4', virtual-reg: '%1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 4294967295
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ savePoint: ''
+ restorePoint: ''
+fixedStack:
+stack:
+constants:
+body: |
+ bb.0.entry:
+ liveins: %x3, %x4
+
+ %1 = COPY %x4
+ %0 = LI8 2
+ %2 = RLDICLo %0, 32, 32, implicit-def %cr0
+ ; CHECK: ANDIo8 %0, 0
+ ; CHECK-LATE: li 3, 2
+ ; CHECK-LATE: andi. 3, 3, 0
+ %3 = COPY killed %cr0
+ %4 = ISEL8 %1, %2, %3.sub_eq
+ %x3 = COPY %4
+ BLR8 implicit %lr8, implicit %rm, implicit %x3
+
+...
+---
name: testRLWINM
# CHECK-ALL: name: testRLWINM
alignment: 4
@@ -4170,6 +4305,69 @@ body: |
...
---
+name: testRLWINMo2
+# CHECK-ALL: name: testRLWINMo2
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: g8rc, preferred-register: '' }
+ - { id: 1, class: g8rc, preferred-register: '' }
+ - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' }
+ - { id: 3, class: gprc_and_gprc_nor0, preferred-register: '' }
+ - { id: 4, class: gprc, preferred-register: '' }
+ - { id: 5, class: crrc, preferred-register: '' }
+ - { id: 6, class: gprc, preferred-register: '' }
+ - { id: 7, class: g8rc, preferred-register: '' }
+ - { id: 8, class: g8rc, preferred-register: '' }
+ - { id: 9, class: g8rc, preferred-register: '' }
+liveins:
+ - { reg: '%x3', virtual-reg: '%0' }
+ - { reg: '%x4', virtual-reg: '%1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 4294967295
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ savePoint: ''
+ restorePoint: ''
+fixedStack:
+stack:
+constants:
+body: |
+ bb.0.entry:
+ liveins: %x3, %x4
+
+ %1 = COPY %x4
+ %0 = COPY %x3
+ %2 = COPY %1.sub_32
+ %3 = LI -22
+ %4 = RLWINMo %3, 5, 24, 31, implicit-def %cr0
+ ; CHECK-NOT: ANDI
+ ; CHECK-LATE-NOT: andi.
+ %5 = COPY killed %cr0
+ %6 = ISEL %2, %3, %5.sub_eq
+ %8 = IMPLICIT_DEF
+ %7 = INSERT_SUBREG %8, killed %6, 1
+ %9 = RLDICL killed %7, 0, 32
+ %x3 = COPY %9
+ BLR8 implicit %lr8, implicit %rm, implicit %x3
+
+...
+---
name: testRLWINM8o
# CHECK-ALL: name: testRLWINM8o
alignment: 4
diff --git a/test/CodeGen/PowerPC/no-dup-of-bdnz.ll b/test/CodeGen/PowerPC/no-dup-of-bdnz.ll
new file mode 100644
index 000000000000..7d72242aa457
--- /dev/null
+++ b/test/CodeGen/PowerPC/no-dup-of-bdnz.ll
@@ -0,0 +1,75 @@
+; RUN: opt -early-cse-memssa -loop-rotate -licm -loop-rotate -S %s -o - | FileCheck %s
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+source_filename = "bugpoint-output-8903f29.bc"
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define void @test(i64 %arg.ssa, i64 %arg.nb) local_unnamed_addr {
+; Ensure that loop rotation doesn't duplicate the call to
+; llvm.ppc.is.decremented.ctr.nonzero
+; CHECK-LABEL: test
+; CHECK: call i1 @llvm.ppc.is.decremented.ctr.nonzero
+; CHECK-NOT: call i1 @llvm.ppc.is.decremented.ctr.nonzero
+; CHECK: declare i1 @llvm.ppc.is.decremented.ctr.nonzero
+entry:
+ switch i32 undef, label %BB_8 [
+ i32 -2, label %BB_9
+ i32 0, label %BB_9
+ ]
+
+BB_1: ; preds = %BB_12, %BB_4
+ %bcount.1.us = phi i64 [ %.810.us, %BB_4 ], [ 0, %BB_12 ]
+ %0 = add i64 %arg.ssa, %bcount.1.us
+ %.568.us = load i32, i32* undef, align 4
+ %.15.i.us = icmp slt i32 0, %.568.us
+ br i1 %.15.i.us, label %BB_3, label %BB_2
+
+BB_2: ; preds = %BB_1
+ %.982.us = add nsw i64 %0, 1
+ unreachable
+
+BB_3: ; preds = %BB_1
+ %1 = add i64 %arg.ssa, %bcount.1.us
+ %2 = add i64 %1, 1
+ %3 = call i1 @llvm.ppc.is.decremented.ctr.nonzero()
+ br i1 %3, label %BB_4, label %BB_7
+
+BB_4: ; preds = %BB_3
+ %.810.us = add nuw nsw i64 %bcount.1.us, 1
+ br label %BB_1
+
+BB_5: ; preds = %BB_7, %BB_5
+ %lsr.iv20.i116 = phi i64 [ %2, %BB_7 ], [ %lsr.iv.next21.i126, %BB_5 ]
+ %lsr.iv.next21.i126 = add i64 %lsr.iv20.i116, 1
+ br i1 undef, label %BB_5, label %BB_6
+
+BB_6: ; preds = %BB_5
+ ret void
+
+BB_7: ; preds = %BB_3
+ br label %BB_5
+
+BB_8: ; preds = %entry
+ ret void
+
+BB_9: ; preds = %entry, %entry
+ br label %BB_10
+
+BB_10: ; preds = %BB_9
+ br label %BB_11
+
+BB_11: ; preds = %BB_11, %BB_10
+ br i1 undef, label %BB_11, label %BB_12
+
+BB_12: ; preds = %BB_11
+ call void @llvm.ppc.mtctr.i64(i64 %arg.nb)
+ br label %BB_1
+}
+
+; Function Attrs: nounwind
+declare void @llvm.ppc.mtctr.i64(i64) #0
+
+; Function Attrs: nounwind
+declare i1 @llvm.ppc.is.decremented.ctr.nonzero() #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/PowerPC/pr35402.ll b/test/CodeGen/PowerPC/pr35402.ll
new file mode 100644
index 000000000000..06e6d963b13f
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr35402.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O2 < %s | FileCheck %s
+target triple = "powerpc64le-linux-gnu"
+
+define void @test(i8* %p, i64 %data) {
+entry:
+ %0 = tail call i64 @llvm.bswap.i64(i64 %data)
+ %ptr = bitcast i8* %p to i48*
+ %val = trunc i64 %0 to i48
+ store i48 %val, i48* %ptr, align 1
+ ret void
+
+; CHECK: sth
+; CHECK: stw
+; CHECK-NOT: stdbrx
+
+}
+
+declare i64 @llvm.bswap.i64(i64)
diff --git a/test/CodeGen/Thumb/PR36658.mir b/test/CodeGen/Thumb/PR36658.mir
new file mode 100644
index 000000000000..15a3c7f407b1
--- /dev/null
+++ b/test/CodeGen/Thumb/PR36658.mir
@@ -0,0 +1,359 @@
+# REQUIRES: asserts
+# RUN: llc -run-pass arm-cp-islands %s -o - | FileCheck %s
+#
+# This is a reduced test made to expose a bug in
+# ARMConstantIslandPass in Thumb1 mode, see PR36658.
+
+# Verify that the optimized jump-table code uses TBB instructions.
+# CHECK-LABEL: bb.7.entry:
+# CHECK: tTBB_JT %pc, killed %r2, %jump-table.1, 0
+# CHECK-LABEL: bb.8:
+# CHECK: JUMPTABLE_TBB 0, %jump-table.1, 44
+
+# CHECK-LABEL: bb.11.entry:
+# CHECK: %r1 = tMOVSr %r0, implicit-def dead %cpsr
+# CHECK: tTBB_JT %pc, killed %r2, %jump-table.0, 1
+# CHECK-LABEL: bb.12:
+# CHECK: JUMPTABLE_TBB 1, %jump-table.0, 44
+
+--- |
+ ; ModuleID = 'PR36658.ll'
+ source_filename = "PR36658.ll"
+ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv5e-none-linux-gnueabi"
+
+ declare i32 @foo1(...)
+
+ declare i32 @foo2(i32)
+
+ declare i32 @foo3(i32*)
+
+ ; Function Attrs: nounwind optsize
+ define internal fastcc i32 @foo4(i32* nocapture %ignore_ptr) #0 {
+ entry:
+ %call = tail call i32 @foo3(i32* undef)
+ switch i32 %call, label %sw.epilog [
+ i32 120, label %sw.bb
+ i32 48, label %sw.bb73
+ i32 49, label %sw.bb73
+ i32 50, label %sw.bb73
+ i32 51, label %sw.bb73
+ i32 52, label %sw.bb73
+ i32 53, label %sw.bb73
+ i32 54, label %sw.bb73
+ i32 55, label %sw.bb73
+ i32 92, label %cleanup
+ i32 39, label %cleanup
+ i32 34, label %cleanup
+ i32 10, label %sw.bb91
+ i32 110, label %sw.bb93
+ i32 116, label %sw.bb94
+ i32 114, label %sw.bb95
+ i32 102, label %sw.bb96
+ i32 98, label %sw.bb97
+ i32 97, label %sw.bb98
+ i32 118, label %sw.bb106
+ i32 101, label %sw.bb107
+ i32 69, label %sw.bb107
+ i32 63, label %cleanup
+ ]
+
+ sw.bb: ; preds = %entry
+ br label %while.cond
+
+ while.cond: ; preds = %while.cond, %sw.bb
+ %call5 = tail call i32 @foo3(i32* null)
+ br label %while.cond
+
+ sw.bb73: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry
+ %0 = and i32 %call, -8
+ %1 = icmp eq i32 %0, 48
+ br i1 %1, label %while.body83.preheader, label %while.end88
+
+ while.body83.preheader: ; preds = %sw.bb73
+ br label %while.body83
+
+ while.body83: ; preds = %while.body83.preheader, %while.body83
+ %call87 = tail call i32 @foo3(i32* null)
+ br label %while.body83
+
+ while.end88: ; preds = %sw.bb73
+ %call89 = tail call i32 @foo2(i32 %call)
+ unreachable
+
+ sw.bb91: ; preds = %entry
+ store i32 1, i32* %ignore_ptr, align 4
+ br label %cleanup
+
+ sw.bb93: ; preds = %entry
+ br label %cleanup
+
+ sw.bb94: ; preds = %entry
+ br label %cleanup
+
+ sw.bb95: ; preds = %entry
+ br label %cleanup
+
+ sw.bb96: ; preds = %entry
+ br label %cleanup
+
+ sw.bb97: ; preds = %entry
+ br label %cleanup
+
+ sw.bb98: ; preds = %entry
+ br label %cleanup
+
+ sw.bb106: ; preds = %entry
+ br label %cleanup
+
+ sw.bb107: ; preds = %entry, %entry
+ br i1 undef, label %cleanup, label %if.then109
+
+ if.then109: ; preds = %sw.bb107
+ %call110 = tail call i32 bitcast (i32 (...)* @foo1 to i32 (i8*, i32)*)(i8* undef, i32 %call)
+ unreachable
+
+ sw.epilog: ; preds = %entry
+ %call.off = add i32 %call, -32
+ unreachable
+
+ cleanup: ; preds = %sw.bb107, %sw.bb106, %sw.bb98, %sw.bb97, %sw.bb96, %sw.bb95, %sw.bb94, %sw.bb93, %sw.bb91, %entry, %entry, %entry, %entry
+ %retval.0 = phi i32 [ 11, %sw.bb106 ], [ 7, %sw.bb98 ], [ 8, %sw.bb97 ], [ 12, %sw.bb96 ], [ 13, %sw.bb95 ], [ 9, %sw.bb94 ], [ 10, %sw.bb93 ], [ 0, %sw.bb91 ], [ %call, %entry ], [ %call, %entry ], [ %call, %entry ], [ 27, %sw.bb107 ], [ %call, %entry ]
+ ret i32 %retval.0
+ }
+
+ ; Function Attrs: nounwind
+ declare void @llvm.stackprotector(i8*, i8**) #1
+
+ attributes #0 = { nounwind optsize }
+ attributes #1 = { nounwind }
+
+...
+---
+name: foo4
+alignment: 1
+tracksRegLiveness: true
+liveins:
+ - { reg: '%r0' }
+frameInfo:
+ stackSize: 8
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+stack:
+ - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, stack-id: 0,
+ callee-saved-register: '%lr', callee-saved-restored: false }
+ - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, stack-id: 0,
+ callee-saved-register: '%r4' }
+jumpTable:
+ kind: inline
+ entries:
+ - id: 0
+ blocks: [ '%bb.28', '%bb.26', '%bb.26', '%bb.26', '%bb.26',
+ '%bb.24', '%bb.23', '%bb.26', '%bb.26', '%bb.12',
+ '%bb.22' ]
+ - id: 1
+ blocks: [ '%bb.19', '%bb.26', '%bb.26', '%bb.26', '%bb.21',
+ '%bb.26', '%bb.20', '%bb.26', '%bb.25', '%bb.26',
+ '%bb.15' ]
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x42c8590b), %bb.9(0x3d37a6f5)
+ liveins: %r0, %r4, %lr
+
+ frame-setup tPUSH 14, %noreg, killed %r4, killed %lr, implicit-def %sp, implicit %sp
+ frame-setup CFI_INSTRUCTION def_cfa_offset 8
+ frame-setup CFI_INSTRUCTION offset %lr, -4
+ frame-setup CFI_INSTRUCTION offset %r4, -8
+ %r4 = tMOVSr %r0, implicit-def dead %cpsr
+ tBL 14, %noreg, @foo3, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit undef %r0, implicit-def %sp, implicit-def %r0
+ %r1 = tMOVSr %r0, implicit-def dead %cpsr
+ tCMPi8 %r0, 68, 14, %noreg, implicit-def %cpsr
+ tBcc %bb.9, 12, killed %cpsr
+
+ bb.1.entry:
+ successors: %bb.2(0x20000000), %bb.7(0x60000000)
+ liveins: %r0, %r1, %r4
+
+ tCMPi8 renamable %r1, 47, 14, %noreg, implicit-def %cpsr
+ tBcc %bb.2, 13, killed %cpsr
+
+ bb.7.entry:
+ successors: %bb.16(0x71c71c72), %bb.8(0x0e38e38e)
+ liveins: %r0, %r1
+
+ %r2 = tMOVSr %r1, implicit-def dead %cpsr
+ renamable %r2, dead %cpsr = tSUBi8 killed renamable %r2, 48, 14, %noreg
+ tCMPi8 killed renamable %r2, 8, 14, %noreg, implicit-def %cpsr
+ tBcc %bb.8, 2, killed %cpsr
+
+ bb.16.sw.bb73:
+ successors: %bb.17(0x7fffffff), %bb.18(0x00000001)
+ liveins: %r0, %r1
+
+ renamable %r2, dead %cpsr = tMOVi8 7, 14, %noreg
+ renamable %r1, dead %cpsr = tBIC killed renamable %r1, killed renamable %r2, 14, %noreg
+ tCMPi8 killed renamable %r1, 48, 14, %noreg, implicit-def %cpsr
+ tBcc %bb.18, 1, killed %cpsr
+
+ bb.17.while.body83:
+ renamable %r0, dead %cpsr = tMOVi8 0, 14, %noreg
+ tBL 14, %noreg, @foo3, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit %r0, implicit-def %sp, implicit-def dead %r0
+ tB %bb.17, 14, %noreg
+
+ bb.9.entry:
+ successors: %bb.10(0x45d1745d), %bb.29(0x3a2e8ba3)
+ liveins: %r0, %r1
+
+ %r2 = tMOVSr %r1, implicit-def dead %cpsr
+ renamable %r2, dead %cpsr = tSUBi8 killed renamable %r2, 92, 14, %noreg
+ tCMPi8 renamable %r2, 10, 14, %noreg, implicit-def %cpsr
+ tBcc %bb.29, 9, killed %cpsr
+
+ bb.10.entry:
+ successors: %bb.11(0x15555555), %bb.14(0x6aaaaaab)
+ liveins: %r0, %r1
+
+ %r2 = tMOVSr %r1, implicit-def dead %cpsr
+ renamable %r2, dead %cpsr = tSUBi8 killed renamable %r2, 110, 14, %noreg
+ tCMPi8 renamable %r2, 10, 14, %noreg, implicit-def %cpsr
+ tBcc %bb.11, 8, killed %cpsr
+
+ bb.14.entry:
+ successors: %bb.19(0x1999999a), %bb.26(0x00000000), %bb.21(0x1999999a), %bb.20(0x1999999a), %bb.25(0x1999999a), %bb.15(0x1999999a)
+ liveins: %r2
+
+ renamable %r0, dead %cpsr = tLSLri killed renamable %r2, 2, 14, %noreg
+ renamable %r1 = tLEApcrelJT %jump-table.1, 14, %noreg
+ renamable %r0 = tLDRr killed renamable %r1, killed renamable %r0, 14, %noreg :: (load 4 from jump-table)
+ tBR_JTr killed renamable %r0, %jump-table.1
+
+ bb.19.sw.bb93:
+ renamable %r1, dead %cpsr = tMOVi8 10, 14, %noreg
+ tB %bb.28, 14, %noreg
+
+ bb.15.while.cond:
+ renamable %r0, dead %cpsr = tMOVi8 0, 14, %noreg
+ tBL 14, %noreg, @foo3, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit %r0, implicit-def %sp, implicit-def dead %r0
+ tB %bb.15, 14, %noreg
+
+ bb.29.entry:
+ successors: %bb.28(0x1999999a), %bb.26(0x00000000), %bb.24(0x1999999a), %bb.23(0x1999999a), %bb.12(0x1999999a), %bb.22(0x1999999a)
+ liveins: %r0, %r2
+
+ renamable %r1, dead %cpsr = tLSLri killed renamable %r2, 2, 14, %noreg
+ renamable %r2 = tLEApcrelJT %jump-table.0, 14, %noreg
+ renamable %r2 = tLDRr killed renamable %r2, killed renamable %r1, 14, %noreg :: (load 4 from jump-table)
+ %r1 = tMOVSr %r0, implicit-def dead %cpsr
+ tBR_JTr killed renamable %r2, %jump-table.0
+
+ bb.24.sw.bb98:
+ renamable %r1, dead %cpsr = tMOVi8 7, 14, %noreg
+ tB %bb.28, 14, %noreg
+
+ bb.2.entry:
+ successors: %bb.27(0x2aaaaaab), %bb.3(0x55555555)
+ liveins: %r0, %r1, %r4
+
+ tCMPi8 renamable %r1, 10, 14, %noreg, implicit-def %cpsr
+ tBcc %bb.27, 0, killed %cpsr
+
+ bb.3.entry:
+ successors: %bb.4, %bb.5
+ liveins: %r0, %r1
+
+ tCMPi8 renamable %r1, 34, 14, %noreg, implicit-def %cpsr
+ tBcc %bb.5, 1, killed %cpsr
+
+ bb.4:
+ liveins: %r0
+
+ %r1 = tMOVSr killed %r0, implicit-def dead %cpsr
+ tB %bb.28, 14, %noreg
+
+ bb.25.sw.bb106:
+ renamable %r1, dead %cpsr = tMOVi8 11, 14, %noreg
+ tB %bb.28, 14, %noreg
+
+ bb.23.sw.bb97:
+ renamable %r1, dead %cpsr = tMOVi8 8, 14, %noreg
+ tB %bb.28, 14, %noreg
+
+ bb.27.sw.bb91:
+ liveins: %r4
+
+ renamable %r0, dead %cpsr = tMOVi8 1, 14, %noreg
+ tSTRi killed renamable %r0, killed renamable %r4, 0, 14, %noreg :: (store 4 into %ir.ignore_ptr)
+ renamable %r1, dead %cpsr = tMOVi8 0, 14, %noreg
+ tB %bb.28, 14, %noreg
+
+ bb.21.sw.bb95:
+ renamable %r1, dead %cpsr = tMOVi8 13, 14, %noreg
+ tB %bb.28, 14, %noreg
+
+ bb.20.sw.bb94:
+ renamable %r1, dead %cpsr = tMOVi8 9, 14, %noreg
+ tB %bb.28, 14, %noreg
+
+ bb.5.entry:
+ liveins: %r0, %r1
+
+ tCMPi8 killed renamable %r1, 39, 14, %noreg, implicit-def %cpsr
+ tB %bb.6, 14, %noreg
+
+ bb.11.entry:
+ successors: %bb.12(0x80000000), %bb.26(0x00000000)
+ liveins: %r0, %r1
+
+ tCMPi8 killed renamable %r1, 69, 14, %noreg, implicit-def %cpsr
+ tBcc %bb.26, 1, killed %cpsr
+
+ bb.12.sw.bb107:
+ successors: %bb.28(0x7fffffff), %bb.13(0x00000001)
+ liveins: %r0
+
+ renamable %r1, dead %cpsr = tMOVi8 27, 14, %noreg
+ renamable %r2, dead %cpsr = tMOVi8 0, 14, %noreg
+ tCMPi8 killed renamable %r2, 0, 14, %noreg, implicit-def %cpsr
+ tBcc %bb.28, 1, killed %cpsr
+
+ bb.13.if.then109:
+ successors:
+ liveins: %r0
+
+ %r1 = tMOVSr killed %r0, implicit-def dead %cpsr
+ tBL 14, %noreg, @foo1, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit undef %r0, implicit %r1, implicit-def %sp, implicit-def dead %r0
+
+ bb.8.entry:
+ liveins: %r0, %r1
+
+ tCMPi8 killed renamable %r1, 63, 14, %noreg, implicit-def %cpsr
+
+ bb.6.entry:
+ successors: %bb.28(0x80000000), %bb.26(0x00000000)
+ liveins: %cpsr, %r0
+
+ tPUSH 14, %noreg, killed %r0, implicit-def %sp, implicit %sp
+ tPOP 14, %noreg, def %r1, implicit-def %sp, implicit %sp
+ tBcc %bb.28, 0, killed %cpsr
+
+ bb.26.sw.epilog:
+ successors:
+
+
+ bb.22.sw.bb96:
+ renamable %r1, dead %cpsr = tMOVi8 12, 14, %noreg
+
+ bb.28.cleanup:
+ liveins: %r1
+
+ %r0 = tMOVSr killed %r1, implicit-def dead %cpsr
+ tPOP_RET 14, %noreg, def %r4, def %pc, implicit-def %sp, implicit %sp, implicit %r0
+
+ bb.18.while.end88:
+ liveins: %r0
+
+ tBL 14, %noreg, @foo2, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit %r0, implicit-def %sp, implicit-def dead %r0
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/add-scalar.ll b/test/CodeGen/X86/GlobalISel/add-scalar.ll
index 0ef7c956d493..3d41d759409d 100644
--- a/test/CodeGen/X86/GlobalISel/add-scalar.ll
+++ b/test/CodeGen/X86/GlobalISel/add-scalar.ll
@@ -10,16 +10,10 @@ define i64 @test_add_i64(i64 %arg1, i64 %arg2) {
;
; X32-LABEL: test_add_i64:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebp
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: .cfi_offset %ebp, -8
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: movl 16(%ebp), %eax
-; X32-NEXT: movl 20(%ebp), %edx
-; X32-NEXT: addl 8(%ebp), %eax
-; X32-NEXT: adcl 12(%ebp), %edx
-; X32-NEXT: popl %ebp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X32-NEXT: retl
%ret = add i64 %arg1, %arg2
ret i64 %ret
diff --git a/test/CodeGen/X86/O0-pipeline.ll b/test/CodeGen/X86/O0-pipeline.ll
index 3a720a5288a2..53707cb31380 100644
--- a/test/CodeGen/X86/O0-pipeline.ll
+++ b/test/CodeGen/X86/O0-pipeline.ll
@@ -37,6 +37,8 @@
; CHECK-NEXT: X86 PIC Global Base Reg Initialization
; CHECK-NEXT: Expand ISel Pseudo-instructions
; CHECK-NEXT: Local Stack Slot Allocation
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: X86 EFLAGS copy lowering
; CHECK-NEXT: X86 WinAlloca Expander
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
diff --git a/test/CodeGen/X86/clobber-fi0.ll b/test/CodeGen/X86/clobber-fi0.ll
deleted file mode 100644
index b69b18531601..000000000000
--- a/test/CodeGen/X86/clobber-fi0.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.7.0"
-
-; In the code below we need to copy the EFLAGS because of scheduling constraints.
-; When copying the EFLAGS we need to write to the stack with push/pop. This forces
-; us to emit the prolog.
-
-; CHECK: main
-; CHECK: subq{{.*}}rsp
-; CHECK: ret
-define i32 @main(i32 %arg, i8** %arg1) nounwind {
-bb:
- %tmp = alloca i32, align 4 ; [#uses=3 type=i32*]
- %tmp2 = alloca i32, align 4 ; [#uses=3 type=i32*]
- %tmp3 = alloca i32 ; [#uses=1 type=i32*]
- store volatile i32 1, i32* %tmp, align 4
- store volatile i32 1, i32* %tmp2, align 4
- br label %bb4
-
-bb4: ; preds = %bb4, %bb
- %tmp6 = load volatile i32, i32* %tmp2, align 4 ; [#uses=1 type=i32]
- %tmp7 = add i32 %tmp6, -1 ; [#uses=2 type=i32]
- store volatile i32 %tmp7, i32* %tmp2, align 4
- %tmp8 = icmp eq i32 %tmp7, 0 ; [#uses=1 type=i1]
- %tmp9 = load volatile i32, i32* %tmp ; [#uses=1 type=i32]
- %tmp10 = add i32 %tmp9, -1 ; [#uses=1 type=i32]
- store volatile i32 %tmp10, i32* %tmp3
- br i1 %tmp8, label %bb11, label %bb4
-
-bb11: ; preds = %bb4
- %tmp12 = load volatile i32, i32* %tmp, align 4 ; [#uses=1 type=i32]
- ret i32 %tmp12
-}
-
-
diff --git a/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/test/CodeGen/X86/cmpxchg-clobber-flags.ll
index 8d289fa9fb03..827aba78699c 100644
--- a/test/CodeGen/X86/cmpxchg-clobber-flags.ll
+++ b/test/CodeGen/X86/cmpxchg-clobber-flags.ll
@@ -1,100 +1,110 @@
-; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386
-; RUN: llc -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA
+; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA
-; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664
-; RUN: llc -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664
-; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s -check-prefix=x8664-sahf
-; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664-sahf
-; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=corei7 %s -o - | FileCheck %s -check-prefix=x8664-sahf
-
-; TODO: Reenable verify-machineinstr once the if (!AXDead) // FIXME
-; in X86InstrInfo::copyPhysReg() is resolved.
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
declare i32 @foo()
declare i32 @bar(i64)
-define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) {
-; i386-LABEL: test_intervening_call:
-; i386: cmpxchg8b
-; i386-NEXT: pushl %eax
-; i386-NEXT: seto %al
-; i386-NEXT: lahf
-; i386-NEXT: movl %eax, [[FLAGS:%.*]]
-; i386-NEXT: popl %eax
-; i386-NEXT: subl $8, %esp
-; i386-NEXT: pushl %edx
-; i386-NEXT: pushl %eax
-; i386-NEXT: calll bar
-; i386-NEXT: addl $16, %esp
-; i386-NEXT: movl [[FLAGS]], %eax
-; i386-NEXT: addb $127, %al
-; i386-NEXT: sahf
-; i386-NEXT: jne
-
-; In the following case we get a long chain of EFLAGS save/restore due to
-; a sequence of:
+; In the following case when using fast scheduling we get a long chain of
+; EFLAGS save/restore due to a sequence of:
; cmpxchg8b (implicit-def eflags)
; eax = copy eflags
; adjcallstackdown32
; ...
; use of eax
; During PEI the adjcallstackdown32 is replaced with the subl which
-; clobbers eflags, effectively interfering in the liveness interval.
-; Is this a case we care about? Maybe no, considering this issue
-; happens with the fast pre-regalloc scheduler enforced. A more
-; performant scheduler would move the adjcallstackdown32 out of the
-; eflags liveness interval.
-
-; i386f-LABEL: test_intervening_call:
-; i386f: cmpxchg8b
-; i386f-NEXT: pushl %eax
-; i386f-NEXT: seto %al
-; i386f-NEXT: lahf
-; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
-; i386f-NEXT: popl %eax
-; i386f-NEXT: subl $8, %esp
-; i386f-NEXT: pushl %eax
-; i386f-NEXT: movl %ecx, %eax
-; i386f-NEXT: addb $127, %al
-; i386f-NEXT: sahf
-; i386f-NEXT: popl %eax
-; i386f-NEXT: pushl %eax
-; i386f-NEXT: seto %al
-; i386f-NEXT: lahf
-; i386f-NEXT: movl %eax, %esi
-; i386f-NEXT: popl %eax
-; i386f-NEXT: pushl %edx
-; i386f-NEXT: pushl %eax
-; i386f-NEXT: calll bar
-; i386f-NEXT: addl $16, %esp
-; i386f-NEXT: movl %esi, %eax
-; i386f-NEXT: addb $127, %al
-
-; x8664-LABEL: test_intervening_call:
-; x8664: cmpxchgq
-; x8664: pushfq
-; x8664-NEXT: popq [[FLAGS:%.*]]
-; x8664-NEXT: movq %rax, %rdi
-; x8664-NEXT: callq bar
-; x8664-NEXT: pushq [[FLAGS]]
-; x8664-NEXT: popfq
-; x8664-NEXT: jne
-
-; x8664-sahf-LABEL: test_intervening_call:
-; x8664-sahf: cmpxchgq
-; x8664-sahf: pushq %rax
-; x8664-sahf-NEXT: seto %al
-; x8664-sahf-NEXT: lahf
-; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]]
-; x8664-sahf-NEXT: popq %rax
-; x8664-sahf-NEXT: movq %rax, %rdi
-; x8664-sahf-NEXT: callq bar
-; RAX is dead, no need to push and pop it.
-; x8664-sahf-NEXT: movq [[FLAGS]], %rax
-; x8664-sahf-NEXT: addb $127, %al
-; x8664-sahf-NEXT: sahf
-; x8664-sahf-NEXT: jne
-
+; clobbers eflags, effectively interfering in the liveness interval. However,
+; we then promote these copies into independent conditions in GPRs that avoid
+; repeated saving and restoring logic and can be trivially managed by the
+; register allocator.
+define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
+; 32-GOOD-RA-LABEL: test_intervening_call:
+; 32-GOOD-RA: # %bb.0: # %entry
+; 32-GOOD-RA-NEXT: pushl %ebx
+; 32-GOOD-RA-NEXT: pushl %esi
+; 32-GOOD-RA-NEXT: pushl %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-GOOD-RA-NEXT: lock cmpxchg8b (%esi)
+; 32-GOOD-RA-NEXT: setne %bl
+; 32-GOOD-RA-NEXT: subl $8, %esp
+; 32-GOOD-RA-NEXT: pushl %edx
+; 32-GOOD-RA-NEXT: pushl %eax
+; 32-GOOD-RA-NEXT: calll bar
+; 32-GOOD-RA-NEXT: addl $16, %esp
+; 32-GOOD-RA-NEXT: testb %bl, %bl
+; 32-GOOD-RA-NEXT: jne .LBB0_3
+; 32-GOOD-RA-NEXT: # %bb.1: # %t
+; 32-GOOD-RA-NEXT: movl $42, %eax
+; 32-GOOD-RA-NEXT: jmp .LBB0_2
+; 32-GOOD-RA-NEXT: .LBB0_3: # %f
+; 32-GOOD-RA-NEXT: xorl %eax, %eax
+; 32-GOOD-RA-NEXT: .LBB0_2: # %t
+; 32-GOOD-RA-NEXT: xorl %edx, %edx
+; 32-GOOD-RA-NEXT: addl $4, %esp
+; 32-GOOD-RA-NEXT: popl %esi
+; 32-GOOD-RA-NEXT: popl %ebx
+; 32-GOOD-RA-NEXT: retl
+;
+; 32-FAST-RA-LABEL: test_intervening_call:
+; 32-FAST-RA: # %bb.0: # %entry
+; 32-FAST-RA-NEXT: pushl %ebx
+; 32-FAST-RA-NEXT: pushl %esi
+; 32-FAST-RA-NEXT: pushl %eax
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
+; 32-FAST-RA-NEXT: lock cmpxchg8b (%esi)
+; 32-FAST-RA-NEXT: setne %bl
+; 32-FAST-RA-NEXT: subl $8, %esp
+; 32-FAST-RA-NEXT: pushl %edx
+; 32-FAST-RA-NEXT: pushl %eax
+; 32-FAST-RA-NEXT: calll bar
+; 32-FAST-RA-NEXT: addl $16, %esp
+; 32-FAST-RA-NEXT: testb %bl, %bl
+; 32-FAST-RA-NEXT: jne .LBB0_3
+; 32-FAST-RA-NEXT: # %bb.1: # %t
+; 32-FAST-RA-NEXT: movl $42, %eax
+; 32-FAST-RA-NEXT: jmp .LBB0_2
+; 32-FAST-RA-NEXT: .LBB0_3: # %f
+; 32-FAST-RA-NEXT: xorl %eax, %eax
+; 32-FAST-RA-NEXT: .LBB0_2: # %t
+; 32-FAST-RA-NEXT: xorl %edx, %edx
+; 32-FAST-RA-NEXT: addl $4, %esp
+; 32-FAST-RA-NEXT: popl %esi
+; 32-FAST-RA-NEXT: popl %ebx
+; 32-FAST-RA-NEXT: retl
+;
+; 64-ALL-LABEL: test_intervening_call:
+; 64-ALL: # %bb.0: # %entry
+; 64-ALL-NEXT: pushq %rbx
+; 64-ALL-NEXT: movq %rsi, %rax
+; 64-ALL-NEXT: lock cmpxchgq %rdx, (%rdi)
+; 64-ALL-NEXT: setne %bl
+; 64-ALL-NEXT: movq %rax, %rdi
+; 64-ALL-NEXT: callq bar
+; 64-ALL-NEXT: testb %bl, %bl
+; 64-ALL-NEXT: jne .LBB0_2
+; 64-ALL-NEXT: # %bb.1: # %t
+; 64-ALL-NEXT: movl $42, %eax
+; 64-ALL-NEXT: popq %rbx
+; 64-ALL-NEXT: retq
+; 64-ALL-NEXT: .LBB0_2: # %f
+; 64-ALL-NEXT: xorl %eax, %eax
+; 64-ALL-NEXT: popq %rbx
+; 64-ALL-NEXT: retq
+entry:
%cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
%v = extractvalue { i64, i1 } %cx, 0
%p = extractvalue { i64, i1 } %cx, 1
@@ -109,23 +119,62 @@ f:
}
; Interesting in producing a clobber without any function calls.
-define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) {
-; i386-LABEL: test_control_flow:
-; i386: cmpxchg
-; i386-NEXT: jne
-
-; i386f-LABEL: test_control_flow:
-; i386f: cmpxchg
-; i386f-NEXT: jne
-
-; x8664-LABEL: test_control_flow:
-; x8664: cmpxchg
-; x8664-NEXT: jne
-
-; x8664-sahf-LABEL: test_control_flow:
-; x8664-sahf: cmpxchg
-; x8664-sahf-NEXT: jne
-
+define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) nounwind {
+; 32-ALL-LABEL: test_control_flow:
+; 32-ALL: # %bb.0: # %entry
+; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-ALL-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; 32-ALL-NEXT: jle .LBB1_6
+; 32-ALL-NEXT: # %bb.1: # %loop_start
+; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-ALL-NEXT: .p2align 4, 0x90
+; 32-ALL-NEXT: .LBB1_2: # %while.condthread-pre-split.i
+; 32-ALL-NEXT: # =>This Loop Header: Depth=1
+; 32-ALL-NEXT: # Child Loop BB1_3 Depth 2
+; 32-ALL-NEXT: movl (%ecx), %edx
+; 32-ALL-NEXT: .p2align 4, 0x90
+; 32-ALL-NEXT: .LBB1_3: # %while.cond.i
+; 32-ALL-NEXT: # Parent Loop BB1_2 Depth=1
+; 32-ALL-NEXT: # => This Inner Loop Header: Depth=2
+; 32-ALL-NEXT: movl %edx, %eax
+; 32-ALL-NEXT: xorl %edx, %edx
+; 32-ALL-NEXT: testl %eax, %eax
+; 32-ALL-NEXT: je .LBB1_3
+; 32-ALL-NEXT: # %bb.4: # %while.body.i
+; 32-ALL-NEXT: # in Loop: Header=BB1_2 Depth=1
+; 32-ALL-NEXT: lock cmpxchgl %eax, (%ecx)
+; 32-ALL-NEXT: jne .LBB1_2
+; 32-ALL-NEXT: # %bb.5:
+; 32-ALL-NEXT: xorl %eax, %eax
+; 32-ALL-NEXT: .LBB1_6: # %cond.end
+; 32-ALL-NEXT: retl
+;
+; 64-ALL-LABEL: test_control_flow:
+; 64-ALL: # %bb.0: # %entry
+; 64-ALL-NEXT: cmpl %edx, %esi
+; 64-ALL-NEXT: jle .LBB1_5
+; 64-ALL-NEXT: .p2align 4, 0x90
+; 64-ALL-NEXT: .LBB1_1: # %while.condthread-pre-split.i
+; 64-ALL-NEXT: # =>This Loop Header: Depth=1
+; 64-ALL-NEXT: # Child Loop BB1_2 Depth 2
+; 64-ALL-NEXT: movl (%rdi), %ecx
+; 64-ALL-NEXT: .p2align 4, 0x90
+; 64-ALL-NEXT: .LBB1_2: # %while.cond.i
+; 64-ALL-NEXT: # Parent Loop BB1_1 Depth=1
+; 64-ALL-NEXT: # => This Inner Loop Header: Depth=2
+; 64-ALL-NEXT: movl %ecx, %eax
+; 64-ALL-NEXT: xorl %ecx, %ecx
+; 64-ALL-NEXT: testl %eax, %eax
+; 64-ALL-NEXT: je .LBB1_2
+; 64-ALL-NEXT: # %bb.3: # %while.body.i
+; 64-ALL-NEXT: # in Loop: Header=BB1_1 Depth=1
+; 64-ALL-NEXT: lock cmpxchgl %eax, (%rdi)
+; 64-ALL-NEXT: jne .LBB1_1
+; 64-ALL-NEXT: # %bb.4:
+; 64-ALL-NEXT: xorl %esi, %esi
+; 64-ALL-NEXT: .LBB1_5: # %cond.end
+; 64-ALL-NEXT: movl %esi, %eax
+; 64-ALL-NEXT: retq
entry:
%cmp = icmp sgt i32 %i, %j
br i1 %cmp, label %loop_start, label %cond.end
@@ -158,52 +207,68 @@ cond.end:
; This one is an interesting case because CMOV doesn't have a chain
; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
-define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) {
-; i386-LABEL: test_feed_cmov:
-; i386: cmpxchgl
-; i386-NEXT: seto %al
-; i386-NEXT: lahf
-; i386-NEXT: movl %eax, [[FLAGS:%.*]]
-; i386-NEXT: calll foo
-; i386-NEXT: pushl %eax
-; i386-NEXT: movl [[FLAGS]], %eax
-; i386-NEXT: addb $127, %al
-; i386-NEXT: sahf
-; i386-NEXT: popl %eax
-
-; i386f-LABEL: test_feed_cmov:
-; i386f: cmpxchgl
-; i386f-NEXT: seto %al
-; i386f-NEXT: lahf
-; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
-; i386f-NEXT: calll foo
-; i386f-NEXT: pushl %eax
-; i386f-NEXT: movl [[FLAGS]], %eax
-; i386f-NEXT: addb $127, %al
-; i386f-NEXT: sahf
-; i386f-NEXT: popl %eax
-
-; x8664-LABEL: test_feed_cmov:
-; x8664: cmpxchg
-; x8664: pushfq
-; x8664-NEXT: popq [[FLAGS:%.*]]
-; x8664-NEXT: callq foo
-; x8664-NEXT: pushq [[FLAGS]]
-; x8664-NEXT: popfq
-
-; x8664-sahf-LABEL: test_feed_cmov:
-; x8664-sahf: cmpxchgl
-; RAX is dead, do not push or pop it.
-; x8664-sahf-NEXT: seto %al
-; x8664-sahf-NEXT: lahf
-; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]]
-; x8664-sahf-NEXT: callq foo
-; x8664-sahf-NEXT: pushq %rax
-; x8664-sahf-NEXT: movq [[FLAGS]], %rax
-; x8664-sahf-NEXT: addb $127, %al
-; x8664-sahf-NEXT: sahf
-; x8664-sahf-NEXT: popq %rax
-
+define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind {
+; 32-GOOD-RA-LABEL: test_feed_cmov:
+; 32-GOOD-RA: # %bb.0: # %entry
+; 32-GOOD-RA-NEXT: pushl %ebx
+; 32-GOOD-RA-NEXT: pushl %esi
+; 32-GOOD-RA-NEXT: pushl %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx)
+; 32-GOOD-RA-NEXT: sete %bl
+; 32-GOOD-RA-NEXT: calll foo
+; 32-GOOD-RA-NEXT: testb %bl, %bl
+; 32-GOOD-RA-NEXT: jne .LBB2_2
+; 32-GOOD-RA-NEXT: # %bb.1: # %entry
+; 32-GOOD-RA-NEXT: movl %eax, %esi
+; 32-GOOD-RA-NEXT: .LBB2_2: # %entry
+; 32-GOOD-RA-NEXT: movl %esi, %eax
+; 32-GOOD-RA-NEXT: addl $4, %esp
+; 32-GOOD-RA-NEXT: popl %esi
+; 32-GOOD-RA-NEXT: popl %ebx
+; 32-GOOD-RA-NEXT: retl
+;
+; 32-FAST-RA-LABEL: test_feed_cmov:
+; 32-FAST-RA: # %bb.0: # %entry
+; 32-FAST-RA-NEXT: pushl %ebx
+; 32-FAST-RA-NEXT: pushl %esi
+; 32-FAST-RA-NEXT: pushl %eax
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx)
+; 32-FAST-RA-NEXT: sete %bl
+; 32-FAST-RA-NEXT: calll foo
+; 32-FAST-RA-NEXT: testb %bl, %bl
+; 32-FAST-RA-NEXT: jne .LBB2_2
+; 32-FAST-RA-NEXT: # %bb.1: # %entry
+; 32-FAST-RA-NEXT: movl %eax, %esi
+; 32-FAST-RA-NEXT: .LBB2_2: # %entry
+; 32-FAST-RA-NEXT: movl %esi, %eax
+; 32-FAST-RA-NEXT: addl $4, %esp
+; 32-FAST-RA-NEXT: popl %esi
+; 32-FAST-RA-NEXT: popl %ebx
+; 32-FAST-RA-NEXT: retl
+;
+; 64-ALL-LABEL: test_feed_cmov:
+; 64-ALL: # %bb.0: # %entry
+; 64-ALL-NEXT: pushq %rbp
+; 64-ALL-NEXT: pushq %rbx
+; 64-ALL-NEXT: pushq %rax
+; 64-ALL-NEXT: movl %edx, %ebx
+; 64-ALL-NEXT: movl %esi, %eax
+; 64-ALL-NEXT: lock cmpxchgl %ebx, (%rdi)
+; 64-ALL-NEXT: sete %bpl
+; 64-ALL-NEXT: callq foo
+; 64-ALL-NEXT: testb %bpl, %bpl
+; 64-ALL-NEXT: cmovnel %ebx, %eax
+; 64-ALL-NEXT: addq $8, %rsp
+; 64-ALL-NEXT: popq %rbx
+; 64-ALL-NEXT: popq %rbp
+; 64-ALL-NEXT: retq
+entry:
%res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
%success = extractvalue { i32, i1 } %res, 1
diff --git a/test/CodeGen/X86/copy-eflags.ll b/test/CodeGen/X86/copy-eflags.ll
index d98d8a7839b1..1f44559368a7 100644
--- a/test/CodeGen/X86/copy-eflags.ll
+++ b/test/CodeGen/X86/copy-eflags.ll
@@ -1,6 +1,8 @@
-; RUN: llc -o - %s | FileCheck %s
-; This tests for the problem originally reported in http://llvm.org/PR25951
-target triple = "i686-unknown-linux-gnu"
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X32
+; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X64
+;
+; Test patterns that require preserving and restoring flags.
@b = common global i8 0, align 1
@c = common global i32 0, align 4
@@ -8,13 +10,61 @@ target triple = "i686-unknown-linux-gnu"
@d = common global i8 0, align 1
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
-; CHECK-LABEL: func:
-; This tests whether eax is properly saved/restored around the
-; lahf/sahf instruction sequences. We make mem op volatile to prevent
-; their reordering to avoid spills.
+declare void @external(i32)
-
-define i32 @func() {
+; A test that re-uses flags in interesting ways due to volatile accesses.
+; Specifically, the first increment's flags are reused for the branch despite
+; being clobbered by the second increment.
+define i32 @test1() nounwind {
+; X32-LABEL: test1:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb b, %cl
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: incb %al
+; X32-NEXT: movb %al, b
+; X32-NEXT: incl c
+; X32-NEXT: sete %dl
+; X32-NEXT: movb a, %ah
+; X32-NEXT: movb %ah, %ch
+; X32-NEXT: incb %ch
+; X32-NEXT: cmpb %cl, %ah
+; X32-NEXT: sete d
+; X32-NEXT: movb %ch, a
+; X32-NEXT: testb %dl, %dl
+; X32-NEXT: jne .LBB0_2
+; X32-NEXT: # %bb.1: # %if.then
+; X32-NEXT: movsbl %al, %eax
+; X32-NEXT: pushl %eax
+; X32-NEXT: calll external
+; X32-NEXT: addl $4, %esp
+; X32-NEXT: .LBB0_2: # %if.end
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test1:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movb {{.*}}(%rip), %dil
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: incb %al
+; X64-NEXT: movb %al, {{.*}}(%rip)
+; X64-NEXT: incl {{.*}}(%rip)
+; X64-NEXT: sete %sil
+; X64-NEXT: movb {{.*}}(%rip), %cl
+; X64-NEXT: movl %ecx, %edx
+; X64-NEXT: incb %dl
+; X64-NEXT: cmpb %dil, %cl
+; X64-NEXT: sete {{.*}}(%rip)
+; X64-NEXT: movb %dl, {{.*}}(%rip)
+; X64-NEXT: testb %sil, %sil
+; X64-NEXT: jne .LBB0_2
+; X64-NEXT: # %bb.1: # %if.then
+; X64-NEXT: pushq %rax
+; X64-NEXT: movsbl %al, %edi
+; X64-NEXT: callq external
+; X64-NEXT: addq $8, %rsp
+; X64-NEXT: .LBB0_2: # %if.end
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
entry:
%bval = load i8, i8* @b
%inc = add i8 %bval, 1
@@ -25,33 +75,290 @@ entry:
%aval = load volatile i8, i8* @a
%inc2 = add i8 %aval, 1
store volatile i8 %inc2, i8* @a
-; Copy flags produced by the incb of %inc1 to a register, need to save+restore
-; eax around it. The flags will be reused by %tobool.
-; CHECK: pushl %eax
-; CHECK: seto %al
-; CHECK: lahf
-; CHECK: movl %eax, [[REG:%[a-z]+]]
-; CHECK: popl %eax
%cmp = icmp eq i8 %aval, %bval
%conv5 = zext i1 %cmp to i8
store i8 %conv5, i8* @d
%tobool = icmp eq i32 %inc1, 0
-; We restore flags with an 'addb, sahf' sequence, need to save+restore eax
-; around it.
-; CHECK: pushl %eax
-; CHECK: movl [[REG]], %eax
-; CHECK: addb $127, %al
-; CHECK: sahf
-; CHECK: popl %eax
br i1 %tobool, label %if.end, label %if.then
if.then:
%conv6 = sext i8 %inc to i32
- %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %conv6)
+ call void @external(i32 %conv6)
br label %if.end
if.end:
ret i32 0
}
-declare i32 @printf(i8* nocapture readonly, ...)
+; Preserve increment flags across a call.
+define i32 @test2(i32* %ptr) nounwind {
+; X32-LABEL: test2:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: incl (%eax)
+; X32-NEXT: setne %bl
+; X32-NEXT: pushl $42
+; X32-NEXT: calll external
+; X32-NEXT: addl $4, %esp
+; X32-NEXT: testb %bl, %bl
+; X32-NEXT: je .LBB1_1
+; X32-NEXT: # %bb.2: # %else
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
+; X32-NEXT: .LBB1_1: # %then
+; X32-NEXT: movl $64, %eax
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
+;
+; X64-LABEL: test2:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rbx
+; X64-NEXT: incl (%rdi)
+; X64-NEXT: setne %bl
+; X64-NEXT: movl $42, %edi
+; X64-NEXT: callq external
+; X64-NEXT: testb %bl, %bl
+; X64-NEXT: je .LBB1_1
+; X64-NEXT: # %bb.2: # %else
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
+; X64-NEXT: .LBB1_1: # %then
+; X64-NEXT: movl $64, %eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
+entry:
+ %val = load i32, i32* %ptr
+ %inc = add i32 %val, 1
+ store i32 %inc, i32* %ptr
+ %cmp = icmp eq i32 %inc, 0
+ call void @external(i32 42)
+ br i1 %cmp, label %then, label %else
+
+then:
+ ret i32 64
+
+else:
+ ret i32 0
+}
+
+declare void @external_a()
+declare void @external_b()
+
+; This lowers to a conditional tail call instead of a conditional branch. This
+; is tricky because we can only do this from a leaf function, and so we have to
+; use volatile stores similar to test1 to force the save and restore of
+; a condition without calling another function. We then set up subsequent calls
+; in tail position.
+define void @test_tail_call(i32* %ptr) nounwind optsize {
+; X32-LABEL: test_tail_call:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: incl (%eax)
+; X32-NEXT: setne %al
+; X32-NEXT: incb a
+; X32-NEXT: sete d
+; X32-NEXT: testb %al, %al
+; X32-NEXT: jne external_b # TAILCALL
+; X32-NEXT: # %bb.1: # %then
+; X32-NEXT: jmp external_a # TAILCALL
+;
+; X64-LABEL: test_tail_call:
+; X64: # %bb.0: # %entry
+; X64-NEXT: incl (%rdi)
+; X64-NEXT: setne %al
+; X64-NEXT: incb {{.*}}(%rip)
+; X64-NEXT: sete {{.*}}(%rip)
+; X64-NEXT: testb %al, %al
+; X64-NEXT: jne external_b # TAILCALL
+; X64-NEXT: # %bb.1: # %then
+; X64-NEXT: jmp external_a # TAILCALL
+entry:
+ %val = load i32, i32* %ptr
+ %inc = add i32 %val, 1
+ store i32 %inc, i32* %ptr
+ %cmp = icmp eq i32 %inc, 0
+ %aval = load volatile i8, i8* @a
+ %inc2 = add i8 %aval, 1
+ store volatile i8 %inc2, i8* @a
+ %cmp2 = icmp eq i8 %inc2, 0
+ %conv5 = zext i1 %cmp2 to i8
+ store i8 %conv5, i8* @d
+ br i1 %cmp, label %then, label %else
+
+then:
+ tail call void @external_a()
+ ret void
+
+else:
+ tail call void @external_b()
+ ret void
+}
+
+; Test a function that gets special select lowering into CFG with copied EFLAGS
+; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
+; cross-block rewrites in at least some narrow cases.
+define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2) {
+; X32-LABEL: PR37100:
+; X32: # %bb.0: # %bb
+; X32-NEXT: pushl %ebp
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: pushl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: pushl %edi
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 20
+; X32-NEXT: .cfi_offset %esi, -20
+; X32-NEXT: .cfi_offset %edi, -16
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: movb {{[0-9]+}}(%esp), %ch
+; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: jmp .LBB3_1
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB3_5: # %bb1
+; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: idivl %ebp
+; X32-NEXT: .LBB3_1: # %bb1
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: movsbl %cl, %eax
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: sarl $31, %edx
+; X32-NEXT: cmpl %eax, %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: sbbl %edx, %eax
+; X32-NEXT: setl %al
+; X32-NEXT: setl %dl
+; X32-NEXT: movzbl %dl, %ebp
+; X32-NEXT: negl %ebp
+; X32-NEXT: testb %al, %al
+; X32-NEXT: jne .LBB3_3
+; X32-NEXT: # %bb.2: # %bb1
+; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT: movb %ch, %cl
+; X32-NEXT: .LBB3_3: # %bb1
+; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT: movb %cl, (%ebx)
+; X32-NEXT: movl (%edi), %edx
+; X32-NEXT: testb %al, %al
+; X32-NEXT: jne .LBB3_5
+; X32-NEXT: # %bb.4: # %bb1
+; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: jmp .LBB3_5
+;
+; X64-LABEL: PR37100:
+; X64: # %bb.0: # %bb
+; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: jmp .LBB3_1
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB3_5: # %bb1
+; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: idivl %esi
+; X64-NEXT: .LBB3_1: # %bb1
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: movsbq %dil, %rax
+; X64-NEXT: xorl %esi, %esi
+; X64-NEXT: cmpq %rax, %r10
+; X64-NEXT: setl %sil
+; X64-NEXT: negl %esi
+; X64-NEXT: cmpq %rax, %r10
+; X64-NEXT: jl .LBB3_3
+; X64-NEXT: # %bb.2: # %bb1
+; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT: movl %ecx, %edi
+; X64-NEXT: .LBB3_3: # %bb1
+; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT: movb %dil, (%r8)
+; X64-NEXT: jl .LBB3_5
+; X64-NEXT: # %bb.4: # %bb1
+; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT: movl (%r9), %esi
+; X64-NEXT: jmp .LBB3_5
+bb:
+ br label %bb1
+
+bb1:
+ %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
+ %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
+ %tmp3 = icmp sgt i16 %tmp2, 7
+ %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
+ %tmp5 = sext i8 %tmp to i64
+ %tmp6 = icmp slt i64 %arg3, %tmp5
+ %tmp7 = sext i1 %tmp6 to i32
+ %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
+ store volatile i8 %tmp8, i8* %ptr1
+ %tmp9 = load volatile i32, i32* %ptr2
+ %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
+ %tmp11 = srem i32 0, %tmp10
+ %tmp12 = trunc i32 %tmp11 to i16
+ br label %bb1
+}
+
+; Use a particular instruction pattern that lowers to the post-RA pseudo used
+; to lower SETB into an SBB pattern, to make sure that this kind of usage of
+; a copied EFLAGS continues to work.
+define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
+; X32-LABEL: PR37431:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %esi, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: sarl $31, %ecx
+; X32-NEXT: cmpl %eax, %eax
+; X32-NEXT: sbbl %ecx, %eax
+; X32-NEXT: setb %al
+; X32-NEXT: sbbb %cl, %cl
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movb %cl, (%edx)
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: subl %eax, %ecx
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: idivl %ecx
+; X32-NEXT: movb %dl, (%esi)
+; X32-NEXT: popl %esi
+; X32-NEXT: retl
+;
+; X64-LABEL: PR37431:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movslq (%rdi), %rax
+; X64-NEXT: cmpq %rax, %rax
+; X64-NEXT: sbbb %dl, %dl
+; X64-NEXT: cmpq %rax, %rax
+; X64-NEXT: movb %dl, (%rsi)
+; X64-NEXT: sbbl %esi, %esi
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: idivl %esi
+; X64-NEXT: movb %dl, (%rcx)
+; X64-NEXT: retq
+entry:
+ %tmp = load i32, i32* %arg1
+ %tmp1 = sext i32 %tmp to i64
+ %tmp2 = icmp ugt i64 %tmp1, undef
+ %tmp3 = zext i1 %tmp2 to i8
+ %tmp4 = sub i8 0, %tmp3
+ store i8 %tmp4, i8* %arg2
+ %tmp5 = sext i8 %tmp4 to i32
+ %tmp6 = srem i32 0, %tmp5
+ %tmp7 = trunc i32 %tmp6 to i8
+ store i8 %tmp7, i8* %arg3
+ ret void
+}
diff --git a/test/CodeGen/X86/domain-reassignment-implicit-def.ll b/test/CodeGen/X86/domain-reassignment-implicit-def.ll
new file mode 100644
index 000000000000..1716b042d8ee
--- /dev/null
+++ b/test/CodeGen/X86/domain-reassignment-implicit-def.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mcpu=skylake-avx512 -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
+
+; Check that the X86 Domain Reassignment pass doesn't drop IMPLICIT_DEF nodes,
+; which would later cause crashes (e.g. in LiveVariables) - see PR37430
+define void @domain_reassignment_implicit_def(i1 %cond, i8 *%mem, float %arg) {
+; CHECK: vxorps %xmm1, %xmm1, %xmm1
+; CHECK: vcmpneqss %xmm1, %xmm0, %k0
+; CHECK: kmovb %k0, (%rsi)
+top:
+ br i1 %cond, label %L19, label %L15
+
+L15: ; preds = %top
+ %tmp47 = fcmp une float 0.000000e+00, %arg
+ %tmp48 = zext i1 %tmp47 to i8
+ br label %L21
+
+L19: ; preds = %top
+ br label %L21
+
+L21: ; preds = %L19, %L15
+ %.sroa.0.0 = phi i8 [ undef, %L19 ], [ %tmp48, %L15 ]
+ store i8 %.sroa.0.0, i8* %mem, align 1
+ ret void
+}
diff --git a/test/CodeGen/X86/domain-reassignment-test.ll b/test/CodeGen/X86/domain-reassignment-test.ll
new file mode 100644
index 000000000000..2ff5aea9606d
--- /dev/null
+++ b/test/CodeGen/X86/domain-reassignment-test.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mcpu=skylake-avx512 -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -mcpu=skylake-avx512 -mtriple=x86_64-unknown-linux-gnu %s -o - | llvm-mc -triple=x86_64-unknown-linux-gnu -mcpu=skylake-avx512
+
+; Check that the X86 domain reassignment pass doesn't introduce an illegal
+; test instruction. See PR37396
+define void @japi1_foo2_34617() {
+pass2:
+ br label %if5
+
+L174:
+ %tmp = icmp sgt <2 x i64> undef, zeroinitializer
+ %tmp1 = icmp sle <2 x i64> undef, undef
+ %tmp2 = and <2 x i1> %tmp, %tmp1
+ %tmp3 = extractelement <2 x i1> %tmp2, i32 0
+ %tmp4 = extractelement <2 x i1> %tmp2, i32 1
+ %tmp106 = and i1 %tmp4, %tmp3
+ %tmp107 = zext i1 %tmp106 to i8
+ %tmp108 = and i8 %tmp122, %tmp107
+ %tmp109 = icmp eq i8 %tmp108, 0
+; CHECK-NOT: testb {{%k[0-7]}}
+ br i1 %tmp109, label %L188, label %L190
+
+if5:
+ %b.055 = phi i8 [ 1, %pass2 ], [ %tmp122, %if5 ]
+ %tmp118 = icmp sgt i64 undef, 0
+ %tmp119 = icmp sle i64 undef, undef
+ %tmp120 = and i1 %tmp118, %tmp119
+ %tmp121 = zext i1 %tmp120 to i8
+ %tmp122 = and i8 %b.055, %tmp121
+ br i1 undef, label %L174, label %if5
+
+L188:
+ unreachable
+
+L190:
+ ret void
+}
diff --git a/test/CodeGen/X86/eflags-copy-expansion.mir b/test/CodeGen/X86/eflags-copy-expansion.mir
deleted file mode 100644
index 11d4c81b9253..000000000000
--- a/test/CodeGen/X86/eflags-copy-expansion.mir
+++ /dev/null
@@ -1,64 +0,0 @@
-# RUN: llc -run-pass postrapseudos -mtriple=i386-apple-macosx -o - %s | FileCheck %s
-
-# Verify that we correctly save and restore eax when copying eflags,
-# even when only a smaller alias of eax is used. We used to check only
-# eax and not its aliases.
-# PR27624.
-
---- |
- target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-
- define void @foo() {
- entry:
- br label %false
- false:
- ret void
- }
-
-...
-
----
-name: foo
-tracksRegLiveness: true
-liveins:
- - { reg: '%edi' }
-body: |
- bb.0.entry:
- liveins: %edi
- NOOP implicit-def %al
-
- ; The bug was triggered only when LivePhysReg is used, which
- ; happens only when the heuristic for the liveness computation
- ; failed. The liveness computation heuristic looks at 10 instructions
- ; before and after the copy. Make sure we do not reach the definition of
- ; AL in 10 instructions, otherwise the heuristic will see that it is live.
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- ; Save AL.
- ; CHECK: PUSH32r killed %eax
-
- ; Copy edi into EFLAGS
- ; CHECK-NEXT: %eax = MOV32rr %edi
- ; CHECK-NEXT: %al = ADD8ri %al, 127, implicit-def %eflags
- ; CHECK-NEXT: SAHF implicit-def %eflags, implicit %ah
- %eflags = COPY %edi
-
- ; Restore AL.
- ; CHECK-NEXT: %eax = POP32r
- bb.1.false:
- liveins: %al
- NOOP implicit %al
- RETQ
-
-...
diff --git a/test/CodeGen/X86/fast-isel-shift.ll b/test/CodeGen/X86/fast-isel-shift.ll
index 5d416e18260c..e9f01035b53a 100644
--- a/test/CodeGen/X86/fast-isel-shift.ll
+++ b/test/CodeGen/X86/fast-isel-shift.ll
@@ -381,3 +381,15 @@ define i64 @ashr_imm4_i64(i64 %a) {
%c = ashr i64 %a, 4
ret i64 %c
}
+
+; Make sure we don't crash on out-of-bounds i8 shift amounts.
+define i8 @PR36731(i8 %a) {
+; CHECK-LABEL: PR36731:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movb $255, %cl
+; CHECK-NEXT: shlb %cl, %dil
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+ %b = shl i8 %a, -1
+ ret i8 %b
+}
diff --git a/test/CodeGen/X86/flags-copy-lowering.mir b/test/CodeGen/X86/flags-copy-lowering.mir
new file mode 100644
index 000000000000..3d8a4ed3c734
--- /dev/null
+++ b/test/CodeGen/X86/flags-copy-lowering.mir
@@ -0,0 +1,555 @@
+# RUN: llc -run-pass x86-flags-copy-lowering -verify-machineinstrs -o - %s | FileCheck %s
+#
+# Lower various interesting copy patterns of EFLAGS without using LAHF/SAHF.
+
+--- |
+ ; IR stubs: one function per MIR test in this file, matched by name. Every
+ ; stub has the same trivial body (a call to @foo followed by a return); the
+ ; instructions under test are written in the MIR bodies that follow.
+ target triple = "x86_64-unknown-unknown"
+
+ declare void @foo()
+
+ define i32 @test_branch(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret i32 0
+ }
+
+ define i32 @test_branch_fallthrough(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret i32 0
+ }
+
+ define void @test_setcc(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_cmov(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_adc(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_sbb(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_adcx(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_adox(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_rcl(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_rcr(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+ define void @test_setb_c(i64 %a, i64 %b) {
+ entry:
+ call void @foo()
+ ret void
+ }
+...
+---
+# EFLAGS produced by CMP64rr is copied into %2, held across a call, copied
+# back, and consumed by JA_1 and JB_1. The expectations below require both
+# EFLAGS copies to disappear: the A and B conditions are captured with SETcc
+# right after the compare, and each branch becomes TEST8rr on the saved byte
+# followed by JNE_1, with a new block (bb.4) holding the second test.
+name: test_branch
+# CHECK-LABEL: name: test_branch
+liveins:
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ successors: %bb.1, %bb.2, %bb.3
+ liveins: %rdi, %rsi
+
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ CMP64rr %0, %1, implicit-def %eflags
+ %2:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit %eflags
+ ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %2
+ JA_1 %bb.1, implicit %eflags
+ JB_1 %bb.2, implicit %eflags
+ JMP_1 %bb.3
+ ; CHECK-NOT: %eflags =
+ ;
+ ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def %eflags
+ ; CHECK-NEXT: JNE_1 %bb.1, implicit killed %eflags
+ ; CHECK-SAME: {{$[[:space:]]}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: {{.*$}}
+ ; CHECK-SAME: {{$[[:space:]]}}
+ ; CHECK-NEXT: TEST8rr %[[B_REG]], %[[B_REG]], implicit-def %eflags
+ ; CHECK-NEXT: JNE_1 %bb.2, implicit killed %eflags
+ ; CHECK-NEXT: JMP_1 %bb.3
+
+ bb.1:
+ %3:gr32 = MOV32ri64 42
+ %eax = COPY %3
+ RET 0, %eax
+
+ bb.2:
+ %4:gr32 = MOV32ri64 43
+ %eax = COPY %4
+ RET 0, %eax
+
+ bb.3:
+ %5:gr32 = MOV32r0 implicit-def dead %eflags
+ %eax = COPY %5
+ RET 0, %eax
+
+...
+---
+# Variant of the two-branch case with no trailing JMP_1: after JA_1 and JB_1,
+# bb.0 falls through into bb.1. The expectations require the same SETcc
+# capture and TEST8rr + JNE_1 rewrite, and additionally that the second
+# rewritten branch (in the new bb.4) is followed directly by bb.1.
+name: test_branch_fallthrough
+# CHECK-LABEL: name: test_branch_fallthrough
+liveins:
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ successors: %bb.1, %bb.2, %bb.3
+ liveins: %rdi, %rsi
+
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ CMP64rr %0, %1, implicit-def %eflags
+ %2:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit %eflags
+ ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %2
+ JA_1 %bb.2, implicit %eflags
+ JB_1 %bb.3, implicit %eflags
+ ; CHECK-NOT: %eflags =
+ ;
+ ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def %eflags
+ ; CHECK-NEXT: JNE_1 %bb.2, implicit killed %eflags
+ ; CHECK-SAME: {{$[[:space:]]}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: {{.*$}}
+ ; CHECK-SAME: {{$[[:space:]]}}
+ ; CHECK-NEXT: TEST8rr %[[B_REG]], %[[B_REG]], implicit-def %eflags
+ ; CHECK-NEXT: JNE_1 %bb.3, implicit killed %eflags
+ ; CHECK-SAME: {{$[[:space:]]}}
+ ; CHECK-NEXT: bb.1:
+
+ bb.1:
+ %5:gr32 = MOV32r0 implicit-def dead %eflags
+ %eax = COPY %5
+ RET 0, %eax
+
+ bb.2:
+ %3:gr32 = MOV32ri64 42
+ %eax = COPY %3
+ RET 0, %eax
+
+ bb.3:
+ %4:gr32 = MOV32ri64 43
+ %eax = COPY %4
+ RET 0, %eax
+
+...
+---
+# EFLAGS produced by CMP64rr is copied into %2, held across a call, copied
+# back, and consumed by three register SETcc instructions (A, B, E) and one
+# SETNEm store. The expectations require four SETcc captures right after the
+# compare, no SETcc after the call, and plain MOV8mr stores of the captured
+# bytes in place of the original users.
+name: test_setcc
+# CHECK-LABEL: name: test_setcc
+liveins:
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: %rdi, %rsi
+
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ CMP64rr %0, %1, implicit-def %eflags
+ %2:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit %eflags
+ ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit %eflags
+ ; CHECK-NEXT: %[[NE_REG:[^:]*]]:gr8 = SETNEr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %2
+ %3:gr8 = SETAr implicit %eflags
+ %4:gr8 = SETBr implicit %eflags
+ %5:gr8 = SETEr implicit %eflags
+ SETNEm %rsp, 1, %noreg, -16, %noreg, implicit killed %eflags
+ MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %3
+ MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %4
+ MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %5
+ ; The SETNEm store comes first in program order, so its MOV8mr replacement
+ ; is matched before the three register stores; whether that new store marks
+ ; its source as killed is deliberately left open.
+ ; CHECK-NOT: %eflags =
+ ; CHECK-NOT: = SET{{.*}}
+ ; CHECK: MOV8mr {{.*}},{{( killed)?}} %[[NE_REG]]
+ ; CHECK: MOV8mr {{.*}}, killed %[[A_REG]]
+ ; CHECK-NEXT: MOV8mr {{.*}}, killed %[[B_REG]]
+ ; CHECK-NEXT: MOV8mr {{.*}}, killed %[[E_REG]]
+
+ RET 0
+
+...
+---
+# EFLAGS produced by CMP64rr is held across a call and then consumed by four
+# conditional moves (A, B, E, NE). The expectations require only three SETcc
+# captures (A, B, E): each CMOV is preceded by TEST8rr on its saved byte and
+# becomes CMOVNE64rr, while the original CMOVNE tests the saved E byte and
+# becomes CMOVE64rr.
+name: test_cmov
+# CHECK-LABEL: name: test_cmov
+liveins:
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: %rdi, %rsi
+
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ CMP64rr %0, %1, implicit-def %eflags
+ %2:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit %eflags
+ ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %2
+ %3:gr64 = CMOVA64rr %0, %1, implicit %eflags
+ %4:gr64 = CMOVB64rr %0, %1, implicit %eflags
+ %5:gr64 = CMOVE64rr %0, %1, implicit %eflags
+ %6:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags
+ ; CHECK-NOT: %eflags =
+ ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def %eflags
+ ; CHECK-NEXT: %3:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags
+ ; CHECK-NEXT: TEST8rr %[[B_REG]], %[[B_REG]], implicit-def %eflags
+ ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags
+ ; CHECK-NEXT: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def %eflags
+ ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags
+ ; CHECK-NEXT: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def %eflags
+ ; CHECK-NEXT: %6:gr64 = CMOVE64rr %0, %1, implicit killed %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %3
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %4
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %6
+
+ RET 0
+
+...
+---
+# EFLAGS produced by ADD64rr is held across a call and then consumed by a chain
+# of two ADC64ri32 instructions. The expectations require the carry to be
+# captured with SETBr and re-created before the first ADC by an ADD8ri of the
+# saved byte and 255 (a byte holding 1 carries out, a byte holding 0 does
+# not); the second ADC consumes the flags defined by the first.
+name: test_adc
+# CHECK-LABEL: name: test_adc
+liveins:
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: %rdi, %rsi
+
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def %eflags
+ %3:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %3
+ %4:gr64 = ADC64ri32 %2:gr64, 42, implicit-def %eflags, implicit %eflags
+ %5:gr64 = ADC64ri32 %4:gr64, 42, implicit-def %eflags, implicit %eflags
+ ; CHECK-NOT: %eflags =
+ ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags
+ ; CHECK-NEXT: %4:gr64 = ADC64ri32 %2, 42, implicit-def %eflags, implicit killed %eflags
+ ; CHECK-NEXT: %5:gr64 = ADC64ri32 %4, 42, implicit-def{{( dead)?}} %eflags, implicit{{( killed)?}} %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5
+
+ RET 0
+
+...
+---
+# Borrow counterpart of the add-with-carry case: EFLAGS produced by SUB64rr is
+# held across a call and consumed by a chain of two SBB64ri32 instructions.
+# Unlike the other inputs in this file, this one already carries killed/dead
+# markers on its EFLAGS operands. The expectations require a SETBr capture and
+# an ADD8ri of the saved byte and 255 ahead of the first SBB.
+name: test_sbb
+# CHECK-LABEL: name: test_sbb
+liveins:
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: %rdi, %rsi
+
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ %2:gr64 = SUB64rr %0, %1, implicit-def %eflags
+ %3:gr64 = COPY killed %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %3
+ %4:gr64 = SBB64ri32 %2:gr64, 42, implicit-def %eflags, implicit killed %eflags
+ %5:gr64 = SBB64ri32 %4:gr64, 42, implicit-def dead %eflags, implicit killed %eflags
+ ; CHECK-NOT: %eflags =
+ ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags
+ ; CHECK-NEXT: %4:gr64 = SBB64ri32 %2, 42, implicit-def %eflags, implicit killed %eflags
+ ; CHECK-NEXT: %5:gr64 = SBB64ri32 %4, 42, implicit-def{{( dead)?}} %eflags, implicit{{( killed)?}} %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5
+
+ RET 0
+
+...
+---
+# EFLAGS produced by ADD64rr is held across a call and then consumed by a
+# CMOVE64rr and, after an unrelated MOV64ri32, by ADCX64rr. The expectations
+# require SETEr and SETBr captures, a TEST8rr + CMOVNE64rr rewrite of the
+# conditional move, and an ADD8ri of the saved carry byte and 255 placed
+# directly before the ADCX.
+name: test_adcx
+# CHECK-LABEL: name: test_adcx
+liveins:
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: %rdi, %rsi
+
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def %eflags
+ %3:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[E_REG:[^:]*]]:gr8 = SETEr implicit %eflags
+ ; CHECK-NEXT: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %3
+ %4:gr64 = CMOVE64rr %0, %1, implicit %eflags
+ %5:gr64 = MOV64ri32 42
+ %6:gr64 = ADCX64rr %2, %5, implicit-def %eflags, implicit %eflags
+ ; CHECK-NOT: %eflags =
+ ; CHECK: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def %eflags
+ ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags
+ ; CHECK-NEXT: %5:gr64 = MOV64ri32 42
+ ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags
+ ; CHECK-NEXT: %6:gr64 = ADCX64rr %2, %5, implicit-def{{( dead)?}} %eflags, implicit killed %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %4
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %6
+
+ RET 0
+
+...
+---
+# Overflow-flag counterpart of the ADCX case: EFLAGS produced by ADD64rr is
+# held across a call and then consumed by a CMOVE64rr and by ADOX64rr. The
+# expectations require SETEr and SETOr captures, and the overflow flag to be
+# re-created before the ADOX by an ADD8ri of the saved byte and 127 (as a
+# signed byte, 127 + 1 overflows and 127 + 0 does not).
+name: test_adox
+# CHECK-LABEL: name: test_adox
+liveins:
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: %rdi, %rsi
+
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def %eflags
+ %3:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[E_REG:[^:]*]]:gr8 = SETEr implicit %eflags
+ ; CHECK-NEXT: %[[OF_REG:[^:]*]]:gr8 = SETOr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %3
+ %4:gr64 = CMOVE64rr %0, %1, implicit %eflags
+ %5:gr64 = MOV64ri32 42
+ %6:gr64 = ADOX64rr %2, %5, implicit-def %eflags, implicit %eflags
+ ; CHECK-NOT: %eflags =
+ ; CHECK: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def %eflags
+ ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags
+ ; CHECK-NEXT: %5:gr64 = MOV64ri32 42
+ ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[OF_REG]], 127, implicit-def %eflags
+ ; CHECK-NEXT: %6:gr64 = ADOX64rr %2, %5, implicit-def{{( dead)?}} %eflags, implicit killed %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %4
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %6
+
+ RET 0
+
+...
+---
+# EFLAGS produced by ADD64rr is held across a call and then consumed by a chain
+# of two RCL64r1 (rotate left through carry) instructions. The expectations
+# require a SETBr capture and an ADD8ri of the saved byte and 255 ahead of the
+# first rotate; the second rotate consumes the flags defined by the first.
+name: test_rcl
+# CHECK-LABEL: name: test_rcl
+liveins:
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: %rdi, %rsi
+
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def %eflags
+ %3:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %3
+ %4:gr64 = RCL64r1 %2:gr64, implicit-def %eflags, implicit %eflags
+ %5:gr64 = RCL64r1 %4:gr64, implicit-def %eflags, implicit %eflags
+ ; CHECK-NOT: %eflags =
+ ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags
+ ; CHECK-NEXT: %4:gr64 = RCL64r1 %2, implicit-def %eflags, implicit killed %eflags
+ ; CHECK-NEXT: %5:gr64 = RCL64r1 %4, implicit-def{{( dead)?}} %eflags, implicit{{( killed)?}} %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5
+
+ RET 0
+
+...
+---
+# Rotate-right counterpart of the RCL case: EFLAGS produced by ADD64rr is held
+# across a call and then consumed by a chain of two RCR64r1 instructions. The
+# expectations require a SETBr capture and an ADD8ri of the saved byte and 255
+# ahead of the first rotate.
+name: test_rcr
+# CHECK-LABEL: name: test_rcr
+liveins:
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: %rdi, %rsi
+
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def %eflags
+ %3:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %3
+ %4:gr64 = RCR64r1 %2:gr64, implicit-def %eflags, implicit %eflags
+ %5:gr64 = RCR64r1 %4:gr64, implicit-def %eflags, implicit %eflags
+ ; CHECK-NOT: %eflags =
+ ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags
+ ; CHECK-NEXT: %4:gr64 = RCR64r1 %2, implicit-def %eflags, implicit killed %eflags
+ ; CHECK-NEXT: %5:gr64 = RCR64r1 %4, implicit-def{{( dead)?}} %eflags, implicit{{( killed)?}} %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5
+
+ RET 0
+
+...
+---
+# EFLAGS produced by ADD64rr is held across a call and then restored four
+# times, once for each of SETB_C8r, SETB_C16r, SETB_C32r and SETB_C64r. The
+# expectations require a single SETBr capture and each SETB_C to be replaced
+# by a subtraction of the saved carry from zero in the matching width: the
+# zero comes from MOV32r0 (narrowed with EXTRACT_SUBREG or widened with
+# SUBREG_TO_REG as needed), and for widths above 8 bits the carry byte is
+# first zero-extended with MOVZX32rr8.
+name: test_setb_c
+# CHECK-LABEL: name: test_setb_c
+liveins:
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: %rdi, %rsi
+
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def %eflags
+ %3:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %3
+ %4:gr8 = SETB_C8r implicit-def %eflags, implicit %eflags
+ MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %4
+ ; CHECK-NOT: %eflags =
+ ; CHECK: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
+ ; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr8 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_8bit
+ ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr8 = SUB8rr %[[ZERO_SUBREG]], %[[CF_REG]]
+ ; CHECK-NEXT: MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]
+
+ %eflags = COPY %3
+ %5:gr16 = SETB_C16r implicit-def %eflags, implicit %eflags
+ MOV16mr %rsp, 1, %noreg, -16, %noreg, killed %5
+ ; CHECK-NOT: %eflags =
+ ; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
+ ; CHECK-NEXT: %[[CF_TRUNC:[^:]*]]:gr16 = EXTRACT_SUBREG %[[CF_EXT]], %subreg.sub_16bit
+ ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
+ ; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr16 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_16bit
+ ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr16 = SUB16rr %[[ZERO_SUBREG]], %[[CF_TRUNC]]
+ ; CHECK-NEXT: MOV16mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]
+
+ %eflags = COPY %3
+ %6:gr32 = SETB_C32r implicit-def %eflags, implicit %eflags
+ MOV32mr %rsp, 1, %noreg, -16, %noreg, killed %6
+ ; CHECK-NOT: %eflags =
+ ; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
+ ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
+ ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr32 = SUB32rr %[[ZERO]], %[[CF_EXT]]
+ ; CHECK-NEXT: MOV32mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]
+
+ %eflags = COPY %3
+ %7:gr64 = SETB_C64r implicit-def %eflags, implicit %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %7
+ ; CHECK-NOT: %eflags =
+ ; CHECK: %[[CF_EXT1:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
+ ; CHECK-NEXT: %[[CF_EXT2:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[CF_EXT1]], %subreg.sub_32bit
+ ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
+ ; CHECK-NEXT: %[[ZERO_EXT:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[ZERO]], %subreg.sub_32bit
+ ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr64 = SUB64rr %[[ZERO_EXT]], %[[CF_EXT2]]
+ ; CHECK-NEXT: MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]
+
+ RET 0
+
+...
diff --git a/test/CodeGen/X86/ipra-reg-usage.ll b/test/CodeGen/X86/ipra-reg-usage.ll
index 50c066de9656..e6cf4c023348 100644
--- a/test/CodeGen/X86/ipra-reg-usage.ll
+++ b/test/CodeGen/X86/ipra-reg-usage.ll
@@ -3,7 +3,7 @@
target triple = "x86_64-unknown-unknown"
declare void @bar1()
define preserve_allcc void @foo()#0 {
-; CHECK: foo Clobbered Registers: %cs %ds %eflags %eip %eiz %es %fpsw %fs %gs %ip %rip %riz %ss %ssp %bnd0 %bnd1 %bnd2 %bnd3 %cr0 %cr1 %cr2 %cr3 %cr4 %cr5 %cr6 %cr7 %cr8 %cr9 %cr10 %cr11 %cr12 %cr13 %cr14 %cr15 %dr0 %dr1 %dr2 %dr3 %dr4 %dr5 %dr6 %dr7 %dr8 %dr9 %dr10 %dr11 %dr12 %dr13 %dr14 %dr15 %fp0 %fp1 %fp2 %fp3 %fp4 %fp5 %fp6 %fp7 %k0 %k1 %k2 %k3 %k4 %k5 %k6 %k7 %mm0 %mm1 %mm2 %mm3 %mm4 %mm5 %mm6 %mm7 %r11 %st0 %st1 %st2 %st3 %st4 %st5 %st6 %st7 %xmm16 %xmm17 %xmm18 %xmm19 %xmm20 %xmm21 %xmm22 %xmm23 %xmm24 %xmm25 %xmm26 %xmm27 %xmm28 %xmm29 %xmm30 %xmm31 %ymm0 %ymm1 %ymm2 %ymm3 %ymm4 %ymm5 %ymm6 %ymm7 %ymm8 %ymm9 %ymm10 %ymm11 %ymm12 %ymm13 %ymm14 %ymm15 %ymm16 %ymm17 %ymm18 %ymm19 %ymm20 %ymm21 %ymm22 %ymm23 %ymm24 %ymm25 %ymm26 %ymm27 %ymm28 %ymm29 %ymm30 %ymm31 %zmm0 %zmm1 %zmm2 %zmm3 %zmm4 %zmm5 %zmm6 %zmm7 %zmm8 %zmm9 %zmm10 %zmm11 %zmm12 %zmm13 %zmm14 %zmm15 %zmm16 %zmm17 %zmm18 %zmm19 %zmm20 %zmm21 %zmm22 %zmm23 %zmm24 %zmm25 %zmm26 %zmm27 %zmm28 %zmm29 %zmm30 %zmm31 %r11b %r11d %r11w
+; CHECK: foo Clobbered Registers: %cs %df %ds %eflags %eip %eiz %es %fpsw %fs %gs %ip %rip %riz %ss %ssp %bnd0 %bnd1 %bnd2 %bnd3 %cr0 %cr1 %cr2 %cr3 %cr4 %cr5 %cr6 %cr7 %cr8 %cr9 %cr10 %cr11 %cr12 %cr13 %cr14 %cr15 %dr0 %dr1 %dr2 %dr3 %dr4 %dr5 %dr6 %dr7 %dr8 %dr9 %dr10 %dr11 %dr12 %dr13 %dr14 %dr15 %fp0 %fp1 %fp2 %fp3 %fp4 %fp5 %fp6 %fp7 %k0 %k1 %k2 %k3 %k4 %k5 %k6 %k7 %mm0 %mm1 %mm2 %mm3 %mm4 %mm5 %mm6 %mm7 %r11 %st0 %st1 %st2 %st3 %st4 %st5 %st6 %st7 %xmm16 %xmm17 %xmm18 %xmm19 %xmm20 %xmm21 %xmm22 %xmm23 %xmm24 %xmm25 %xmm26 %xmm27 %xmm28 %xmm29 %xmm30 %xmm31 %ymm0 %ymm1 %ymm2 %ymm3 %ymm4 %ymm5 %ymm6 %ymm7 %ymm8 %ymm9 %ymm10 %ymm11 %ymm12 %ymm13 %ymm14 %ymm15 %ymm16 %ymm17 %ymm18 %ymm19 %ymm20 %ymm21 %ymm22 %ymm23 %ymm24 %ymm25 %ymm26 %ymm27 %ymm28 %ymm29 %ymm30 %ymm31 %zmm0 %zmm1 %zmm2 %zmm3 %zmm4 %zmm5 %zmm6 %zmm7 %zmm8 %zmm9 %zmm10 %zmm11 %zmm12 %zmm13 %zmm14 %zmm15 %zmm16 %zmm17 %zmm18 %zmm19 %zmm20 %zmm21 %zmm22 %zmm23 %zmm24 %zmm25 %zmm26 %zmm27 %zmm28 %zmm29 %zmm30 %zmm31 %r11b %r11d %r11w
call void @bar1()
call void @bar2()
ret void
diff --git a/test/CodeGen/X86/mul-i1024.ll b/test/CodeGen/X86/mul-i1024.ll
index 9980042a4ccc..16fb112efadb 100644
--- a/test/CodeGen/X86/mul-i1024.ll
+++ b/test/CodeGen/X86/mul-i1024.ll
@@ -6,4687 +6,4637 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
; X32-LABEL: test_1024:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
-; X32-NEXT: movl %esp, %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $996, %esp # imm = 0x3E4
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 32(%eax), %eax
-; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: mull %ecx
+; X32-NEXT: subl $1000, %esp # imm = 0x3E8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 48(%eax), %ecx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 32(%edx), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: xorl %edi, %edi
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: movl 48(%esi), %eax
-; X32-NEXT: movl %eax, -440(%ebp) # 4-byte Spill
-; X32-NEXT: mull %ecx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: adcl %edi, %edx
-; X32-NEXT: movl %edx, -884(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl 32(%esi), %eax
-; X32-NEXT: movl %eax, -416(%ebp) # 4-byte Spill
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, -400(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -324(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %ebx, %ecx
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %edi, %ecx
-; X32-NEXT: movl %ecx, -204(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -892(%ebp) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 36(%eax), %eax
-; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: mull %edx
-; X32-NEXT: movl %edx, -236(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: movl %edi, -304(%ebp) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movl %edi, -80(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %eax
; X32-NEXT: adcl $0, %eax
-; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl 36(%esi), %eax
-; X32-NEXT: movl %eax, -316(%ebp) # 4-byte Spill
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %ecx, -124(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: movl -400(%ebp), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl %ecx, -64(%ebp) # 4-byte Spill
-; X32-NEXT: movl -324(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %ebx, -100(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -656(%ebp) # 4-byte Spill
-; X32-NEXT: leal (%ebx,%edi), %eax
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: leal (%ecx,%edi), %edx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: leal (%ebx,%eax), %eax
+; X32-NEXT: leal (%ecx,%ebp), %edx
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: movl %edx, -700(%ebp) # 4-byte Spill
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: movl %eax, -640(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -96(%ebp) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movl %edi, -112(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %esi, -64(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl %esi, %ebx
-; X32-NEXT: setb -160(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %eax
-; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl 8(%ebp), %ecx
-; X32-NEXT: movl 16(%ecx), %eax
-; X32-NEXT: movl %eax, -348(%ebp) # 4-byte Spill
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: mull %edx
-; X32-NEXT: movl %edx, -320(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -180(%ebp) # 4-byte Spill
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: adcl %edi, %edx
-; X32-NEXT: movl %edx, -428(%ebp) # 4-byte Spill
-; X32-NEXT: movl (%ecx), %eax
-; X32-NEXT: movl %eax, -260(%ebp) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT: movl 16(%ebp), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl %ebx, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl (%ebp), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, -264(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ebp
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill
-; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %eax, -764(%ebp) # 4-byte Spill
-; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %esi, %ecx
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: movl %ebx, -424(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edi, %ebx
-; X32-NEXT: movl %ebx, -256(%ebp) # 4-byte Spill
-; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, -80(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, -220(%ebp) # 4-byte Folded Spill
-; X32-NEXT: setb -388(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 4(%eax), %eax
-; X32-NEXT: movl %eax, -92(%ebp) # 4-byte Spill
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebx, %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, -28(%ebp) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movl %edi, -16(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %bh
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill
-; X32-NEXT: movzbl %bh, %eax
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: movl %edi, -72(%ebp) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 8(%eax), %eax
-; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, -156(%ebp) # 4-byte Spill
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl -256(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: movl %ecx, -120(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 52(%eax), %eax
-; X32-NEXT: movl %eax, -340(%ebp) # 4-byte Spill
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edi
-; X32-NEXT: movl %edi, -192(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: adcl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl 4(%esi), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: movl %ebp, %esi
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: movl %ebx, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb %cl
-; X32-NEXT: addl %eax, %esi
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 56(%eax), %eax
-; X32-NEXT: movl %eax, -408(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 8(%eax), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, -392(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, -412(%ebp) # 4-byte Spill
-; X32-NEXT: movl %ebx, %ecx
-; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %eax, %esi
+; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT: movl 52(%ebp), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl %esi, %ebx
-; X32-NEXT: movl %ebx, -272(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -216(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -24(%ebp) # 4-byte Spill
-; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, -68(%ebp) # 4-byte Spill
-; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -16(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -616(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: addl %eax, %ecx
+; X32-NEXT: movzbl %bl, %ebx
+; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: movl 56(%ebp), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: mull %edx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ebp
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %edi
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -612(%ebp) # 4-byte Spill
-; X32-NEXT: movl -64(%ebp), %esi # 4-byte Reload
-; X32-NEXT: addl -184(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill
-; X32-NEXT: movzbl -160(%ebp), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl -124(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 40(%eax), %eax
-; X32-NEXT: movl %eax, -352(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, -364(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %ebx, -396(%ebp) # 4-byte Spill
-; X32-NEXT: movl -324(%ebp), %edx # 4-byte Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: addl %esi, %edi
-; X32-NEXT: movl %edi, -44(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -152(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill
-; X32-NEXT: addl -28(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill
-; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -16(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: movl %eax, -456(%ebp) # 4-byte Spill
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -504(%ebp) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -508(%ebp) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl 16(%ecx), %eax
-; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl 20(%ecx), %eax
-; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: addl %edi, %ebx
-; X32-NEXT: movl %ebx, -164(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 24(%eax), %eax
-; X32-NEXT: movl %eax, -284(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, -308(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, -208(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %ebx
; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: adcl %edx, %eax
; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, -40(%ebp) # 4-byte Spill
; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: movl -324(%ebp), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl %edi, -116(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl -400(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -84(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, %eax
-; X32-NEXT: movl %eax, -768(%ebp) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -296(%ebp) # 4-byte Spill
-; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, -776(%ebp) # 4-byte Spill
-; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: movl %eax, -772(%ebp) # 4-byte Spill
-; X32-NEXT: movl -52(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %ebx, -56(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -780(%ebp) # 4-byte Spill
-; X32-NEXT: movl -132(%ebp), %edx # 4-byte Reload
-; X32-NEXT: movl %edx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %ecx, %eax
-; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -332(%ebp) # 4-byte Spill
-; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, -648(%ebp) # 4-byte Spill
-; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -40(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -644(%ebp) # 4-byte Spill
-; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: movl %eax, -572(%ebp) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 20(%eax), %eax
-; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl 20(%edi), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl -320(%ebp), %ebx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: addl %ebx, %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl -180(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %esi
-; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 24(%eax), %eax
-; X32-NEXT: movl %eax, -288(%ebp) # 4-byte Spill
+; X32-NEXT: movl 24(%edi), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, -280(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, -312(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edi, %edx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %edi
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: movl -320(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: adcl -312(%ebp), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movl %edi, -36(%ebp) # 4-byte Spill
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ebx
-; X32-NEXT: movl %ebx, -20(%ebp) # 4-byte Spill
-; X32-NEXT: addl -28(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill
-; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -16(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -596(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -464(%ebp) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -536(%ebp) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl 4(%eax), %eax
-; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl %ecx, %esi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl -136(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: addl %ebx, %esi
-; X32-NEXT: movl %esi, -276(%ebp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ecx, %edi
; X32-NEXT: setb %cl
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: movl %edi, -584(%ebp) # 4-byte Spill
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, -432(%ebp) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 8(%eax), %eax
-; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, -268(%ebp) # 4-byte Spill
-; X32-NEXT: movl %ebx, %esi
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %esi
; X32-NEXT: movl %esi, %eax
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl -264(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %ecx
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -432(%ebp), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: movl %esi, %edx
-; X32-NEXT: addl -28(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -344(%ebp) # 4-byte Spill
-; X32-NEXT: movl -276(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %edx
-; X32-NEXT: adcl -16(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -404(%ebp) # 4-byte Spill
-; X32-NEXT: pushl %eax
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: popl %eax
-; X32-NEXT: movl %edx, -736(%ebp) # 4-byte Spill
+; X32-NEXT: movl %esi, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %edx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %eax, %edx
-; X32-NEXT: adcl -120(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -532(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %ecx, -172(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -592(%ebp) # 4-byte Spill
-; X32-NEXT: movl %esi, %edx
-; X32-NEXT: movl %edx, %eax
-; X32-NEXT: movl -116(%ebp), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl -84(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: movl %eax, -328(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, %eax
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %eax, -368(%ebp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -620(%ebp) # 4-byte Spill
-; X32-NEXT: movl -240(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -40(%ebp), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %eax, -788(%ebp) # 4-byte Spill
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -784(%ebp) # 4-byte Spill
-; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -100(%ebp), %edx # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %edx, %eax
-; X32-NEXT: movl -320(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -204(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, %eax
-; X32-NEXT: movl %eax, -804(%ebp) # 4-byte Spill
-; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %edx, %eax
-; X32-NEXT: movl -264(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %ecx, %eax
-; X32-NEXT: movl %eax, -820(%ebp) # 4-byte Spill
-; X32-NEXT: movl -180(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -116(%ebp), %edx # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %edx, %eax
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: movl %esi, -576(%ebp) # 4-byte Spill
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: movl %ecx, -540(%ebp) # 4-byte Spill
-; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -800(%ebp) # 4-byte Spill
-; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload
+; X32-NEXT: adcl %ebp, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %edx, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %eax, -796(%ebp) # 4-byte Spill
-; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -792(%ebp) # 4-byte Spill
-; X32-NEXT: movl -220(%ebp), %esi # 4-byte Reload
-; X32-NEXT: addl -304(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -220(%ebp) # 4-byte Spill
-; X32-NEXT: movzbl -388(%ebp), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -376(%ebp) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 40(%eax), %eax
-; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, -128(%ebp) # 4-byte Spill
-; X32-NEXT: movl -100(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %edi
-; X32-NEXT: addl %eax, %edi
-; X32-NEXT: movl -204(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: addl %esi, %edi
-; X32-NEXT: adcl -376(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -468(%ebp) # 4-byte Spill
-; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -80(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %edx
+; X32-NEXT: addl %eax, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ecx, %ebp
+; X32-NEXT: addl %edi, %edx
+; X32-NEXT: adcl %ebx, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, %eax
-; X32-NEXT: movl %eax, -816(%ebp) # 4-byte Spill
-; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %edi, -372(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -812(%ebp) # 4-byte Spill
-; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl %edx, -292(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, -808(%ebp) # 4-byte Spill
-; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -512(%ebp) # 4-byte Spill
-; X32-NEXT: movl -276(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %ecx, %eax
-; X32-NEXT: movl %eax, -676(%ebp) # 4-byte Spill
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: movl %eax, -740(%ebp) # 4-byte Spill
-; X32-NEXT: movl -240(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %eax, -624(%ebp) # 4-byte Spill
-; X32-NEXT: movl -172(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, -628(%ebp) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %esi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl 48(%esi), %eax
-; X32-NEXT: movl %eax, -300(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: movl %ebx, -336(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, %ebp
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl 52(%esi), %eax
-; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %esi
; X32-NEXT: addl %edi, %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl %ebx, %esi
-; X32-NEXT: movl %esi, -200(%ebp) # 4-byte Spill
+; X32-NEXT: addl %ebp, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 56(%eax), %eax
-; X32-NEXT: movl %eax, -244(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, -360(%ebp) # 4-byte Spill
-; X32-NEXT: movl -336(%ebp), %ebx # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %ebx
; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: movl %edi, %edx
-; X32-NEXT: movl %edx, -176(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -360(%ebp), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl %edx, %edi
; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, -472(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %edi
-; X32-NEXT: movl %edi, -436(%ebp) # 4-byte Spill
-; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -336(%ebp), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl -264(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, -824(%ebp) # 4-byte Spill
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %eax, -588(%ebp) # 4-byte Spill
-; X32-NEXT: movl -276(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -632(%ebp) # 4-byte Spill
-; X32-NEXT: movl -240(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: movl %eax, -828(%ebp) # 4-byte Spill
-; X32-NEXT: movl -172(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %eax, -636(%ebp) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 64(%eax), %eax
-; X32-NEXT: movl %eax, -476(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl 64(%edi), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %esi, -384(%ebp) # 4-byte Spill
-; X32-NEXT: movl -116(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %ecx
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: movl %edx, -480(%ebp) # 4-byte Spill
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: movl -84(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, -920(%ebp) # 4-byte Spill
-; X32-NEXT: movl -28(%ebp), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: addl %edx, %eax
-; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: adcl -384(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -932(%ebp) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 80(%eax), %eax
-; X32-NEXT: movl %eax, -548(%ebp) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 80(%edi), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, -380(%ebp) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -380(%ebp), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %edx, -356(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl %ecx, -948(%ebp) # 4-byte Spill
-; X32-NEXT: addl %esi, %edi
+; X32-NEXT: movl %ebp, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %ebx
-; X32-NEXT: movl %ebx, -960(%ebp) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %ecx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, %esi
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl 80(%ecx), %eax
-; X32-NEXT: movl %eax, -552(%ebp) # 4-byte Spill
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, -528(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -524(%ebp) # 4-byte Spill
-; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: xorl %edi, %edi
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: movl -264(%ebp), %edi # 4-byte Reload
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %eax, -976(%ebp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl 64(%ecx), %eax
-; X32-NEXT: movl %eax, -520(%ebp) # 4-byte Spill
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, -500(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %esi, %ecx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %esi, -496(%ebp) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %ecx
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: movl %ecx, -992(%ebp) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: movl -180(%ebp), %edx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %edx, %ecx
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, %eax
-; X32-NEXT: movl %eax, -1008(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: movl -336(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: adcl -176(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, -832(%ebp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -672(%ebp) # 4-byte Spill
-; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -836(%ebp) # 4-byte Spill
-; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -472(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -840(%ebp) # 4-byte Spill
-; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -436(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -844(%ebp) # 4-byte Spill
-; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl -100(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -680(%ebp) # 4-byte Spill
-; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -80(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -856(%ebp) # 4-byte Spill
-; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -372(%ebp), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, -852(%ebp) # 4-byte Spill
-; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -292(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, %eax
-; X32-NEXT: movl %eax, -848(%ebp) # 4-byte Spill
-; X32-NEXT: movl -44(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload
-; X32-NEXT: pushl %eax
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: popl %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, -860(%ebp) # 4-byte Spill
-; X32-NEXT: movl -52(%ebp), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X32-NEXT: addb $255, %al
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: adcl %ecx, %eax
-; X32-NEXT: movl %eax, -864(%ebp) # 4-byte Spill
-; X32-NEXT: movl -324(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl -400(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -176(%ebp), %edx # 4-byte Reload
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, -868(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -684(%ebp) # 4-byte Spill
-; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -876(%ebp) # 4-byte Spill
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl -472(%ebp), %ebx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: movl %eax, -872(%ebp) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -436(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, -880(%ebp) # 4-byte Spill
-; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, -888(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -688(%ebp) # 4-byte Spill
-; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -900(%ebp) # 4-byte Spill
-; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: movl %eax, -896(%ebp) # 4-byte Spill
-; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, -904(%ebp) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 68(%eax), %eax
-; X32-NEXT: movl %eax, -248(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl -384(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl -480(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: addl %ebx, %esi
-; X32-NEXT: movl %esi, -652(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb -96(%ebp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl %ebx, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movzbl -96(%ebp), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edx, %edi
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %edx, %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 72(%eax), %eax
-; X32-NEXT: movl %eax, -516(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl %esi, -484(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, -488(%ebp) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl -384(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: adcl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: movl -116(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: movl -480(%ebp), %edx # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %edx
+; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %edx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl %ebp, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: movl %ecx, -692(%ebp) # 4-byte Spill
-; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload
-; X32-NEXT: movl -652(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movl %esi, -908(%ebp) # 4-byte Spill
-; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl %eax, %esi
-; X32-NEXT: movl %esi, -916(%ebp) # 4-byte Spill
-; X32-NEXT: movl -56(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: movl %esi, -912(%ebp) # 4-byte Spill
-; X32-NEXT: movl -28(%ebp), %esi # 4-byte Reload
-; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl %esi, -696(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, -652(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -924(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -60(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, -928(%ebp) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl 84(%eax), %eax
-; X32-NEXT: movl %eax, -544(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl -356(%ebp), %ebx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: addl %ebx, %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl -380(%ebp), %edi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl %edi, %esi
-; X32-NEXT: movl %esi, -660(%ebp) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebx, %ecx
-; X32-NEXT: setb %bl
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movzbl %bl, %esi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload
+; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 88(%eax), %eax
-; X32-NEXT: movl %eax, -580(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, -600(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, -604(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edi, %ebx
-; X32-NEXT: addl %eax, %edi
-; X32-NEXT: movl -356(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %esi
+; X32-NEXT: addl %eax, %esi
+; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: adcl %ebp, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -704(%ebp) # 4-byte Spill
-; X32-NEXT: movl -16(%ebp), %edx # 4-byte Reload
-; X32-NEXT: movl -660(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: movl %edx, -940(%ebp) # 4-byte Spill
-; X32-NEXT: movl -120(%ebp), %edx # 4-byte Reload
-; X32-NEXT: adcl %edi, %edx
-; X32-NEXT: movl %edx, -944(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edi, %edx
-; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload
-; X32-NEXT: adcl %esi, %edi
-; X32-NEXT: movl %edi, -936(%ebp) # 4-byte Spill
-; X32-NEXT: movl -116(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl %ebx, %edi
-; X32-NEXT: movl %edi, -708(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -660(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -952(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -56(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -956(%ebp) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl %ebx, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 84(%eax), %eax
-; X32-NEXT: movl %eax, -460(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: movl -528(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl %esi, %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl -524(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -668(%ebp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movzbl %bl, %edi
; X32-NEXT: adcl %edx, %edi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 88(%eax), %eax
-; X32-NEXT: movl %eax, -492(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl %esi, -556(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, -560(%ebp) # 4-byte Spill
-; X32-NEXT: movl -524(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
-; X32-NEXT: movl -528(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %ebx
+; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, -732(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: movl %esi, %edx
-; X32-NEXT: movl %edx, -728(%ebp) # 4-byte Spill
-; X32-NEXT: addl -136(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -712(%ebp) # 4-byte Spill
-; X32-NEXT: movl -668(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl -276(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, -968(%ebp) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: adcl -240(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -964(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -172(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -972(%ebp) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl 68(%eax), %eax
-; X32-NEXT: movl %eax, -444(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl -496(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl %ebx, %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl -500(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -664(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb -96(%ebp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movzbl -96(%ebp), %esi # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 72(%eax), %eax
-; X32-NEXT: movl %eax, -388(%ebp) # 4-byte Spill
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, -564(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %ebx, -568(%ebp) # 4-byte Spill
-; X32-NEXT: movl -500(%ebp), %edx # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: mull %edx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %eax, %edi
-; X32-NEXT: movl -496(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %edx
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edi, %ebx
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: addl -136(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -716(%ebp) # 4-byte Spill
-; X32-NEXT: movl -664(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: adcl -276(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -988(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edi, %esi
-; X32-NEXT: adcl -240(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -984(%ebp) # 4-byte Spill
-; X32-NEXT: movl %ecx, %esi
-; X32-NEXT: adcl -172(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -980(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl -180(%ebp), %edx # 4-byte Reload
-; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl %esi, -720(%ebp) # 4-byte Spill
-; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: addl %edx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, -664(%ebp) # 4-byte Spill
-; X32-NEXT: movl -36(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: movl %edi, -996(%ebp) # 4-byte Spill
-; X32-NEXT: movl -20(%ebp), %edi # 4-byte Reload
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: movl %ecx, -1000(%ebp) # 4-byte Spill
-; X32-NEXT: movl -524(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ebp, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: addl %edx, %eax
-; X32-NEXT: movl -528(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -320(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -1004(%ebp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: addl %edx, %eax
-; X32-NEXT: movl %eax, -724(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %esi, -668(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl %ebx, -732(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl %edi, -728(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 12(%eax), %eax
-; X32-NEXT: movl %eax, -96(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl %esi, %edi
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movl %edi, -232(%ebp) # 4-byte Spill
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ebx
-; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: movzbl -88(%ebp), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: addl %edx, %ebx
; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill
-; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl -76(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: movl -72(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill
-; X32-NEXT: movl -232(%ebp), %edx # 4-byte Reload
-; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill
-; X32-NEXT: movl %ebx, %edx
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl %edi, -72(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -16(%ebp), %edi # 4-byte Reload
-; X32-NEXT: adcl %edi, -76(%ebp) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl (%esp), %edi # 4-byte Reload
+; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %ebx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %ebp, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl (%esp), %esi # 4-byte Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl $0, %edx
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: setb %dl
-; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl -232(%ebp), %eax # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movzbl %dl, %edx
; X32-NEXT: adcl %ebx, %edx
-; X32-NEXT: movl %edx, -608(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -88(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -28(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: addl -116(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl -164(%ebp), %edx # 4-byte Reload
-; X32-NEXT: adcl %edi, %edx
-; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -120(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl -56(%ebp), %edi # 4-byte Reload
-; X32-NEXT: adcl -60(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, -232(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: movl %edx, -164(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -608(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -88(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -56(%ebp) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: adcl $0, %eax
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %eax, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 44(%eax), %eax
-; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: movl %ebx, %ecx
-; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %esi, %ebx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl -364(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, -60(%ebp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl %esi, %ecx
-; X32-NEXT: setb -16(%ebp) # 1-byte Folded Spill
-; X32-NEXT: addl %ebx, %edx
-; X32-NEXT: movzbl -16(%ebp), %ebx # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: adcl %ecx, %ebx
-; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl -324(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl -152(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl -400(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill
-; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill
+; X32-NEXT: setb %bl
+; X32-NEXT: addl %eax, %edx
+; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: adcl %ecx, %eax
+; X32-NEXT: movl %edi, %ecx
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %esi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %esi, -88(%ebp) # 4-byte Spill
-; X32-NEXT: movl %ebx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %edi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl -324(%ebp), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, -64(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -16(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -112(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill
+; X32-NEXT: addl %ebp, (%esp) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %eax
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl -88(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill
-; X32-NEXT: addl -364(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl -60(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movzbl -88(%ebp), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl -324(%ebp), %edx # 4-byte Reload
-; X32-NEXT: addl -132(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -88(%ebp) # 4-byte Spill
-; X32-NEXT: movl -192(%ebp), %edx # 4-byte Reload
-; X32-NEXT: adcl -112(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl -44(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -272(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl -52(%ebp), %edi # 4-byte Reload
-; X32-NEXT: adcl -24(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, -88(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl %edx, -192(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -60(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: movl %edi, -52(%ebp) # 4-byte Spill
-; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -456(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: pushl %eax
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: popl %eax
-; X32-NEXT: adcl -72(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -608(%ebp) # 4-byte Spill
-; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -76(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -760(%ebp) # 4-byte Spill
-; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -232(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -756(%ebp) # 4-byte Spill
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: adcl %edi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload
+; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -752(%ebp) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: adcl -40(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -748(%ebp) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -744(%ebp) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 12(%eax), %eax
-; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: movl -268(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %edi, %ebx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edx
-; X32-NEXT: setb %cl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %edx
-; X32-NEXT: movzbl %cl, %eax
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %edi, %esi
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill
-; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl -584(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: movl -432(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -264(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: movl %esi, -432(%ebp) # 4-byte Spill
-; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl %eax, %esi
-; X32-NEXT: movl %esi, -456(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl -112(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: addl %edi, -432(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -276(%ebp), %edi # 4-byte Reload
-; X32-NEXT: adcl %edi, -456(%ebp) # 4-byte Folded Spill
+; X32-NEXT: movzbl %bl, %ebp
+; X32-NEXT: adcl %ecx, %ebp
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %ebp, %esi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: addl %ebx, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: setb %bl
-; X32-NEXT: addl -160(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl -24(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movzbl %bl, %esi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -112(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -136(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: addl -180(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %edi, %edx
-; X32-NEXT: adcl -48(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl -240(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -36(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl -172(%ebp), %edi # 4-byte Reload
-; X32-NEXT: adcl -20(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, -584(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: movl %edx, -276(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -24(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -240(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -112(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -172(%ebp) # 4-byte Spill
-; X32-NEXT: movl -736(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -432(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill
-; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -456(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %ebx, -232(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl %edx, -164(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl %esi, -40(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl %edi, -56(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb %cl
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ecx, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 44(%eax), %eax
-; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: movl -128(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %ebx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl -304(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, -36(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: setb %bl
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: setb %cl
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: addl %edx, %esi
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill
-; X32-NEXT: movl -100(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl -376(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl -204(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edx
-; X32-NEXT: movl %edx, -376(%ebp) # 4-byte Spill
-; X32-NEXT: movl -36(%ebp), %edx # 4-byte Reload
+; X32-NEXT: movl %edi, %edx
+; X32-NEXT: addl %edx, %ebp
+; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl %edx, -220(%ebp) # 4-byte Spill
-; X32-NEXT: movl %esi, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %edx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill
-; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: addl %edi, -376(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -220(%ebp), %edx # 4-byte Reload
-; X32-NEXT: adcl -80(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -220(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ebx, %edi
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %eax
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl -20(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: addl %edx, %eax
+; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb %dl
-; X32-NEXT: addl -304(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movzbl %dl, %edx
-; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -48(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl %edi, %ebx
-; X32-NEXT: addl -336(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl -200(%ebp), %edx # 4-byte Reload
-; X32-NEXT: adcl -80(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl -472(%ebp), %edi # 4-byte Reload
-; X32-NEXT: adcl -372(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl -436(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -292(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movzbl %dl, %eax
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %esi, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl %edx, -200(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -36(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl -48(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl -740(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: movl -376(%ebp), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, -432(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -220(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, -456(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl %ebx, -584(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -200(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, -276(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl %edi, -240(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl %esi, -172(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -640(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: adcl -64(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -376(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, -220(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -88(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, -640(%ebp) # 4-byte Spill
-; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, -200(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl -44(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -472(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -52(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -436(%ebp) # 4-byte Spill
-; X32-NEXT: movl -408(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: adcl %eax, %edi
+; X32-NEXT: adcl %ebp, %esi
+; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 60(%eax), %eax
-; X32-NEXT: movl %eax, -192(%ebp) # 4-byte Spill
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 60(%eax), %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl -16(%ebp), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -92(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -392(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill
-; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -168(%ebp), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill
-; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: setb %bl
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -92(%ebp), %esi # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl -340(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl -68(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl -764(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: addl -48(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -36(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl -44(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %cl
-; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -140(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, -48(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, -52(%ebp) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl -80(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl -16(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: setb -36(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -16(%ebp) # 4-byte Spill
-; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill
-; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl -20(%ebp), %edi # 1-byte Folded Reload
+; X32-NEXT: movzbl (%esp), %edi # 1-byte Folded Reload
; X32-NEXT: adcl %edi, %edx
-; X32-NEXT: movl -392(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl -412(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: adcl -156(%ebp), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
; X32-NEXT: adcl %edx, %ebx
-; X32-NEXT: movl -16(%ebp), %edx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl -80(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: movzbl -36(%ebp), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: addl -68(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -420(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -616(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -88(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -612(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, -272(%ebp) # 4-byte Spill
-; X32-NEXT: movl -352(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill
-; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl -92(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %eax, -68(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %bl
-; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, -20(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill
-; X32-NEXT: movl -416(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, -616(%ebp) # 4-byte Spill
-; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -92(%ebp), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -612(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb -152(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -316(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl -152(%ebp), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl -32(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl -424(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: addl -44(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -68(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -416(%ebp), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, -424(%ebp) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb %bl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %cl
-; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -324(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl -156(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, -424(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, -420(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl -20(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload
+; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill
-; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl -20(%ebp), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill
-; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -44(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb -44(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl -44(%ebp), %esi # 1-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl -364(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: addl -104(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill
-; X32-NEXT: movl -36(%ebp), %edx # 4-byte Reload
-; X32-NEXT: addl %edi, %edx
-; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: addl %ebx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movzbl -68(%ebp), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl -32(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl -196(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl -504(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl -508(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: addl -24(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -64(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -48(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, -292(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -52(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -88(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -272(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -352(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill
-; X32-NEXT: movl -120(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %ebp
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull -252(%ebp) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -52(%ebp) # 4-byte Spill
-; X32-NEXT: movl -416(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -508(%ebp) # 4-byte Spill
-; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl -68(%ebp), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -504(%ebp) # 4-byte Spill
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl -296(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl -768(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: addl -48(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -372(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -64(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -416(%ebp), %esi # 4-byte Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill
-; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 28(%eax), %ebx
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: movl %ebx, %esi
-; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %ebx
; X32-NEXT: setb %cl
-; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -324(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl -372(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, -152(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -68(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl -24(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: setb -372(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill
-; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl -24(%ebp), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill
-; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -68(%ebp), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: setb %bl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl -68(%ebp), %esi # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl -364(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl -308(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -208(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -68(%ebp) # 4-byte Spill
-; X32-NEXT: movl -52(%ebp), %edx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edx
-; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload
+; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movzbl -372(%ebp), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl -68(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl -296(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl -776(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl -772(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl -780(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl -36(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, -508(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -20(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, -504(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -292(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, -152(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -44(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, -64(%ebp) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl -16(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -80(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -88(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -44(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -272(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -68(%ebp) # 4-byte Spill
-; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -408(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill
-; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb -16(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl -16(%ebp), %ecx # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -392(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill
-; X32-NEXT: movl -440(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, -292(%ebp) # 4-byte Spill
-; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -372(%ebp) # 4-byte Spill
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -340(%ebp), %edi # 4-byte Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl -88(%ebp), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl -332(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl -448(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: addl -36(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -272(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, -296(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %cl
-; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl -448(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, -88(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, -296(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: setb -16(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill
-; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl -80(%ebp), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill
-; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -80(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: setb -80(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl -80(%ebp), %esi # 1-byte Folded Reload
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl -392(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl -308(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -208(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -80(%ebp) # 4-byte Spill
-; X32-NEXT: movl -272(%ebp), %edx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edx
-; X32-NEXT: movl -36(%ebp), %esi # 4-byte Reload
+; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movzbl -16(%ebp), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl -80(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl -332(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: adcl -648(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -644(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -332(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -572(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill
-; X32-NEXT: movl -292(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -52(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -372(%ebp), %edx # 4-byte Reload
-; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -44(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl -296(%ebp), %edi # 4-byte Reload
-; X32-NEXT: adcl -68(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movzbl -20(%ebp), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: movl %ebx, -272(%ebp) # 4-byte Spill
-; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl $0, %eax
-; X32-NEXT: movl -332(%ebp), %ebx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill
-; X32-NEXT: addl -32(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, -292(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -196(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -372(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -608(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -88(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -760(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -296(%ebp) # 4-byte Spill
-; X32-NEXT: movl -756(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, -272(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl -752(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -748(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, -332(%ebp) # 4-byte Spill
-; X32-NEXT: movl -744(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, -80(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -288(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -168(%ebp), %edi # 4-byte Reload
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 28(%eax), %eax
-; X32-NEXT: movl %eax, -16(%ebp) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 28(%eax), %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl %esi, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -92(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, -44(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill
-; X32-NEXT: movl -348(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -572(%ebp) # 4-byte Spill
-; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl -32(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -92(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl -228(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl -428(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: addl -52(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl -24(%ebp), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, -428(%ebp) # 4-byte Spill
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %cl
-; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %esi, %ebx
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -180(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -320(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, -196(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -52(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, -428(%ebp) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl -44(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl -20(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: setb -52(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill
-; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl -44(%ebp), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill
-; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl -24(%ebp), %edi # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
; X32-NEXT: adcl %edi, %edx
-; X32-NEXT: movl -280(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl -312(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: adcl -156(%ebp), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
; X32-NEXT: adcl %edx, %ebx
-; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl -44(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: movzbl -52(%ebp), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: addl -228(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -596(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, -44(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -464(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -464(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -536(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, -68(%ebp) # 4-byte Spill
-; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill
-; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl -92(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb %bl
-; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %bl, %esi
+; X32-NEXT: adcl %esi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -52(%ebp) # 4-byte Spill
-; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, -648(%ebp) # 4-byte Spill
-; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl -92(%ebp), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -644(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb -536(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -124(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl -536(%ebp), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl -344(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl -452(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: addl -32(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -228(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, -536(%ebp) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, -596(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %cl
-; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl -156(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl -452(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, -536(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, -596(%ebp) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %edi
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl -24(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: setb -228(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill
-; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl -24(%ebp), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill
-; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: setb %cl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl -32(%ebp), %esi # 1-byte Folded Reload
+; X32-NEXT: movzbl %cl, %esi
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl -160(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: addl -104(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill
-; X32-NEXT: movl -52(%ebp), %edx # 4-byte Reload
-; X32-NEXT: addl %edi, %edx
-; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movzbl -228(%ebp), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl -344(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl -404(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl -532(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl -592(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: addl -572(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -448(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -196(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, -572(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -428(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -464(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -68(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -428(%ebp) # 4-byte Spill
-; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %esi
+; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %esi
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl -228(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull -252(%ebp) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, -196(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -228(%ebp) # 4-byte Spill
-; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, -532(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -592(%ebp) # 4-byte Spill
-; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl -532(%ebp), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -532(%ebp) # 4-byte Spill
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl -368(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl -328(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: addl -428(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -452(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -328(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -196(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -228(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %ebp, %esi
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, -428(%ebp) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: addl %ebp, %ecx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl -48(%ebp), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %cl
-; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -136(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl -448(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, -428(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -328(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, -452(%ebp) # 4-byte Folded Spill
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl -196(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl -228(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: setb -448(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, -196(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill
-; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl -196(%ebp), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, -328(%ebp) # 4-byte Spill
-; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -328(%ebp), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: setb -328(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl -328(%ebp), %esi # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl -308(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -208(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -328(%ebp) # 4-byte Spill
-; X32-NEXT: movl -228(%ebp), %edx # 4-byte Reload
+; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %ebx, %edx
-; X32-NEXT: movl -196(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movzbl -448(%ebp), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl -328(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl -368(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl -620(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl -788(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl -784(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl -52(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, -592(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -24(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, -532(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -572(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, -428(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -32(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, -452(%ebp) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl -20(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -44(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -196(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -464(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -620(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -68(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -328(%ebp) # 4-byte Spill
-; X32-NEXT: setb -464(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -288(%ebp), %ebx # 4-byte Reload
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill
-; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb -44(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl -44(%ebp), %ecx # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill
-; X32-NEXT: movl -348(%ebp), %ebx # 4-byte Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill
-; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -68(%ebp) # 4-byte Spill
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb -368(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %edi, %esi
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl -368(%ebp), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl -540(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl -576(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: addl -20(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -576(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -348(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, -368(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl -368(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, -368(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: setb %cl
-; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl -320(%ebp), %edi # 4-byte Reload
-; X32-NEXT: adcl -208(%ebp), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %edi
-; X32-NEXT: movl -576(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, -20(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, -368(%ebp) # 4-byte Folded Spill
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl -44(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: setb -576(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill
-; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl -52(%ebp), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill
-; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl -24(%ebp), %esi # 1-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl -280(%ebp), %esi # 4-byte Reload
-; X32-NEXT: addl -308(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl -312(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl -44(%ebp), %edx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %ebx, %edx
-; X32-NEXT: movl -52(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: movzbl -576(%ebp), %eax # 1-byte Folded Reload
-; X32-NEXT: movl -24(%ebp), %edi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl %ecx, %esi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl -540(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -800(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -52(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -796(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -24(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -792(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, %edi
-; X32-NEXT: movl -32(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -228(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -68(%ebp), %edx # 4-byte Reload
-; X32-NEXT: adcl -196(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -620(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl -368(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: adcl -328(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movzbl -464(%ebp), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, -44(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: addl -344(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, -32(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -404(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -72(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -76(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl -232(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl -52(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl -164(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -24(%ebp), %edx # 4-byte Reload
-; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl -56(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl -32(%ebp), %esi # 4-byte Reload
-; X32-NEXT: addl -616(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill
-; X32-NEXT: movl -68(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -612(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -68(%ebp) # 4-byte Spill
-; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -424(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -420(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, -368(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -508(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -504(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -152(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -64(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -464(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -292(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -372(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -88(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -296(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -272(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -332(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -288(%ebp), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl $0, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill
-; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -204(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill
-; X32-NEXT: movl -348(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill
-; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %edi, %ebp
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl -72(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl -468(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl -804(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: addl -164(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -76(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb %cl
-; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: addl -304(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, -164(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, -232(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl -56(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl -40(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: setb -72(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill
-; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl -40(%ebp), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill
-; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: setb -40(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl -40(%ebp), %esi # 1-byte Folded Reload
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl -280(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl -304(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -128(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill
-; X32-NEXT: movl -56(%ebp), %edx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edx
-; X32-NEXT: movl -76(%ebp), %esi # 4-byte Reload
+; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movzbl -72(%ebp), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl -468(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -816(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -812(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -344(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -808(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill
-; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill
-; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -148(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -328(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -204(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, -64(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill
-; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, -468(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -508(%ebp) # 4-byte Spill
-; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl -468(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movzbl %cl, %eax
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -504(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl -124(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl -512(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl -820(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: addl -196(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -404(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -328(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -196(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -72(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, -328(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, -468(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb %cl
-; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload
-; X32-NEXT: addl -304(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %esi
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl -404(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, -328(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, -468(%ebp) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl -64(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl -72(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: setb -196(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -236(%ebp), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill
-; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl -64(%ebp), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, -404(%ebp) # 4-byte Spill
-; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl -112(%ebp), %edi # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -404(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb -404(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: setb %bl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl -404(%ebp), %edi # 1-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
; X32-NEXT: adcl %edi, %edx
-; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload
-; X32-NEXT: addl -304(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl -268(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: adcl -128(%ebp), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
; X32-NEXT: adcl %edx, %ebx
-; X32-NEXT: movl -72(%ebp), %edx # 4-byte Reload
+; X32-NEXT: movl (%esp), %edx # 4-byte Reload
; X32-NEXT: addl %esi, %edx
-; X32-NEXT: movl -64(%ebp), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movzbl -196(%ebp), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: addl -512(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl -676(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl -624(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl -628(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: addl -152(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -228(%ebp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -164(%ebp), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, -628(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -232(%ebp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, -624(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -344(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -300(%ebp), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, -232(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill
-; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl -232(%ebp), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %edi, %ecx
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull -144(%ebp) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %ecx
-; X32-NEXT: setb %bl
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull -144(%ebp) # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: addl -336(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload
-; X32-NEXT: adcl -176(%ebp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, -152(%ebp) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, -232(%ebp) # 4-byte Spill
-; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -300(%ebp), %edi # 4-byte Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, -404(%ebp) # 4-byte Spill
-; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, -540(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb -196(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl -124(%ebp), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl -196(%ebp), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl -588(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl -824(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: addl -164(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -228(%ebp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, -424(%ebp) # 4-byte Spill
-; X32-NEXT: adcl $0, -152(%ebp) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, -232(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %esi, %edi
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 60(%eax), %esi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %esi, -164(%ebp) # 4-byte Spill
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebp, %ecx