aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h3
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp38
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp16
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.cpp17
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp15
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp102
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td7
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td4
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td31
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td162
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp4
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp20
-rw-r--r--lib/Target/ARM/NEONMoveFix.cpp9
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp6
14 files changed, 263 insertions, 171 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 1fb88726d0de..7e2183d7cd5e 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -155,10 +155,11 @@ namespace ARMII {
//===------------------------------------------------------------------===//
// Code domain.
DomainShift = 18,
- DomainMask = 3 << DomainShift,
+ DomainMask = 7 << DomainShift,
DomainGeneral = 0 << DomainShift,
DomainVFP = 1 << DomainShift,
DomainNEON = 2 << DomainShift,
+ DomainNEONA8 = 4 << DomainShift,
//===------------------------------------------------------------------===//
// Field shifts - such shifts are used to set field while generating
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 9f295302db0e..26f48b308316 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -172,6 +172,7 @@ class ARMFastISel : public FastISel {
unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+ unsigned ARMSelectCallOp(const GlobalValue *GV);
// Call handling routines.
private:
@@ -1633,6 +1634,25 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
return true;
}
+unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
+
+ // Depend our opcode for thumb on whether or not we're targeting an
+ // externally callable function. For libcalls we'll just pass a NULL GV
+ // in here.
+ bool isExternal = false;
+ if (!GV || GV->hasExternalLinkage()) isExternal = true;
+
+ // Darwin needs the r9 versions of the opcodes.
+ bool isDarwin = Subtarget->isTargetDarwin();
+ if (isThumb && isExternal) {
+ return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi;
+ } else if (isThumb) {
+ return isDarwin ? ARM::tBLr9 : ARM::tBL;
+ } else {
+ return isDarwin ? ARM::BLr9 : ARM::BL;
+ }
+}
+
// A quick function that will emit a call for a named libcall in F with the
// vector of passed arguments for the Instruction in I. We can assume that we
// can emit a call for any libcall we can produce. This is an abridged version
@@ -1694,20 +1714,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
// TODO: Turn this into the table of arm call ops.
MachineInstrBuilder MIB;
- unsigned CallOpc;
- if(isThumb) {
- CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+ unsigned CallOpc = ARMSelectCallOp(NULL);
+ if(isThumb)
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc)))
.addExternalSymbol(TLI.getLibcallName(Call));
- } else {
- CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+ else
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc))
.addExternalSymbol(TLI.getLibcallName(Call)));
- }
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
@@ -1813,21 +1830,18 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
// Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
// TODO: Turn this into the table of arm call ops.
MachineInstrBuilder MIB;
- unsigned CallOpc;
+ unsigned CallOpc = ARMSelectCallOp(GV);
// Explicitly adding the predicate here.
- if(isThumb) {
- CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+ if(isThumb)
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc)))
.addGlobalAddress(GV, 0, 0);
- } else {
- CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+ else
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc))
.addGlobalAddress(GV, 0, 0));
- }
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index f42c6db84fd3..68c33f098ec9 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -215,7 +215,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
// Move past area 3.
- if (DPRCSSize > 0) MBBI++;
+ if (DPRCSSize > 0) {
+ MBBI++;
+ // Since vpush register list cannot have gaps, there may be multiple vpush
+ // instructions in the prologue.
+ while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
+ MBBI++;
+ }
NumBytes = DPRCSOffset;
if (NumBytes) {
@@ -370,7 +376,13 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
// Increment past our save areas.
- if (AFI->getDPRCalleeSavedAreaSize()) MBBI++;
+ if (AFI->getDPRCalleeSavedAreaSize()) {
+ MBBI++;
+ // Since vpop register list cannot have gaps, there may be multiple vpop
+ // instructions in the epilogue.
+ while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+ MBBI++;
+ }
if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 676b01e91c53..e97ce50bc429 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -21,17 +21,14 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
// FIXME: Detect integer instructions properly.
const TargetInstrDesc &TID = MI->getDesc();
unsigned Domain = TID.TSFlags & ARMII::DomainMask;
- if (Domain == ARMII::DomainVFP) {
- unsigned Opcode = MI->getOpcode();
- if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
- Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
- return false;
- } else if (Domain == ARMII::DomainNEON) {
- if (MI->getDesc().mayStore() || MI->getDesc().mayLoad())
- return false;
- } else
+ if (TID.mayStore())
return false;
- return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+ unsigned Opcode = TID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+ return false;
}
ScheduleHazardRecognizer::HazardType
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index a506cffdba34..f0d5a7d7c2e7 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -126,6 +126,7 @@ public:
bool SelectAddrMode5(SDValue N, SDValue &Base,
SDValue &Offset);
bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+ bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
@@ -886,6 +887,20 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
return true;
}
+bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
+ SDValue &Offset) {
+ LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
+ ISD::MemIndexedMode AM = LdSt->getAddressingMode();
+ if (AM != ISD::POST_INC)
+ return false;
+ Offset = N;
+ if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
+ if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ }
+ return true;
+}
+
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
SDValue &Offset, SDValue &Label) {
if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 1835ec0f0054..ab9f9e1571e3 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2236,7 +2236,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
RC = ARM::GPRRegisterClass;
// Transform the arguments stored in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
SDValue ArgValue2;
@@ -2250,7 +2250,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
MachinePointerInfo::getFixedStack(FI),
false, false, 0);
} else {
- Reg = MF.addLiveIn(NextVA.getLocReg(), RC, dl);
+ Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
}
@@ -2331,7 +2331,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
}
@@ -2408,7 +2408,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
else
RC = ARM::GPRRegisterClass;
- unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC, dl);
+ unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -2838,8 +2838,51 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
- bool F2IisFast = Subtarget->isCortexA9() ||
- Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR;
+ bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
+ Tmp0.getOpcode() == ARMISD::VMOVDRR;
+ bool UseNEON = !InGPR && Subtarget->hasNEON();
+
+ if (UseNEON) {
+ // Use VBSL to copy the sign bit.
+ unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+ SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
+ DAG.getTargetConstant(EncodedVal, MVT::i32));
+ EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
+ if (VT == MVT::f64)
+ Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
+ DAG.getConstant(32, MVT::i32));
+ else /*if (VT == MVT::f32)*/
+ Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
+ if (SrcVT == MVT::f32) {
+ Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
+ if (VT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
+ DAG.getConstant(32, MVT::i32));
+ }
+ Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
+
+ SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+ MVT::i32);
+ AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
+ SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
+
+ SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
+ if (SrcVT == MVT::f32) {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+ DAG.getConstant(0, MVT::i32));
+ } else {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
+ }
+
+ return Res;
+ }
// Bitcast operand 1 to i32.
if (SrcVT == MVT::f64)
@@ -2847,37 +2890,24 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
&Tmp1, 1).getValue(1);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
- // If float to int conversion isn't going to be super expensive, then simply
- // or in the signbit.
- if (F2IisFast) {
- SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
- SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
- Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
- if (VT == MVT::f32) {
- Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
- DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
- DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
- }
-
- // f64: Or the high part with signbit and then combine two parts.
- Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
- &Tmp0, 1);
- SDValue Lo = Tmp0.getValue(0);
- SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
- Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
- return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ // Or in the signbit with integer operations.
+ SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+ SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+ Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+ if (VT == MVT::f32) {
+ Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
}
- // Remove the signbit of operand 0.
- Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
-
- // If operand 1 signbit is one, then negate operand 0.
- SDValue ARMcc;
- SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32),
- ISD::SETLT, ARMcc, DAG, dl);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp);
+ // f64: Or the high part with signbit and then combine two parts.
+ Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp0, 1);
+ SDValue Lo = Tmp0.getValue(0);
+ SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+ Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
@@ -2897,7 +2927,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
}
// Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32), dl);
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 765cba42d0bd..359ac45cee1d 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -127,13 +127,14 @@ def IndexModePost : IndexMode<2>;
def IndexModeUpd : IndexMode<3>;
// Instruction execution domain.
-class Domain<bits<2> val> {
- bits<2> Value = val;
+class Domain<bits<3> val> {
+ bits<3> Value = val;
}
def GenericDomain : Domain<0>;
def VFPDomain : Domain<1>; // Instructions in VFP domain only
def NeonDomain : Domain<2>; // Instructions in Neon domain only
def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
+def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
//===----------------------------------------------------------------------===//
// ARM special operands.
@@ -249,7 +250,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
let TSFlags{15-10} = Form;
let TSFlags{16} = isUnaryDataProc;
let TSFlags{17} = canXformTo16Bit;
- let TSFlags{19-18} = D.Value;
+ let TSFlags{20-18} = D.Value;
let Constraints = cstr;
let Itinerary = itin;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c827ce3da97c..6e3fe2e039f5 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -561,7 +561,9 @@ def addrmode6 : Operand<i32>,
let EncoderMethod = "getAddrMode6AddressOpValue";
}
-def am6offset : Operand<i32> {
+def am6offset : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectAddrMode6Offset",
+ [], [SDNPWantRoot]> {
let PrintMethod = "printAddrMode6OffsetOperand";
let MIOperandInfo = (ops GPR);
let EncoderMethod = "getAddrMode6OffsetOpValue";
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 1e2e5504e662..dc3d63e26ef5 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1402,31 +1402,42 @@ def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
(VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-
// ...with address register writeback:
-class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
"\\{$Vd[$lane]\\}, $Rn$Rm",
- "$Rn.addr = $wb", []>;
+ "$Rn.addr = $wb",
+ [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
+ addrmode6:$Rn, am6offset:$Rm))]>;
+class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNWBPseudo<IIC_VST1lnu> {
+ let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr, am6offset:$offset))];
+}
-def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8"> {
+def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
+ NEONvgetlaneu> {
let Inst{7-5} = lane{2-0};
}
-def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> {
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
+ NEONvgetlaneu> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{5};
}
-def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> {
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
+ extractelt> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
}
-def VST1LNq8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
-def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
-def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
+def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 920c5c98002a..29902833f2bb 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -197,9 +197,9 @@ def VADDS : ASbIn<0b11100, 0b11, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
@@ -211,9 +211,9 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
@@ -235,9 +235,9 @@ def VMULS : ASbIn<0b11100, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VNMULD : ADbI<0b11100, 0b10, 1, 0,
@@ -249,9 +249,9 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// Match reassociated forms only if not sign dependent rounding.
@@ -271,9 +271,9 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
[(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// FIXME: Verify encoding after integrated assembler is working.
@@ -286,9 +286,9 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
} // Defs = [FPSCR]
@@ -305,9 +305,9 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
[(set SPR:$Sd, (fabs SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
let Defs = [FPSCR] in {
@@ -326,9 +326,9 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// FIXME: Verify encoding after integrated assembler is working.
@@ -347,9 +347,9 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
} // Defs = [FPSCR]
@@ -423,9 +423,9 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
[(set SPR:$Sd, (fneg SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
@@ -598,9 +598,9 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
[(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
let Inst{7} = 1; // s32
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
@@ -616,9 +616,9 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
[(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
let Inst{7} = 0; // u32
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// FP -> Int:
@@ -671,9 +671,9 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
[(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
let Inst{7} = 1; // Z bit
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -689,9 +689,9 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
[(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
let Inst{7} = 1; // Z bit
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
@@ -743,36 +743,36 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
@@ -801,36 +801,36 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
@@ -874,9 +874,9 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -901,9 +901,9 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -928,9 +928,9 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
@@ -954,9 +954,9 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
@@ -995,9 +995,9 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
[/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
RegConstraint<"$Sn = $Sd"> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
} // neverHasSideEffects
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 0bd740cfb28c..1465984899c6 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -171,7 +171,9 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
// Materializable GVs (in JIT lazy compilation mode) do not require an extra
// load from stub.
- bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+ bool isDecl = GV->hasAvailableExternallyLinkage();
+ if (GV->isDeclaration() && !GV->isMaterializable())
+ isDecl = true;
if (!isTargetDarwin()) {
// Extra load is needed for all externally visible.
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index f9e86eb36e04..9a27e2f47064 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -132,22 +132,16 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
}
bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
- const TargetInstrDesc &TID = MI->getDesc();
// FIXME: Detect integer instructions properly.
+ const TargetInstrDesc &TID = MI->getDesc();
unsigned Domain = TID.TSFlags & ARMII::DomainMask;
- if (Domain == ARMII::DomainVFP) {
- unsigned Opcode = TID.getOpcode();
- if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
- Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
- return false;
- } else if (Domain == ARMII::DomainNEON) {
- if (TID.mayStore() || TID.mayLoad())
- return false;
- } else {
+ if (TID.mayStore())
return false;
- }
-
- return MI->readsRegister(Reg, TRI);
+ unsigned Opcode = TID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(Reg, TRI);
return false;
}
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 97e54bfaed9e..965665c2821a 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -35,6 +35,7 @@ namespace {
private:
const TargetRegisterInfo *TRI;
const ARMBaseInstrInfo *TII;
+ bool isA8;
typedef DenseMap<unsigned, const MachineInstr*> RegMap;
@@ -43,6 +44,11 @@ namespace {
char NEONMoveFixPass::ID = 0;
}
+static bool inNEONDomain(unsigned Domain, bool isA8) {
+ return (Domain & ARMII::DomainNEON) ||
+ (isA8 && (Domain & ARMII::DomainNEONA8));
+}
+
bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
RegMap Defs;
bool Modified = false;
@@ -70,7 +76,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
Domain = ARMII::DomainNEON;
}
- if (Domain & ARMII::DomainNEON) {
+ if (inNEONDomain(Domain, isA8)) {
// Convert VMOVD to VMOVDneon
unsigned DestReg = MI->getOperand(0).getReg();
@@ -123,6 +129,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
TRI = TM.getRegisterInfo();
TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8();
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 2f67257f8fa1..9b1073be3c8e 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -95,6 +95,12 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
bool
Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const {
+ while (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == MBB.end())
+ return false;
+ }
+
unsigned PredReg = 0;
return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
}