Diffstat (limited to 'lib/Target/ARM')
 -rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h        3
 -rw-r--r--  lib/Target/ARM/ARMFastISel.cpp          38
 -rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp     16
 -rw-r--r--  lib/Target/ARM/ARMHazardRecognizer.cpp  17
 -rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp      15
 -rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp     102
 -rw-r--r--  lib/Target/ARM/ARMInstrFormats.td        7
 -rw-r--r--  lib/Target/ARM/ARMInstrInfo.td           4
 -rw-r--r--  lib/Target/ARM/ARMInstrNEON.td          31
 -rw-r--r--  lib/Target/ARM/ARMInstrVFP.td          162
 -rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp          4
 -rw-r--r--  lib/Target/ARM/MLxExpansionPass.cpp     20
 -rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp           9
 -rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp       6
 14 files changed, 263 insertions, 171 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 1fb88726d0de..7e2183d7cd5e 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -155,10 +155,11 @@ namespace ARMII {
     //===------------------------------------------------------------------===//
     // Code domain.
     DomainShift   = 18,
-    DomainMask    = 3 << DomainShift,
+    DomainMask    = 7 << DomainShift,
     DomainGeneral = 0 << DomainShift,
     DomainVFP     = 1 << DomainShift,
     DomainNEON    = 2 << DomainShift,
+    DomainNEONA8  = 4 << DomainShift,
 
     //===------------------------------------------------------------------===//
     // Field shifts - such shifts are used to set field while generating
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 9f295302db0e..26f48b308316 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -172,6 +172,7 @@ class ARMFastISel : public FastISel {
     unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
     unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
     unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+    unsigned ARMSelectCallOp(const GlobalValue *GV);
 
     // Call handling routines.
   private:
@@ -1633,6 +1634,25 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
   return true;
 }
 
+unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
+
+  // Depend our opcode for thumb on whether or not we're targeting an
+  // externally callable function. For libcalls we'll just pass a NULL GV
+  // in here.
+  bool isExternal = false;
+  if (!GV || GV->hasExternalLinkage()) isExternal = true;
+
+  // Darwin needs the r9 versions of the opcodes.
+  bool isDarwin = Subtarget->isTargetDarwin();
+  if (isThumb && isExternal) {
+    return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi;
+  } else if (isThumb) {
+    return isDarwin ? ARM::tBLr9 : ARM::tBL;
+  } else {
+    return isDarwin ? ARM::BLr9 : ARM::BL;
+  }
+}
+
 // A quick function that will emit a call for a named libcall in F with the
 // vector of passed arguments for the Instruction in I. We can assume that we
 // can emit a call for any libcall we can produce. This is an abridged version
@@ -1694,20 +1714,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
   // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
   // TODO: Turn this into the table of arm call ops.
   MachineInstrBuilder MIB;
-  unsigned CallOpc;
-  if(isThumb) {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+  unsigned CallOpc = ARMSelectCallOp(NULL);
+  if(isThumb)
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                  TII.get(CallOpc)))
                          .addExternalSymbol(TLI.getLibcallName(Call));
-  } else {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+  else
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                  TII.get(CallOpc))
                          .addExternalSymbol(TLI.getLibcallName(Call)));
-  }
 
   // Add implicit physical register uses to the call.
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
@@ -1813,21 +1830,18 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
   // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
   // TODO: Turn this into the table of arm call ops.
   MachineInstrBuilder MIB;
-  unsigned CallOpc;
+  unsigned CallOpc = ARMSelectCallOp(GV);
   // Explicitly adding the predicate here.
-  if(isThumb) {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+  if(isThumb)
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                  TII.get(CallOpc)))
                          .addGlobalAddress(GV, 0, 0);
-  } else {
-    CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+  else
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                  TII.get(CallOpc))
                          .addGlobalAddress(GV, 0, 0));
-  }
 
   // Add implicit physical register uses to the call.
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index f42c6db84fd3..68c33f098ec9 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -215,7 +215,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
 
   // Move past area 3.
-  if (DPRCSSize > 0) MBBI++;
+  if (DPRCSSize > 0) {
+    MBBI++;
+    // Since vpush register list cannot have gaps, there may be multiple vpush
+    // instructions in the prologue.
+    while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
+      MBBI++;
+  }
 
   NumBytes = DPRCSOffset;
   if (NumBytes) {
@@ -370,7 +376,13 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
       emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
 
     // Increment past our save areas.
-    if (AFI->getDPRCalleeSavedAreaSize()) MBBI++;
+    if (AFI->getDPRCalleeSavedAreaSize()) {
+      MBBI++;
+      // Since vpop register list cannot have gaps, there may be multiple vpop
+      // instructions in the epilogue.
+      while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+        MBBI++;
+    }
     if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
     if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
   }
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 676b01e91c53..e97ce50bc429 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -21,17 +21,14 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
   // FIXME: Detect integer instructions properly.
   const TargetInstrDesc &TID = MI->getDesc();
   unsigned Domain = TID.TSFlags & ARMII::DomainMask;
-  if (Domain == ARMII::DomainVFP) {
-    unsigned Opcode = MI->getOpcode();
-    if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
-        Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
-      return false;
-  } else if (Domain == ARMII::DomainNEON) {
-    if (MI->getDesc().mayStore() || MI->getDesc().mayLoad())
-      return false;
-  } else
+  if (TID.mayStore())
     return false;
-  return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+  unsigned Opcode = TID.getOpcode();
+  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+    return false;
+  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+    return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+  return false;
 }
 
 ScheduleHazardRecognizer::HazardType
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index a506cffdba34..f0d5a7d7c2e7 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -126,6 +126,7 @@ public:
   bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 
   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 
@@ -886,6 +887,20 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
   return true;
 }
 
+bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
+                                            SDValue &Offset) {
+  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
+  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
+  if (AM != ISD::POST_INC)
+    return false;
+  Offset = N;
+  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
+    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
+      Offset = CurDAG->getRegister(0, MVT::i32);
+  }
+  return true;
+}
+
 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                        SDValue &Offset, SDValue &Label) {
   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 1835ec0f0054..ab9f9e1571e3 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2236,7 +2236,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
     RC = ARM::GPRRegisterClass;
 
   // Transform the arguments stored in physical registers into virtual ones.
-  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
 
   SDValue ArgValue2;
@@ -2250,7 +2250,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
                             MachinePointerInfo::getFixedStack(FI),
                             false, false, 0);
   } else {
-    Reg = MF.addLiveIn(NextVA.getLocReg(), RC, dl);
+    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
   }
 
@@ -2331,7 +2331,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
         llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
 
       // Transform the arguments in physical registers into virtual ones.
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
     }
 
@@ -2408,7 +2408,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
         else
          RC = ARM::GPRRegisterClass;
 
-        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC, dl);
+        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
         SDValue Store =
           DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -2838,8 +2838,51 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
   EVT VT = Op.getValueType();
   EVT SrcVT = Tmp1.getValueType();
-  bool F2IisFast = Subtarget->isCortexA9() ||
-    Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR;
+  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
+    Tmp0.getOpcode() == ARMISD::VMOVDRR;
+  bool UseNEON = !InGPR && Subtarget->hasNEON();
+
+  if (UseNEON) {
+    // Use VBSL to copy the sign bit.
+    unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+    SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
+                               DAG.getTargetConstant(EncodedVal, MVT::i32));
+    EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
+    if (VT == MVT::f64)
+      Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+                         DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
+                         DAG.getConstant(32, MVT::i32));
+    else /*if (VT == MVT::f32)*/
+      Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
+    if (SrcVT == MVT::f32) {
+      Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
+      if (VT == MVT::f64)
+        Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+                           DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
+                           DAG.getConstant(32, MVT::i32));
+    }
+    Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
+    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
+
+    SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+                                            MVT::i32);
+    AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
+    SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
+                                  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
+
+    SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
+                              DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
+                              DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
+    if (SrcVT == MVT::f32) {
+      Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
+      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+                        DAG.getConstant(0, MVT::i32));
+    } else {
+      Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
+    }
+
+    return Res;
+  }
 
   // Bitcast operand 1 to i32.
   if (SrcVT == MVT::f64)
@@ -2847,37 +2890,24 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
                        &Tmp1, 1).getValue(1);
   Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
 
-  // If float to int conversion isn't going to be super expensive, then simply
-  // or in the signbit.
-  if (F2IisFast) {
-    SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
-    SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
-    Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
-    if (VT == MVT::f32) {
-      Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
-                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
-      return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
-                         DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
-    }
-
-    // f64: Or the high part with signbit and then combine two parts.
-    Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
-                       &Tmp0, 1);
-    SDValue Lo = Tmp0.getValue(0);
-    SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
-    Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
-    return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+  // Or in the signbit with integer operations.
+  SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+  SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+  if (VT == MVT::f32) {
+    Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+                       DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+                       DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
   }
 
-  // Remove the signbit of operand 0.
-  Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
-
-  // If operand 1 signbit is one, then negate operand 0.
-  SDValue ARMcc;
-  SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32),
-                          ISD::SETLT, ARMcc, DAG, dl);
-  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-  return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp);
+  // f64: Or the high part with signbit and then combine two parts.
+  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+                     &Tmp0, 1);
+  SDValue Lo = Tmp0.getValue(0);
+  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
 }
 
 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
@@ -2897,7 +2927,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
   }
 
   // Return LR, which contains the return address. Mark it an implicit live-in.
-  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32), dl);
+  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
 
   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
 }
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 765cba42d0bd..359ac45cee1d 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -127,13 +127,14 @@ def IndexModePost : IndexMode<2>;
 def IndexModeUpd  : IndexMode<3>;
 
 // Instruction execution domain.
-class Domain<bits<2> val> {
-  bits<2> Value = val;
+class Domain<bits<3> val> {
+  bits<3> Value = val;
 }
 def GenericDomain : Domain<0>;
 def VFPDomain     : Domain<1>; // Instructions in VFP domain only
 def NeonDomain    : Domain<2>; // Instructions in Neon domain only
 def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
+def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
 
 //===----------------------------------------------------------------------===//
 // ARM special operands.
@@ -249,7 +250,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
   let TSFlags{15-10} = Form;
   let TSFlags{16}    = isUnaryDataProc;
   let TSFlags{17}    = canXformTo16Bit;
-  let TSFlags{19-18} = D.Value;
+  let TSFlags{20-18} = D.Value;
 
   let Constraints = cstr;
   let Itinerary = itin;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c827ce3da97c..6e3fe2e039f5 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -561,7 +561,9 @@ def addrmode6 : Operand<i32>,
   let EncoderMethod = "getAddrMode6AddressOpValue";
 }
 
-def am6offset : Operand<i32> {
+def am6offset : Operand<i32>,
+                ComplexPattern<i32, 1, "SelectAddrMode6Offset",
+                               [], [SDNPWantRoot]> {
   let PrintMethod = "printAddrMode6OffsetOperand";
   let MIOperandInfo = (ops GPR);
   let EncoderMethod = "getAddrMode6OffsetOpValue";
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 1e2e5504e662..dc3d63e26ef5 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1402,31 +1402,42 @@ def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
 def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
           (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
 
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-
 // ...with address register writeback:
-class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+               PatFrag StoreOp, SDNode ExtractOp>
   : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
             (ins addrmode6:$Rn, am6offset:$Rm,
              DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
             "\\{$Vd[$lane]\\}, $Rn$Rm",
-            "$Rn.addr = $wb", []>;
+            "$Rn.addr = $wb",
+            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
+                                    addrmode6:$Rn, am6offset:$Rm))]>;
+class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+  : VSTQLNWBPseudo<IIC_VST1lnu> {
+  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+                                        addrmode6:$addr, am6offset:$offset))];
+}
 
-def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8"> {
+def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
+                             NEONvgetlaneu> {
   let Inst{7-5} = lane{2-0};
 }
-def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> {
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
+                             NEONvgetlaneu> {
   let Inst{7-6} = lane{1-0};
   let Inst{4}   = Rn{5};
 }
-def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> {
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
+                             extractelt> {
   let Inst{7}   = lane{0};
   let Inst{5-4} = Rn{5-4};
 }
 
-def VST1LNq8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST1lnu>;
-def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
-def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
+def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
 
 // VST2LN   : Vector Store (single 2-element structure from one lane)
 class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 920c5c98002a..29902833f2bb 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -197,9 +197,9 @@ def VADDS  : ASbIn<0b11100, 0b11, 0, 0,
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
                    [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSUBD  : ADbI<0b11100, 0b11, 1, 0,
@@ -211,9 +211,9 @@ def VSUBS  : ASbIn<0b11100, 0b11, 1, 0,
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
                    [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VDIVD  : ADbI<0b11101, 0b00, 0, 0,
@@ -235,9 +235,9 @@ def VMULS  : ASbIn<0b11100, 0b10, 0, 0,
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
                    [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VNMULD : ADbI<0b11100, 0b10, 1, 0,
@@ -249,9 +249,9 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
                   (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                   IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
                   [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // Match reassociated forms only if not sign dependent rounding.
@@ -271,9 +271,9 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
                   (outs), (ins SPR:$Sd, SPR:$Sm),
                   IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
                   [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // FIXME: Verify encoding after integrated assembler is working.
@@ -286,9 +286,9 @@ def VCMPS  : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
                   (outs), (ins SPR:$Sd, SPR:$Sm),
                   IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
                   [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 } // Defs = [FPSCR]
 
@@ -305,9 +305,9 @@ def VABSS  : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
                    (outs SPR:$Sd), (ins SPR:$Sm),
                    IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
                    [(set SPR:$Sd, (fabs SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 let Defs = [FPSCR] in {
@@ -326,9 +326,9 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
   let Inst{3-0} = 0b0000;
   let Inst{5}   = 0;
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // FIXME: Verify encoding after integrated assembler is working.
@@ -347,9 +347,9 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
   let Inst{3-0} = 0b0000;
   let Inst{5}   = 0;
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 } // Defs = [FPSCR]
 
@@ -423,9 +423,9 @@ def VNEGS  : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
                    (outs SPR:$Sd), (ins SPR:$Sm),
                    IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
                    [(set SPR:$Sd, (fneg SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
@@ -598,9 +598,9 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
                                 [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
   let Inst{7} = 1; // s32
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
@@ -616,9 +616,9 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
                                 [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
   let Inst{7} = 0; // u32
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // FP -> Int:
@@ -671,9 +671,9 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
                                  [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
   let Inst{7} = 1; // Z bit
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -689,9 +689,9 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
                                 [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
   let Inst{7} = 1; // Z bit
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
@@ -743,36 +743,36 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
@@ -801,36 +801,36 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                        IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
                        [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
@@ -874,9 +874,9 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
                                  SPR:$Sdin))]>,
               RegConstraint<"$Sdin = $Sd">,
               Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -901,9 +901,9 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
                                  SPR:$Sdin))]>,
               RegConstraint<"$Sdin = $Sd">,
              Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -928,9 +928,9 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
                                  SPR:$Sdin))]>,
                 RegConstraint<"$Sdin = $Sd">,
                 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
@@ -954,9 +954,9 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
                 [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm),
                                          SPR:$Sdin))]>,
                 RegConstraint<"$Sdin = $Sd">,
                 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
@@ -995,9 +995,9 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
                    IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
                    [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
                    RegConstraint<"$Sn = $Sd"> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 } // neverHasSideEffects
 
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 0bd740cfb28c..1465984899c6 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -171,7 +171,9 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
 
   // Materializable GVs (in JIT lazy compilation mode) do not require an extra
   // load from stub.
-  bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+  bool isDecl = GV->hasAvailableExternallyLinkage();
+  if (GV->isDeclaration() && !GV->isMaterializable())
+    isDecl = true;
 
   if (!isTargetDarwin()) {
     // Extra load is needed for all externally visible.
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index f9e86eb36e04..9a27e2f47064 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -132,22 +132,16 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
 }
 
 bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
-  const TargetInstrDesc &TID = MI->getDesc();
   // FIXME: Detect integer instructions properly.
+  const TargetInstrDesc &TID = MI->getDesc();
   unsigned Domain = TID.TSFlags & ARMII::DomainMask;
-  if (Domain == ARMII::DomainVFP) {
-    unsigned Opcode = TID.getOpcode();
-    if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
-        Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
-      return false;
-  } else if (Domain == ARMII::DomainNEON) {
-    if (TID.mayStore() || TID.mayLoad())
-      return false;
-  } else {
+  if (TID.mayStore())
     return false;
-  }
-
-  return MI->readsRegister(Reg, TRI);
+  unsigned Opcode = TID.getOpcode();
+  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+    return false;
+  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+    return MI->readsRegister(Reg, TRI);
+  return false;
 }
 
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 97e54bfaed9e..965665c2821a 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -35,6 +35,7 @@ namespace {
   private:
     const TargetRegisterInfo *TRI;
    const ARMBaseInstrInfo *TII;
+    bool isA8;
 
     typedef DenseMap<unsigned, const MachineInstr*> RegMap;
 
@@ -43,6 +44,11 @@ namespace {
   char NEONMoveFixPass::ID = 0;
 }
 
+static bool inNEONDomain(unsigned Domain, bool isA8) {
+  return (Domain & ARMII::DomainNEON) ||
+         (isA8 && (Domain & ARMII::DomainNEONA8));
+}
+
 bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
   RegMap Defs;
   bool Modified = false;
@@ -70,7 +76,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
       Domain = ARMII::DomainNEON;
     }
 
-    if (Domain & ARMII::DomainNEON) {
+    if (inNEONDomain(Domain, isA8)) {
       // Convert VMOVD to VMOVDneon
       unsigned DestReg = MI->getOperand(0).getReg();
 
@@ -123,6 +129,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
 
   TRI = TM.getRegisterInfo();
   TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+  isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8();
 
   bool Modified = false;
   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 2f67257f8fa1..9b1073be3c8e 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -95,6 +95,12 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
 bool
 Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI) const {
+  while (MBBI->isDebugValue()) {
+    ++MBBI;
+    if (MBBI == MBB.end())
+      return false;
+  }
+
   unsigned PredReg = 0;
   return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
 }
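
The ARMBaseInstrInfo.h and ARMInstrFormats.td changes widen the per-instruction execution-domain field in TSFlags from two bits to three so the new DomainNEONA8 flag can be combined with DomainVFP (VFPNeonA8Domain is Domain<5>, i.e. VFP | NEONA8). NEONMoveFix then treats such instructions as NEON-domain only when the subtarget is Cortex-A8. A minimal standalone sketch of that encoding and check; the constants mirror the patch, the surrounding driver is invented for illustration:

#include <cstdint>
#include <cstdio>

// Execution-domain field, bits 20:18 of TSFlags after this change.
namespace ARMII {
  enum {
    DomainShift   = 18,
    DomainMask    = 7 << DomainShift,   // was 3 << DomainShift (two bits)
    DomainGeneral = 0 << DomainShift,
    DomainVFP     = 1 << DomainShift,
    DomainNEON    = 2 << DomainShift,
    DomainNEONA8  = 4 << DomainShift    // VFP instruction that A8 may run on NEON
  };
}

// Same predicate NEONMoveFix uses: NEON-domain instructions always qualify,
// VFPNeonA8Domain instructions only on Cortex-A8.
static bool inNEONDomain(unsigned Domain, bool isA8) {
  return (Domain & ARMII::DomainNEON) ||
         (isA8 && (Domain & ARMII::DomainNEONA8));
}

int main() {
  // A VFPNeonA8Domain instruction: Domain<5> = VFP | NEONA8.
  unsigned TSFlags = ARMII::DomainVFP | ARMII::DomainNEONA8;
  unsigned Domain = TSFlags & ARMII::DomainMask;
  std::printf("on A8: %d, elsewhere: %d\n",
              inNEONDomain(Domain, true), inNEONDomain(Domain, false));
  return 0;
}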
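
ARMFastISel now funnels call-opcode selection through ARMSelectCallOp, keyed on Thumb vs. ARM, Darwin (the r9-preserving variants) vs. other targets, and whether the callee is externally visible; libcalls pass a null GlobalValue and so take the external path. The code's own TODO asks for "the table of arm call ops"; one possible shape for such a table is sketched below. Only the opcode names come from the patch, the struct and helper are hypothetical:

// Hypothetical sketch of the call-op table the TODO in ARMFastISel.cpp
// mentions; not an LLVM API.
enum CallOpcode { BL, BLr9, tBL, tBLr9, tBLXi, tBLXi_r9 };

struct CallOpKey {
  bool Thumb;      // isThumb
  bool Darwin;     // Subtarget->isTargetDarwin()
  bool External;   // !GV || GV->hasExternalLinkage()
};

static CallOpcode selectCallOp(CallOpKey K) {
  if (K.Thumb && K.External) return K.Darwin ? tBLXi_r9 : tBLXi;
  if (K.Thumb)               return K.Darwin ? tBLr9    : tBL;
  return K.Darwin ? BLr9 : BL;
}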
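
The ARMFrameLowering change makes the prologue and epilogue walkers step over every consecutive VSTMDDB_UPD / VLDMDIA_UPD instruction instead of exactly one, because a vpush/vpop register list must be a contiguous run of D registers: a callee-saved set with a gap is saved with one instruction per run. A small standalone illustration of that splitting (register numbers are invented; assumes the set is sorted):

#include <cstdio>
#include <vector>
#include <utility>

// Group sorted D-register indices into contiguous runs; each run becomes one
// vpush in the prologue (and one vpop in the epilogue).
static std::vector<std::pair<int, int>> contiguousRuns(const std::vector<int> &Regs) {
  std::vector<std::pair<int, int>> Runs;   // [first, last] of each run
  for (int R : Regs) {
    if (!Runs.empty() && Runs.back().second + 1 == R)
      Runs.back().second = R;
    else
      Runs.push_back({R, R});
  }
  return Runs;
}

int main() {
  // e.g. d8, d9, d10 and d12 are callee-saved but d11 is not: two vpushes.
  for (auto [Lo, Hi] : contiguousRuns({8, 9, 10, 12})) {
    if (Lo == Hi)
      std::printf("vpush {d%d}\n", Lo);
    else
      std::printf("vpush {d%d-d%d}\n", Lo, Hi);
  }
  return 0;
}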
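
SelectAddrMode6Offset, together with the am6offset ComplexPattern and the new VST1 lane-store writeback patterns, lets a post-increment be folded into the store: when the increment is a constant equal to the access size in bytes, the offset operand becomes register 0 and the fixed post-increment form is selected; otherwise the increment stays in a register. A hedged sketch of just that decision; the struct and helper below are hypothetical, not LLVM APIs:

#include <cstdio>

struct PostInc {
  bool IsConstant;      // step is a compile-time constant
  unsigned StepBytes;   // value of that constant, if IsConstant
};

// Mirrors the check NC->getZExtValue() * 8 == MemoryVT.getSizeInBits().
static const char *selectAM6OffsetForm(PostInc Inc, unsigned AccessBits) {
  if (Inc.IsConstant && Inc.StepBytes * 8 == AccessBits)
    return "[rN]!";     // offset register 0: fixed post-increment form
  return "[rN], rM";    // explicit register increment
}

int main() {
  std::printf("%s\n", selectAM6OffsetForm({true, 4}, 32));   // [rN]!
  std::printf("%s\n", selectAM6OffsetForm({true, 8}, 32));   // [rN], rM
  std::printf("%s\n", selectAM6OffsetForm({false, 0}, 32));  // [rN], rM
  return 0;
}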
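
LowerFCOPYSIGN now keeps f32/f64 copysign in the vector unit when NEON is available, building a per-lane 0x80000000 sign mask with a NEON modified immediate and combining the operands with AND/XOR/OR (a bit-select, per the "Use VBSL" comment); without NEON it does the same masking with integer operations instead of the old compare-and-negate (CNEG) fallback. Both paths compute the bit-level identity below, shown as a standalone C++ sketch rather than DAG nodes:

#include <cstdint>
#include <cstring>
#include <cstdio>

// copysign as bits: result = (magnitude of x) | (sign of y).
static float copysignf_bits(float x, float y) {
  uint32_t xb, yb;
  std::memcpy(&xb, &x, sizeof xb);
  std::memcpy(&yb, &y, sizeof yb);
  const uint32_t SignMask = 0x80000000u;            // the per-lane mask
  uint32_t rb = (xb & ~SignMask) | (yb & SignMask);
  float r;
  std::memcpy(&r, &rb, sizeof r);
  return r;
}

int main() {
  std::printf("%f\n", copysignf_bits(3.5f, -0.0f));  // -3.500000
  std::printf("%f\n", copysignf_bits(-2.0f, 1.0f));  //  2.000000
  return 0;
}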