diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-08-07 23:01:33 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-08-07 23:01:33 +0000 |
commit | ee8648bdac07986a0f1ec897b02ec82a2f144d46 (patch) | |
tree | 52d1861acda1205241ee35a94aa63129c604d469 /lib/Target/ARM | |
parent | 1a82d4c088707c791c792f6822f611b47a12bdfe (diff) | |
download | src-ee8648bdac07986a0f1ec897b02ec82a2f144d46.tar.gz src-ee8648bdac07986a0f1ec897b02ec82a2f144d46.zip |
Vendor import of llvm trunk r242221:vendor/llvm/llvm-trunk-r242221
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=286425
svn path=/vendor/llvm/llvm-trunk-r242221/; revision=286426; tag=vendor/llvm/llvm-trunk-r242221
Diffstat (limited to 'lib/Target/ARM')
23 files changed, 1209 insertions, 1070 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 96b4742da2bb..ef609a66d032 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -150,6 +150,10 @@ def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", "NaCl trap">; +def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", + "Generate calls via indirect call " + "instructions">; + // ARM ISAs. def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", "Support ARM v4T instructions">; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index b1a11d626bda..9f43e732bd73 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1230,8 +1230,7 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { Reloc::Model RM = MF.getTarget().getRelocationModel(); if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { - assert(getSubtarget().getTargetTriple().getObjectFormat() == - Triple::MachO && + assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() && "LOAD_STACK_GUARD currently supported only for MachO."); expandLoadStackGuard(MI, RM); MI->getParent()->erase(MI); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 3f79a9b53d70..e7d5be7753e4 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -127,7 +127,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); - const TargetFrameLowering *TFI = STI.getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); // FIXME: avoid re-calculating this every time. BitVector Reserved(getNumRegs()); @@ -194,7 +194,7 @@ unsigned ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); - const TargetFrameLowering *TFI = STI.getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); switch (RC->getID()) { default: @@ -302,7 +302,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg, bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); // When outgoing call frames are so large that we adjust the stack pointer // around the call, we can no longer use the stack pointer to reach the @@ -333,6 +333,7 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const ARMFrameLowering *TFI = getFrameLowering(MF); // We can't realign the stack if: // 1. Dynamic stack realignment is explicitly disabled, // 2. This is a Thumb1 function (it's not useful, so we don't bother), or @@ -347,7 +348,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { return false; // We may also need a base pointer if there are dynamic allocas or stack // pointer adjustments around calls. - if (MF.getSubtarget().getFrameLowering()->hasReservedCallFrame(MF)) + if (TFI->hasReservedCallFrame(MF)) return true; // A base pointer is required and allowed. Check that it isn't too late to // reserve it. @@ -357,9 +358,9 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const ARMFrameLowering *TFI = getFrameLowering(MF); const Function *F = MF.getFunction(); - unsigned StackAlign = - MF.getSubtarget().getFrameLowering()->getStackAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttribute(Attribute::StackAlignment)); @@ -378,7 +379,7 @@ cannotEliminateFrame(const MachineFunction &MF) const { unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); - const TargetFrameLowering *TFI = STI.getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); if (TFI->hasFP(MF)) return getFramePointerReg(STI); @@ -517,7 +518,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // Note that the incoming offset is based on the SP value at function entry, // so it'll be negative. MachineFunction &MF = *MI->getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -694,8 +695,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); - const ARMFrameLowering *TFI = static_cast<const ARMFrameLowering *>( - MF.getSubtarget().getFrameLowering()); + const ARMFrameLowering *TFI = getFrameLowering(MF); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 7dd21ecbe91b..27cf06b995a0 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -142,6 +142,9 @@ def CC_ARM_AAPCS : CallingConv<[ // Handles byval parameters. CCIfByVal<CCPassByVal<4, 4>>, + // The 'nest' parameter, if any, is passed in R12. + CCIfNest<CCAssignToReg<[R12]>>, + // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 4175b4af86e6..fdd0763ea608 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -49,8 +49,6 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -extern cl::opt<bool> EnableARMLongCalls; - namespace { // All possible address modes, plus some. @@ -685,7 +683,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { } unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) return 0; @@ -732,7 +730,7 @@ unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) { } bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT evt = TLI.getValueType(Ty, true); + EVT evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) return false; @@ -786,12 +784,13 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { return ARMComputeAddress(U->getOperand(0), Addr); case Instruction::IntToPtr: // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return ARMComputeAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return ARMComputeAddress(U->getOperand(0), Addr); break; case Instruction::GetElementPtr: { @@ -1365,7 +1364,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt) { Type *Ty = Src1Value->getType(); - EVT SrcEVT = TLI.getValueType(Ty, true); + EVT SrcEVT = TLI.getValueType(DL, Ty, true); if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); @@ -1557,7 +1556,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { return false; Value *Src = I->getOperand(0); - EVT SrcEVT = TLI.getValueType(Src->getType(), true); + EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true); if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); @@ -1750,7 +1749,7 @@ bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) { } bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); // We can get here in the case when we have a binary operation on a non-legal // type and the target independent selector doesn't know how to handle it. @@ -1790,7 +1789,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { } bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { - EVT FPVT = TLI.getValueType(I->getType(), true); + EVT FPVT = TLI.getValueType(DL, I->getType(), true); if (!FPVT.isSimple()) return false; MVT VT = FPVT.getSimpleVT(); @@ -2095,7 +2094,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { CallingConv::ID CC = F.getCallingConv(); if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; @@ -2122,7 +2121,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { return false; unsigned SrcReg = Reg + VA.getValNo(); - EVT RVEVT = TLI.getValueType(RV->getType()); + EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; MVT RVVT = RVEVT.getSimpleVT(); MVT DestVT = VA.getValVT(); @@ -2173,7 +2172,7 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) { unsigned ARMFastISel::getLibcallReg(const Twine &Name) { // Manually compute the global's type to avoid building it when unnecessary. Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0); - EVT LCREVT = TLI.getValueType(GVTy); + EVT LCREVT = TLI.getValueType(DL, GVTy); if (!LCREVT.isSimple()) return 0; GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false, @@ -2246,19 +2245,19 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { return false; unsigned CalleeReg = 0; - if (EnableARMLongCalls) { + if (Subtarget->genLongCalls()) { CalleeReg = getLibcallReg(TLI.getLibcallName(Call)); if (CalleeReg == 0) return false; } // Issue the call. - unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls); + unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls()); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)); // BL / BLX don't take a predicate, but tBL / tBLX do. if (isThumb2) AddDefaultPred(MIB); - if (EnableARMLongCalls) + if (Subtarget->genLongCalls()) MIB.addReg(CalleeReg); else MIB.addExternalSymbol(TLI.getLibcallName(Call)); @@ -2380,7 +2379,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, bool UseReg = false; const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); - if (!GV || EnableARMLongCalls) UseReg = true; + if (!GV || Subtarget->genLongCalls()) UseReg = true; unsigned CalleeReg = 0; if (UseReg) { @@ -2576,8 +2575,8 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) { Value *Op = I->getOperand(0); EVT SrcVT, DestVT; - SrcVT = TLI.getValueType(Op->getType(), true); - DestVT = TLI.getValueType(I->getType(), true); + SrcVT = TLI.getValueType(DL, Op->getType(), true); + DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) return false; @@ -2742,8 +2741,8 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) { if (!SrcReg) return false; EVT SrcEVT, DestEVT; - SrcEVT = TLI.getValueType(SrcTy, true); - DestEVT = TLI.getValueType(DestTy, true); + SrcEVT = TLI.getValueType(DL, SrcTy, true); + DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) return false; @@ -2763,7 +2762,7 @@ bool ARMFastISel::SelectShift(const Instruction *I, return false; // Only handle i32 now. - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (DestVT != MVT::i32) return false; @@ -3026,7 +3025,7 @@ bool ARMFastISel::fastLowerArguments() { if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) return false; - EVT ArgVT = TLI.getValueType(ArgTy); + EVT ArgVT = TLI.getValueType(DL, ArgTy); if (!ArgVT.isSimple()) return false; switch (ArgVT.getSimpleVT().SimpleTy) { case MVT::i8: diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index a52e49780e27..6744000afe2b 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -800,7 +800,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, // This is bad, if an interrupt is taken after the mov, sp is in an // inconsistent state. // Use the first callee-saved register as a scratch register. - assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && + assert(!MFI->getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!"); emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, ARMCC::AL, 0, TII); @@ -1470,7 +1470,8 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, // callee-saved vector registers after realigning the stack. The vst1 and vld1 // instructions take alignment hints that can improve performance. // -static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { +static void +checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) { MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0); if (!SpillAlignedNEONRegs) return; @@ -1497,10 +1498,9 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { // callee-saved registers in order, but it can happen that there are holes in // the range. Registers above the hole will be spilled to the standard DPRCS // area. - MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned NumSpills = 0; for (; NumSpills < 8; ++NumSpills) - if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills)) + if (!SavedRegs.test(ARM::D8 + NumSpills)) break; // Don't do this for just one d-register. It's not worth it. @@ -1511,12 +1511,13 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills); // A scratch register is required for the vst1 / vld1 instructions. - MF.getRegInfo().setPhysRegUsed(ARM::R4); + SavedRegs.set(ARM::R4); } -void -ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); // This tells PEI to spill the FP as if it is any other callee-save register // to take advantage the eliminateFrameIndex machinery. This also ensures it // is spilled in the order specified by getCalleeSavedRegs() to make it easier @@ -1543,12 +1544,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: It will be better just to find spare register here. if (AFI->isThumb2Function() && (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) - MRI.setPhysRegUsed(ARM::R4); + SavedRegs.set(ARM::R4); if (AFI->isThumb1OnlyFunction()) { // Spill LR if Thumb1 function uses variable length argument lists. if (AFI->getArgRegsSaveSize() > 0) - MRI.setPhysRegUsed(ARM::LR); + SavedRegs.set(ARM::LR); // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know // for sure what the stack size will be, but for this, an estimate is good @@ -1558,23 +1559,23 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: It will be better just to find spare register here. unsigned StackSize = MFI->estimateStackSize(MF); if (MFI->hasVarSizedObjects() || StackSize > 508) - MRI.setPhysRegUsed(ARM::R4); + SavedRegs.set(ARM::R4); } // See if we can spill vector registers to aligned stack. - checkNumAlignedDPRCS2Regs(MF); + checkNumAlignedDPRCS2Regs(MF, SavedRegs); // Spill the BasePtr if it's used. if (RegInfo->hasBasePointer(MF)) - MRI.setPhysRegUsed(RegInfo->getBaseRegister()); + SavedRegs.set(RegInfo->getBaseRegister()); // Don't spill FP if the frame can be eliminated. This is determined - // by scanning the callee-save registers to see if any is used. + // by scanning the callee-save registers to see if any is modified. const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; - if (MRI.isPhysRegUsed(Reg)) { + if (SavedRegs.test(Reg)) { Spilled = true; CanEliminateFrame = false; } @@ -1668,7 +1669,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. // Spill LR as well so we can fold BX_RET to the registers restore (LDM). if (!LRSpilled && CS1Spilled) { - MRI.setPhysRegUsed(ARM::LR); + SavedRegs.set(ARM::LR); NumGPRSpills++; SmallVectorImpl<unsigned>::iterator LRPos; LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), @@ -1681,7 +1682,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (hasFP(MF)) { - MRI.setPhysRegUsed(FramePtr); + SavedRegs.set(FramePtr); auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), FramePtr); if (FPPos != UnspilledCS1GPRs.end()) @@ -1700,7 +1701,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill high register if the function is thumb if (!AFI->isThumbFunction() || isARMLowRegister(Reg) || Reg == ARM::LR) { - MRI.setPhysRegUsed(Reg); + SavedRegs.set(Reg); if (!MRI.isReserved(Reg)) ExtraCSSpill = true; break; @@ -1708,7 +1709,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { unsigned Reg = UnspilledCS2GPRs.front(); - MRI.setPhysRegUsed(Reg); + SavedRegs.set(Reg); if (!MRI.isReserved(Reg)) ExtraCSSpill = true; } @@ -1747,7 +1748,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (Extras.size() && NumExtras == 0) { for (unsigned i = 0, e = Extras.size(); i != e; ++i) { - MRI.setPhysRegUsed(Extras[i]); + SavedRegs.set(Extras[i]); } } else if (!AFI->isThumb1OnlyFunction()) { // note: Thumb1 functions spill to R12, not the stack. Reserve a slot @@ -1761,7 +1762,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (ForceLRSpill) { - MRI.setPhysRegUsed(ARM::LR); + SavedRegs.set(ARM::LR); AFI->setLRIsSpilledForFarJump(true); } } diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index d763d17a506f..6fdc5eff5e47 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -54,8 +54,8 @@ public: unsigned &FrameReg, int SPAdj) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 50afb192b331..b110628a0a86 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -533,7 +533,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); return true; } @@ -556,7 +557,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); return true; @@ -702,7 +704,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); @@ -722,7 +725,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -900,7 +904,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } Offset = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), @@ -915,7 +920,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -964,7 +970,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); @@ -981,7 +988,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } ARM_AM::AddrOpc AddSub = ARM_AM::add; @@ -1215,7 +1223,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, MachineFrameInfo *MFI = MF->getFrameInfo(); if (MFI->getObjectAlignment(FI) < 4) MFI->setObjectAlignment(FI, 4); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); return true; } @@ -1237,7 +1246,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, MachineFrameInfo *MFI = MF->getFrameInfo(); if (MFI->getObjectAlignment(FI) < 4) MFI->setObjectAlignment(FI, 4); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); return true; @@ -1285,7 +1295,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); return true; } @@ -1314,7 +1325,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); return true; @@ -1343,7 +1355,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); return true; @@ -1438,7 +1451,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); @@ -2510,7 +2524,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (UseCP) { SDValue CPIdx = CurDAG->getTargetConstantPool( ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), - TLI->getPointerTy()); + TLI->getPointerTy(CurDAG->getDataLayout())); SDNode *ResNode; if (Subtarget->isThumb()) { @@ -2540,7 +2554,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::FrameIndex: { // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. int FI = cast<FrameIndexSDNode>(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + SDValue TFI = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); if (Subtarget->isThumb1Only()) { // Set the alignment of the frame object to 4, to avoid having to generate // more than one ADD diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 4b2105b7442f..e335784f6d87 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -60,11 +60,6 @@ STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); -cl::opt<bool> -EnableARMLongCalls("arm-long-calls", cl::Hidden, - cl::desc("Generate calls via indirect call instructions"), - cl::init(false)); - static cl::opt<bool> ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), @@ -548,6 +543,27 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); + // NEON does not have single instruction CTTZ for vectors. + setOperationAction(ISD::CTTZ, MVT::v8i8, Custom); + setOperationAction(ISD::CTTZ, MVT::v4i16, Custom); + setOperationAction(ISD::CTTZ, MVT::v2i32, Custom); + setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); + + setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); + setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); + setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); + setOperationAction(ISD::CTTZ, MVT::v2i64, Custom); + + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom); + + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); + // NEON only has FMA instructions as of VFP4. if (!Subtarget->hasVFP4()) { setOperationAction(ISD::FMA, MVT::v2f32, Expand); @@ -1149,8 +1165,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } -EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - if (!VT.isVector()) return getPointerTy(); +EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT VT) const { + if (!VT.isVector()) + return getPointerTy(DL); return VT.changeVectorElementTypeToInteger(); } @@ -1429,7 +1447,8 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); - PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), + StackPtr, PtrOff); return DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo::getStack(LocMemOffset), false, false, 0); @@ -1453,7 +1472,8 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, else { assert(NextVA.isMemLoc()); if (!StackPtr.getNode()) - StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, + getPointerTy(DAG.getDataLayout())); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), dl, DAG, NextVA, @@ -1526,7 +1546,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), dl); - SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + SDValue StackPtr = + DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); RegsToPassVector RegsToPass; SmallVector<SDValue, 8> MemOpChains; @@ -1607,7 +1628,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned RegBegin, RegEnd; CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); unsigned int i, j; for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { SDValue Const = DAG.getConstant(4*i, dl, MVT::i32); @@ -1628,12 +1650,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (Flags.getByValSize() > 4*offset) { + auto PtrVT = getPointerTy(DAG.getDataLayout()); unsigned LocMemOffset = VA.getLocMemOffset(); SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); - SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, - StkPtrOff); + SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff); SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl); - SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); + SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl, MVT::i32); SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl, @@ -1693,8 +1715,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isARMFunc = false; bool isLocalARMFunc = false; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + auto PtrVt = getPointerTy(DAG.getDataLayout()); - if (EnableARMLongCalls) { + if (Subtarget->genLongCalls()) { assert((Subtarget->isTargetWindows() || getTargetMachine().getRelocationModel() == Reloc::Static) && "long-calls with non-static relocation model!"); @@ -1709,12 +1732,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); // Get the address of the callee into a register - SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), false, false, + false, 0); } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { const char *Sym = S->getSymbol(); @@ -1724,29 +1746,28 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, ARMPCLabelIndex, 0); // Get the address of the callee into a register - SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), false, false, + false, 0); } } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { const GlobalValue *GV = G->getGlobal(); isDirect = true; - bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); - bool isStub = (isExt && Subtarget->isTargetMachO()) && + bool isDef = GV->isStrongDefinitionForLinker(); + bool isStub = (!isDef && Subtarget->isTargetMachO()) && getTargetMachine().getRelocationModel() != Reloc::Static; isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); // ARM call to a local ARM function is predicable. - isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); + isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); // tBX takes a register source operand. if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); - Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(), - DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), - 0, ARMII::MO_NONLAZY)); - Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, + Callee = DAG.getNode( + ARMISD::WrapperPIC, dl, PtrVt, + DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); + Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(), false, false, true, 0); } else if (Subtarget->isTargetCOFF()) { assert(Subtarget->isTargetWindows() && @@ -1754,20 +1775,20 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned TargetFlags = GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG; - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0, - TargetFlags); + Callee = + DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags); if (GV->hasDLLImportStorageClass()) - Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(), - Callee), MachinePointerInfo::getGOT(), - false, false, false, 0); + Callee = + DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), + DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), + MachinePointerInfo::getGOT(), false, false, false, 0); } else { // On ELF targets for PIC code, direct calls should go through the PLT unsigned OpFlags = 0; if (Subtarget->isTargetELF() && getTargetMachine().getRelocationModel() == Reloc::PIC_) OpFlags = ARMII::MO_PLT; - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); + Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, OpFlags); } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { isDirect = true; @@ -1781,22 +1802,20 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, ARMPCLabelIndex, 4); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), false, false, + false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); - Callee = DAG.getNode(ARMISD::PIC_ADD, dl, - getPointerTy(), Callee, PICLabel); + Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); } else { unsigned OpFlags = 0; // On ELF targets for PIC code, direct calls should go through the PLT if (Subtarget->isTargetELF() && getTargetMachine().getRelocationModel() == Reloc::PIC_) OpFlags = ARMII::MO_PLT; - Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); + Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, OpFlags); } } @@ -2433,7 +2452,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = 0; SDLoc DL(Op); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; @@ -2462,7 +2481,7 @@ SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const { SDLoc dl(GA); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -2508,7 +2527,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SDLoc dl(GA); SDValue Offset; SDValue Chain = DAG.getEntryNode(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Get the Thread Pointer SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); @@ -2574,7 +2593,7 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { @@ -2617,7 +2636,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); @@ -2648,7 +2667,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); const ARMII::TOF TargetFlags = (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result; SDLoc DL(Op); @@ -2672,7 +2691,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = @@ -2716,14 +2735,14 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1)); } case Intrinsic::arm_thread_pointer: { - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); } case Intrinsic::eh_sjlj_lsda: { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; unsigned PCAdj = (RelocM != Reloc::PIC_) @@ -2820,7 +2839,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDLoc dl(Op); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), @@ -2850,7 +2869,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); // Create load node to retrieve arguments from the stack. - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); @@ -2904,8 +2923,9 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, if (REnd != RBegin) ArgOffset = -4 * (ARM::R4 - RBegin); + auto PtrVT = getPointerTy(DAG.getDataLayout()); int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false); - SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT); SmallVector<SDValue, 4> MemOps; const TargetRegisterClass *RC = @@ -2918,8 +2938,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(OrigArg, 4 * i), false, false, 0); MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, - DAG.getConstant(4, dl, getPointerTy())); + FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT)); } if (!MemOps.empty()) @@ -3013,6 +3032,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin); AFI->setArgRegsSaveSize(TotalArgRegsSaveSize); + auto PtrVT = getPointerTy(DAG.getDataLayout()); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -3035,7 +3055,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue ArgValue2; if (VA.isMemLoc()) { int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); @@ -3122,7 +3142,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg, CurByValIndex, VA.getLocMemOffset(), Flags.getByValSize()); - InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); + InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT)); CCInfo.nextInRegsParam(); } else { unsigned FIOffset = VA.getLocMemOffset(); @@ -3130,7 +3150,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, FIOffset, true); // Create load nodes to retrieve arguments from the stack. - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0)); @@ -3855,7 +3875,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Index = Op.getOperand(2); SDLoc dl(Op); - EVT PTy = getPointerTy(); + EVT PTy = getPointerTy(DAG.getDataLayout()); JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI); @@ -4102,8 +4122,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned ARMTargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { +unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const { unsigned Reg = StringSwitch<unsigned>(RegName) .Case("sp", ARM::SP) .Default(0); @@ -4163,7 +4183,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { // Turn f64->i64 into VMOVRRD. if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { SDValue Cvt; - if (TLI.isBigEndian() && SrcVT.isVector() && + if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() && SrcVT.getVectorNumElements() > 1) Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), @@ -4283,8 +4303,82 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { - EVT VT = N->getValueType(0); SDLoc dl(N); + EVT VT = N->getValueType(0); + if (VT.isVector()) { + assert(ST->hasNEON()); + + // Compute the least significant set bit: LSB = X & -X + SDValue X = N->getOperand(0); + SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X); + SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX); + + EVT ElemTy = VT.getVectorElementType(); + + if (ElemTy == MVT::i8) { + // Compute with: cttz(x) = ctpop(lsb - 1) + SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT, + DAG.getTargetConstant(1, dl, ElemTy)); + SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One); + return DAG.getNode(ISD::CTPOP, dl, VT, Bits); + } + + if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) && + (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) { + // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0 + unsigned NumBits = ElemTy.getSizeInBits(); + SDValue WidthMinus1 = + DAG.getNode(ARMISD::VMOVIMM, dl, VT, + DAG.getTargetConstant(NumBits - 1, dl, ElemTy)); + SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB); + return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ); + } + + // Compute with: cttz(x) = ctpop(lsb - 1) + + // Since we can only compute the number of bits in a byte with vcnt.8, we + // have to gather the result with pairwise addition (vpaddl) for i16, i32, + // and i64. + + // Compute LSB - 1. + SDValue Bits; + if (ElemTy == MVT::i64) { + // Load constant 0xffff'ffff'ffff'ffff to register. + SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT, + DAG.getTargetConstant(0x1eff, dl, MVT::i32)); + Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF); + } else { + SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT, + DAG.getTargetConstant(1, dl, ElemTy)); + Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One); + } + + // Count #bits with vcnt.8. + EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; + SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits); + SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8); + + // Gather the #bits with vpaddl (pairwise add.) + EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16; + SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit, + DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), + Cnt8); + if (ElemTy == MVT::i16) + return Cnt16; + + EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32; + SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit, + DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), + Cnt16); + if (ElemTy == MVT::i32) + return Cnt32; + + assert(ElemTy == MVT::i64); + SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), + Cnt32); + return Cnt64; + } if (!ST->hasV6T2Ops()) return SDValue(); @@ -4730,7 +4824,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, ImmMask <<= 1; } - if (DAG.getTargetLoweringInfo().isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) // swap higher and lower 32 bit word Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4); @@ -5868,7 +5962,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, if (BVN->getValueType(0) != MVT::v4i32 || BVN->getOpcode() != ISD::BUILD_VECTOR) return false; - unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; + unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0; unsigned HiElt = 1 - LoElt; ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); @@ -6013,7 +6107,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { SDNode *BVN = N->getOperand(0).getNode(); assert(BVN->getOpcode() == ISD::BUILD_VECTOR && BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); - unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; + unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0; return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32, BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); } @@ -6342,18 +6436,19 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { SDValue Arg = Op.getOperand(0); EVT ArgVT = Arg.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + auto PtrVT = getPointerTy(DAG.getDataLayout()); MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); // Create stack object for sret. - const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy); - const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy); + auto &DL = DAG.getDataLayout(); + const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); + const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); - SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy()); + SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL)); ArgListTy Args; ArgListEntry Entry; @@ -6373,7 +6468,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { const char *LibcallName = (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; - SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); + SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL)); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) @@ -6387,7 +6482,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(), false, false, false, 0); // Address of cos field. - SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet, + SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet, DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl)); SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo(), false, false, false, 0); @@ -6487,7 +6582,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); - case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); case ISD::SETCC: return LowerVSETCC(Op, DAG); case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); @@ -6845,9 +6941,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. - unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); + unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getDataLayout()->getTypeAllocSize(C->getType()); + Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); @@ -6935,9 +7031,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. - unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); + unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getDataLayout()->getTypeAllocSize(C->getType()); + Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); @@ -7313,9 +7409,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, const Constant *C = ConstantInt::get(Int32Ty, LoopSize); // MachineConstantPool wants an explicit alignment. - unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); + unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getDataLayout()->getTypeAllocSize(C->getType()); + Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); if (IsThumb1) @@ -8001,7 +8097,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, // Build operand list. SmallVector<SDValue, 8> Ops; Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); // Input is the vector. Ops.push_back(Vec); @@ -8681,7 +8777,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, std::min(4U, LD->getAlignment() / 2)); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); - if (DCI.DAG.getTargetLoweringInfo().isBigEndian()) + if (DCI.DAG.getDataLayout().isBigEndian()) std::swap (NewLD1, NewLD2); SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); return Result; @@ -9312,7 +9408,9 @@ static SDValue PerformSTORECombine(SDNode *N, SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i < NumElems; ++i) - ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio; + ShuffleVec[i] = DAG.getDataLayout().isBigEndian() + ? (i + 1) * SizeRatio - 1 + : i * SizeRatio; // Can't shuffle using an illegal type. if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); @@ -9339,8 +9437,8 @@ static SDValue PerformSTORECombine(SDNode *N, assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); SmallVector<SDValue, 8> Chains; - SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, DL, - TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL, + TLI.getPointerTy(DAG.getDataLayout())); SDValue BasePtr = St->getBasePtr(); // Perform one or more big stores into memory. @@ -9367,7 +9465,7 @@ static SDValue PerformSTORECombine(SDNode *N, if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; - bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); + bool isBigEndian = DAG.getDataLayout().isBigEndian(); SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore(St->getChain(), DL, @@ -10078,7 +10176,7 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, // For any little-endian targets with neon, we can support unaligned ld/st // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8. // A big-endian target may also explicitly support unaligned accesses - if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) { + if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) { if (Fast) *Fast = true; return true; @@ -10317,10 +10415,10 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. -bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { - EVT VT = getValueType(Ty, true); + EVT VT = getValueType(DL, Ty, true); if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) return false; @@ -10664,7 +10762,7 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. ARMTargetLowering::ConstraintType -ARMTargetLowering::getConstraintType(const std::string &Constraint) const { +ARMTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; @@ -10723,10 +10821,8 @@ ARMTargetLowering::getSingleConstraintMatchWeight( } typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; -RCPair -ARMTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const { +RCPair ARMTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC ARM Constraint Letters switch (Constraint[0]) { @@ -10974,7 +11070,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { } SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), - getPointerTy()); + getPointerTy(DAG.getDataLayout())); Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr); @@ -11083,7 +11179,8 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::arm_neon_vld4lane: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. - uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; + auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); + uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; @@ -11103,12 +11200,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::arm_neon_vst4lane: { Info.opc = ISD::INTRINSIC_VOID; // Conservatively set memVT to the entire set of vectors stored. + auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); unsigned NumElts = 0; for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; + NumElts += DL.getTypeAllocSize(ArgTy) / 8; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); @@ -11122,12 +11220,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, } case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { + auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.vol = true; Info.readMem = true; Info.writeMem = false; @@ -11135,12 +11234,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, } case Intrinsic::arm_stlex: case Intrinsic::arm_strex: { + auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; - Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.vol = true; Info.readMem = false; Info.writeMem = true; @@ -11427,9 +11527,9 @@ bool ARMTargetLowering::lowerInterleavedLoad( VectorType *VecTy = Shuffles[0]->getType(); Type *EltTy = VecTy->getVectorElementType(); - const DataLayout *DL = getDataLayout(); - unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy); - bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64; + const DataLayout &DL = LI->getModule()->getDataLayout(); + unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); + bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't // support i64/f64 element). @@ -11439,8 +11539,8 @@ bool ARMTargetLowering::lowerInterleavedLoad( // A pointer vector can not be the return type of the ldN intrinsics. Need to // load integer vectors first and then convert to pointer vectors. if (EltTy->isPointerTy()) - VecTy = VectorType::get(DL->getIntPtrType(EltTy), - VecTy->getVectorNumElements()); + VecTy = + VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements()); static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2, Intrinsic::arm_neon_vld3, @@ -11517,9 +11617,9 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, Type *EltTy = VecTy->getVectorElementType(); VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); - const DataLayout *DL = getDataLayout(); - unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy); - bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64; + const DataLayout &DL = SI->getModule()->getDataLayout(); + unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); + bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; // Skip illegal sub vector types and vector types of i64/f64 element (vstN // doesn't support i64/f64 element). @@ -11533,7 +11633,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, // StN intrinsics don't support pointer vectors as arguments. Convert pointer // vectors to integer vectors. if (EltTy->isPointerTy()) { - Type *IntTy = DL->getIntPtrType(EltTy); + Type *IntTy = DL.getIntPtrType(EltTy); // Convert to the corresponding integer vector. Type *IntVecTy = diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 74396392f8e3..efc9020c193a 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -249,7 +249,8 @@ namespace llvm { } /// getSetCCResultType - Return the value type to use for ISD::SETCC. - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, @@ -286,8 +287,8 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, - unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is legal @@ -324,8 +325,7 @@ namespace llvm { bool ExpandInlineAsm(CallInst *CI) const override; - ConstraintType - getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. @@ -334,8 +334,7 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is @@ -345,8 +344,8 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "Q") return InlineAsm::Constraint_Q; else if (ConstraintCode.size() == 2) { @@ -533,7 +532,8 @@ namespace llvm { SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; - unsigned getRegisterByName(const char* RegName, EVT VT) const override; + unsigned getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b8cac135baf6..61c45af26fe1 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -306,8 +306,8 @@ def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">; def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">; def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">; -def IsLE : Predicate<"getTargetLowering()->isLittleEndian()">; -def IsBE : Predicate<"getTargetLowering()->isBigEndian()">; +def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">; +def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 245c9e869bf6..37352810c99f 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -31,11 +31,13 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -65,12 +67,18 @@ namespace { static char ID; ARMLoadStoreOpt() : MachineFunctionPass(ID) {} + const MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; const ARMSubtarget *STI; const TargetLowering *TL; ARMFunctionInfo *AFI; - RegScavenger *RS; + LivePhysRegs LiveRegs; + RegisterClassInfo RegClassInfo; + MachineBasicBlock::const_iterator LiveRegPos; + bool LiveRegsValid; + bool RegClassInfoValid; bool isThumb1, isThumb2; bool runOnMachineFunction(MachineFunction &Fn) override; @@ -80,64 +88,60 @@ namespace { } private: + /// A set of load/store MachineInstrs with same base register sorted by + /// offset. struct MemOpQueueEntry { - int Offset; - unsigned Reg; - bool isKill; - unsigned Position; - MachineBasicBlock::iterator MBBI; - bool Merged; - MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, - MachineBasicBlock::iterator i) - : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} + MachineInstr *MI; + int Offset; ///< Load/Store offset. + unsigned Position; ///< Position as counted from end of basic block. + MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position) + : MI(MI), Offset(Offset), Position(Position) {} }; typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; - typedef MemOpQueue::iterator MemOpQueueIter; - void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, - const MemOpQueue &MemOps, unsigned DefReg, - unsigned RangeBegin, unsigned RangeEnd); + /// A set of MachineInstrs that fulfill (nearly all) conditions to get + /// merged into a LDM/STM. + struct MergeCandidate { + /// List of instructions ordered by load/store offset. + SmallVector<MachineInstr*, 4> Instrs; + /// Index in Instrs of the instruction being latest in the schedule. + unsigned LatestMIIdx; + /// Index in Instrs of the instruction being earliest in the schedule. + unsigned EarliestMIIdx; + /// Index into the basic block where the merged instruction will be + /// inserted. (See MemOpQueueEntry.Position) + unsigned InsertPos; + /// Whether the instructions can be merged into a ldm/stm instruction. + bool CanMergeToLSMulti; + /// Whether the instructions can be merged into a ldrd/strd instruction. + bool CanMergeToLSDouble; + }; + SpecificBumpPtrAllocator<MergeCandidate> Allocator; + SmallVector<const MergeCandidate*,4> Candidates; + SmallVector<MachineInstr*,4> MergeBaseCandidates; + + void moveLiveRegsBefore(const MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator Before); + unsigned findFreeReg(const TargetRegisterClass &RegClass); void UpdateBaseRegUses(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc dl, unsigned Base, unsigned WordOffset, + DebugLoc DL, unsigned Base, unsigned WordOffset, ARMCC::CondCodes Pred, unsigned PredReg); - bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - int Offset, unsigned Base, bool BaseKill, unsigned Opcode, - ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, - DebugLoc dl, - ArrayRef<std::pair<unsigned, bool> > Regs, - ArrayRef<unsigned> ImpDefs); - void MergeOpsUpdate(MachineBasicBlock &MBB, - MemOpQueue &MemOps, - unsigned memOpsBegin, - unsigned memOpsEnd, - unsigned insertAfter, - int Offset, - unsigned Base, - bool BaseKill, - unsigned Opcode, - ARMCC::CondCodes Pred, - unsigned PredReg, - unsigned Scratch, - DebugLoc dl, - SmallVectorImpl<MachineBasicBlock::iterator> &Merges); - void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, - unsigned Opcode, unsigned Size, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, MemOpQueue &MemOps, - SmallVectorImpl<MachineBasicBlock::iterator> &Merges); - void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); + MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, + bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, + DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs); + MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, + bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, + DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const; + void FormCandidates(const MemOpQueue &MemOps); + MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); - bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const TargetInstrInfo *TII, - bool &Advance, - MachineBasicBlock::iterator &I); - bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - bool &Advance, - MachineBasicBlock::iterator &I); + bool MergeBaseUpdateLoadStore(MachineInstr *MI); + bool MergeBaseUpdateLSMultiple(MachineInstr *MI); + bool MergeBaseUpdateLSDouble(MachineInstr &MI) const; bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; @@ -185,6 +189,14 @@ static int getMemoryOpOffset(const MachineInstr *MI) { return Offset; } +static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) { + return MI.getOperand(1); +} + +static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) { + return MI.getOperand(0); +} + static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) { switch (Opcode) { default: llvm_unreachable("Unhandled opcode!"); @@ -348,6 +360,10 @@ static bool isi32Store(unsigned Opc) { return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); } +static bool isLoadSingle(unsigned Opc) { + return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; +} + static unsigned getImmScale(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Unhandled opcode!"); @@ -365,12 +381,55 @@ static unsigned getImmScale(unsigned Opc) { } } +static unsigned getLSMultipleTransferSize(const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: return 0; + case ARM::LDRi12: + case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: + case ARM::tLDRspi: + case ARM::tSTRspi: + case ARM::t2LDRi8: + case ARM::t2LDRi12: + case ARM::t2STRi8: + case ARM::t2STRi12: + case ARM::VLDRS: + case ARM::VSTRS: + return 4; + case ARM::VLDRD: + case ARM::VSTRD: + return 8; + case ARM::LDMIA: + case ARM::LDMDA: + case ARM::LDMDB: + case ARM::LDMIB: + case ARM::STMIA: + case ARM::STMDA: + case ARM::STMDB: + case ARM::STMIB: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: + case ARM::t2LDMIA: + case ARM::t2LDMDB: + case ARM::t2STMIA: + case ARM::t2STMDB: + case ARM::VLDMSIA: + case ARM::VSTMSIA: + return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; + case ARM::VLDMDIA: + case ARM::VSTMDIA: + return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; + } +} + /// Update future uses of the base register with the offset introduced /// due to writeback. This function only works on Thumb1. void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc dl, unsigned Base, + DebugLoc DL, unsigned Base, unsigned WordOffset, ARMCC::CondCodes Pred, unsigned PredReg) { assert(isThumb1 && "Can only update base register uses for Thumb1!"); @@ -398,7 +457,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, Offset = MO.getImm() - WordOffset * getImmScale(Opc); // If storing the base register, it needs to be reset first. - unsigned InstrSrcReg = MBBI->getOperand(0).getReg(); + unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg(); if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) MO.setImm(Offset); @@ -439,7 +498,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, if (InsertSub) { // An instruction above couldn't be updated, so insert a sub. - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); return; } @@ -457,31 +516,65 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, // See PR21029. if (MBBI != MBB.end()) --MBBI; AddDefaultT1CC( - BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); } } +/// Return the first register of class \p RegClass that is not in \p Regs. +unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) { + if (!RegClassInfoValid) { + RegClassInfo.runOnMachineFunction(*MF); + RegClassInfoValid = true; + } + + for (unsigned Reg : RegClassInfo.getOrder(&RegClass)) + if (!LiveRegs.contains(Reg)) + return Reg; + return 0; +} + +/// Compute live registers just before instruction \p Before (in normal schedule +/// direction). Computes backwards so multiple queries in the same block must +/// come in reverse order. +void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator Before) { + // Initialize if we never queried in this block. + if (!LiveRegsValid) { + LiveRegs.init(TRI); + LiveRegs.addLiveOuts(&MBB, true); + LiveRegPos = MBB.end(); + LiveRegsValid = true; + } + // Move backward just before the "Before" position. + while (LiveRegPos != Before) { + --LiveRegPos; + LiveRegs.stepBackward(*LiveRegPos); + } +} + +static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs, + unsigned Reg) { + for (const std::pair<unsigned, bool> &R : Regs) + if (R.first == Reg) + return true; + return false; +} + /// Create and insert a LDM or STM with Base as base register and registers in /// Regs as the register operands that would be loaded / stored. It returns /// true if the transformation is done. -bool -ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - int Offset, unsigned Base, bool BaseKill, - unsigned Opcode, ARMCC::CondCodes Pred, - unsigned PredReg, unsigned Scratch, DebugLoc dl, - ArrayRef<std::pair<unsigned, bool> > Regs, - ArrayRef<unsigned> ImpDefs) { - // Only a single register to load / store. Don't bother. +MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, + bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, + DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) { unsigned NumRegs = Regs.size(); - if (NumRegs <= 1) - return false; + assert(NumRegs > 1); // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. // Compute liveness information for that register to make the decision. bool SafeToClobberCPSR = !isThumb1 || - (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) == + (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) == MachineBasicBlock::LQR_Dead); bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. @@ -489,17 +582,14 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // Exception: If the base register is in the input reglist, Thumb1 LDM is // non-writeback. // It's also not possible to merge an STR of the base register in Thumb1. - if (isThumb1) - for (const std::pair<unsigned, bool> &R : Regs) - if (Base == R.first) { - assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); - if (Opcode == ARM::tLDRi) { - Writeback = false; - break; - } else if (Opcode == ARM::tSTRi) { - return false; - } - } + if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) { + assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); + if (Opcode == ARM::tLDRi) { + Writeback = false; + } else if (Opcode == ARM::tSTRi) { + return nullptr; + } + } ARM_AM::AMSubMode Mode = ARM_AM::ia; // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. @@ -516,18 +606,18 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { // Check if this is a supported opcode before inserting instructions to // calculate a new base register. - if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; + if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr; // If starting offset isn't zero, insert a MI to materialize a new base. // But only do so if it is cost effective, i.e. merging more than two // loads / stores. if (NumRegs <= 2) - return false; + return nullptr; // On Thumb1, it's not worth materializing a new base register without // clobbering the CPSR (i.e. not using ADDS/SUBS). if (!SafeToClobberCPSR) - return false; + return nullptr; unsigned NewBase; if (isi32Load(Opcode)) { @@ -535,10 +625,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // use as the new base. NewBase = Regs[NumRegs-1].first; } else { - // Use the scratch register to use as a new base. - NewBase = Scratch; + // Find a free register that we can use as scratch register. + moveLiveRegsBefore(MBB, InsertBefore); + // The merged instruction does not exist yet but will use several Regs if + // it is a Store. + if (!isLoadSingle(Opcode)) + for (const std::pair<unsigned, bool> &R : Regs) + LiveRegs.addReg(R.first); + + NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass); if (NewBase == 0) - return false; + return nullptr; } int BaseOpc = @@ -557,7 +654,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (!TL->isLegalAddImmediate(Offset)) // FIXME: Try add with register operand? - return false; // Probably not worth it then. + return nullptr; // Probably not worth it then. + + // We can only append a kill flag to the add/sub input if the value is not + // used in the register list of the stm as well. + bool KillOldBase = BaseKill && + (!isi32Store(Opcode) || !ContainsReg(Regs, Base)); if (isThumb1) { // Thumb1: depending on immediate size, use either @@ -572,43 +674,44 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, !STI->hasV6Ops()) { // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr if (Pred != ARMCC::AL) - return false; - BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase) - .addReg(Base, getKillRegState(BaseKill)); + return nullptr; + BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase) + .addReg(Base, getKillRegState(KillOldBase)); } else - BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) - .addReg(Base, getKillRegState(BaseKill)) + BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase) + .addReg(Base, getKillRegState(KillOldBase)) .addImm(Pred).addReg(PredReg); - // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. + // The following ADDS/SUBS becomes an update. Base = NewBase; - BaseKill = false; + KillOldBase = true; } if (BaseOpc == ARM::tADDrSPi) { assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); - BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4) + BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4) .addImm(Pred).addReg(PredReg); } else - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + AddDefaultT1CC( + BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true) + .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) .addImm(Pred).addReg(PredReg); } else { - BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) .addImm(Pred).addReg(PredReg).addReg(0); } Base = NewBase; BaseKill = true; // New base is always killed straight away. } - bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || - Opcode == ARM::VLDRD); + bool isDef = isLoadSingle(Opcode); // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with // base register writeback. Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); - if (!Opcode) return false; + if (!Opcode) + return nullptr; // Check if a Thumb1 LDM/STM merge is safe. This is the case if: // - There is no writeback (LDM of base register), @@ -619,7 +722,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // It's safe to return here since the code to materialize a new base register // above is also conditional on SafeToClobberCPSR. if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) - return false; + return nullptr; MachineInstrBuilder MIB; @@ -628,7 +731,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // Update tLDMIA with writeback if necessary. Opcode = ARM::tLDMIA_UPD; - MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); // Thumb1: we might need to set base writeback when building the MI. MIB.addReg(Base, getDefRegState(true)) @@ -637,381 +740,257 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // The base isn't dead after a merged instruction with writeback. // Insert a sub instruction after the newly formed instruction to reset. if (!BaseKill) - UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); + UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg); } else { // No writeback, simply build the MachineInstr. - MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); MIB.addReg(Base, getKillRegState(BaseKill)); } MIB.addImm(Pred).addReg(PredReg); for (const std::pair<unsigned, bool> &R : Regs) - MIB = MIB.addReg(R.first, getDefRegState(isDef) - | getKillRegState(R.second)); + MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second)); - // Add implicit defs for super-registers. - for (unsigned ImpDef : ImpDefs) - MIB.addReg(ImpDef, RegState::ImplicitDefine); - - return true; + return MIB.getInstr(); } -/// Find all instructions using a given imp-def within a range. -/// -/// We are trying to combine a range of instructions, one of which (located at -/// position RangeBegin) implicitly defines a register. The final LDM/STM will -/// be placed at RangeEnd, and so any uses of this definition between RangeStart -/// and RangeEnd must be modified to use an undefined value. -/// -/// The live range continues until we find a second definition or one of the -/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so -/// we must consider all uses and decide which are relevant in a second pass. -void ARMLoadStoreOpt::findUsesOfImpDef( - SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps, - unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) { - std::map<unsigned, MachineOperand *> Uses; - unsigned LastLivePos = RangeEnd; - - // First we find all uses of this register with Position between RangeBegin - // and RangeEnd, any or all of these could be uses of a definition at - // RangeBegin. We also record the latest position a definition at RangeBegin - // would be considered live. - for (unsigned i = 0; i < MemOps.size(); ++i) { - MachineInstr &MI = *MemOps[i].MBBI; - unsigned MIPosition = MemOps[i].Position; - if (MIPosition <= RangeBegin || MIPosition > RangeEnd) - continue; - - // If this instruction defines the register, then any later use will be of - // that definition rather than ours. - if (MI.definesRegister(DefReg)) - LastLivePos = std::min(LastLivePos, MIPosition); - - MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg); - if (!UseOp) - continue; - - // If this instruction kills the register then (assuming liveness is - // correct when we start) we don't need to think about anything after here. - if (UseOp->isKill()) - LastLivePos = std::min(LastLivePos, MIPosition); - - Uses[MIPosition] = UseOp; - } - - // Now we traverse the list of all uses, and append the ones that actually use - // our definition to the requested list. - for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(), - E = Uses.end(); - I != E; ++I) { - // List is sorted by position so once we've found one out of range there - // will be no more to consider. - if (I->first > LastLivePos) - break; - UsesOfImpDefs.push_back(I->second); +MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, + bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, + DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const { + bool IsLoad = isi32Load(Opcode); + assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store"); + unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8; + + assert(Regs.size() == 2); + MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL, + TII->get(LoadStoreOpcode)); + if (IsLoad) { + MIB.addReg(Regs[0].first, RegState::Define) + .addReg(Regs[1].first, RegState::Define); + } else { + MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second)) + .addReg(Regs[1].first, getKillRegState(Regs[1].second)); } + MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + return MIB.getInstr(); } /// Call MergeOps and update MemOps and merges accordingly on success. -void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, - MemOpQueue &memOps, - unsigned memOpsBegin, unsigned memOpsEnd, - unsigned insertAfter, int Offset, - unsigned Base, bool BaseKill, - unsigned Opcode, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, - DebugLoc dl, - SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { - // First calculate which of the registers should be killed by the merged - // instruction. - const unsigned insertPos = memOps[insertAfter].Position; - SmallSet<unsigned, 4> KilledRegs; - DenseMap<unsigned, unsigned> Killer; - for (unsigned i = 0, e = memOps.size(); i != e; ++i) { - if (i == memOpsBegin) { - i = memOpsEnd; - if (i == e) - break; - } - if (memOps[i].Position < insertPos && memOps[i].isKill) { - unsigned Reg = memOps[i].Reg; +MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { + const MachineInstr *First = Cand.Instrs.front(); + unsigned Opcode = First->getOpcode(); + bool IsLoad = isLoadSingle(Opcode); + SmallVector<std::pair<unsigned, bool>, 8> Regs; + SmallVector<unsigned, 4> ImpDefs; + DenseSet<unsigned> KilledRegs; + // Determine list of registers and list of implicit super-register defs. + for (const MachineInstr *MI : Cand.Instrs) { + const MachineOperand &MO = getLoadStoreRegOp(*MI); + unsigned Reg = MO.getReg(); + bool IsKill = MO.isKill(); + if (IsKill) KilledRegs.insert(Reg); - Killer[Reg] = i; + Regs.push_back(std::make_pair(Reg, IsKill)); + + if (IsLoad) { + // Collect any implicit defs of super-registers, after merging we can't + // be sure anymore that we properly preserved these live ranges and must + // removed these implicit operands. + for (const MachineOperand &MO : MI->implicit_operands()) { + if (!MO.isReg() || !MO.isDef() || MO.isDead()) + continue; + assert(MO.isImplicit()); + unsigned DefReg = MO.getReg(); + + if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end()) + continue; + // We can ignore cases where the super-reg is read and written. + if (MI->readsRegister(DefReg)) + continue; + ImpDefs.push_back(DefReg); + } } } - for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - MachineOperand &TransferOp = memOps[i].MBBI->getOperand(0); - if (TransferOp.isUse() && TransferOp.getReg() == Base) - BaseKill = false; + // Attempt the merge. + typedef MachineBasicBlock::iterator iterator; + MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx]; + iterator InsertBefore = std::next(iterator(LatestMI)); + MachineBasicBlock &MBB = *LatestMI->getParent(); + unsigned Offset = getMemoryOpOffset(First); + unsigned Base = getLoadStoreBaseOp(*First).getReg(); + bool BaseKill = LatestMI->killsRegister(Base); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg); + DebugLoc DL = First->getDebugLoc(); + MachineInstr *Merged = nullptr; + if (Cand.CanMergeToLSDouble) + Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill, + Opcode, Pred, PredReg, DL, Regs); + if (!Merged && Cand.CanMergeToLSMulti) + Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill, + Opcode, Pred, PredReg, DL, Regs); + if (!Merged) + return nullptr; + + // Determine earliest instruction that will get removed. We then keep an + // iterator just above it so the following erases don't invalidated it. + iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]); + bool EarliestAtBegin = false; + if (EarliestI == MBB.begin()) { + EarliestAtBegin = true; + } else { + EarliestI = std::prev(EarliestI); } - SmallVector<std::pair<unsigned, bool>, 8> Regs; - SmallVector<unsigned, 8> ImpDefs; - SmallVector<MachineOperand *, 8> UsesOfImpDefs; - for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - unsigned Reg = memOps[i].Reg; - // If we are inserting the merged operation after an operation that - // uses the same register, make sure to transfer any kill flag. - bool isKill = memOps[i].isKill || KilledRegs.count(Reg); - Regs.push_back(std::make_pair(Reg, isKill)); - - // Collect any implicit defs of super-registers. They must be preserved. - for (const MachineOperand &MO : memOps[i].MBBI->operands()) { - if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead()) - continue; - unsigned DefReg = MO.getReg(); - if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) - ImpDefs.push_back(DefReg); - - // There may be other uses of the definition between this instruction and - // the eventual LDM/STM position. These should be marked undef if the - // merge takes place. - findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position, - insertPos); + // Remove instructions which have been merged. + for (MachineInstr *MI : Cand.Instrs) + MBB.erase(MI); + + // Determine range between the earliest removed instruction and the new one. + if (EarliestAtBegin) + EarliestI = MBB.begin(); + else + EarliestI = std::next(EarliestI); + auto FixupRange = make_range(EarliestI, iterator(Merged)); + + if (isLoadSingle(Opcode)) { + // If the previous loads defined a super-reg, then we have to mark earlier + // operands undef; Replicate the super-reg def on the merged instruction. + for (MachineInstr &MI : FixupRange) { + for (unsigned &ImpDefReg : ImpDefs) { + for (MachineOperand &MO : MI.implicit_operands()) { + if (!MO.isReg() || MO.getReg() != ImpDefReg) + continue; + if (MO.readsReg()) + MO.setIsUndef(); + else if (MO.isDef()) + ImpDefReg = 0; + } + } } - } - // Try to do the merge. - MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; - ++Loc; - if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, - Pred, PredReg, Scratch, dl, Regs, ImpDefs)) - return; - - // Merge succeeded, update records. - Merges.push_back(std::prev(Loc)); - - // In gathering loads together, we may have moved the imp-def of a register - // past one of its uses. This is OK, since we know better than the rest of - // LLVM what's OK with ARM loads and stores; but we still have to adjust the - // affected uses. - for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(), - E = UsesOfImpDefs.end(); - I != E; ++I) - (*I)->setIsUndef(); - - for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - // Remove kill flags from any memops that come before insertPos. - if (Regs[i-memOpsBegin].second) { - unsigned Reg = Regs[i-memOpsBegin].first; - if (KilledRegs.count(Reg)) { - unsigned j = Killer[Reg]; - int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true); - assert(Idx >= 0 && "Cannot find killing operand"); - memOps[j].MBBI->getOperand(Idx).setIsKill(false); - memOps[j].isKill = false; + MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged); + for (unsigned ImpDef : ImpDefs) + MIB.addReg(ImpDef, RegState::ImplicitDefine); + } else { + // Remove kill flags: We are possibly storing the values later now. + assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD); + for (MachineInstr &MI : FixupRange) { + for (MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || !MO.isKill()) + continue; + if (KilledRegs.count(MO.getReg())) + MO.setIsKill(false); } - memOps[i].isKill = true; } - MBB.erase(memOps[i].MBBI); - // Update this memop to refer to the merged instruction. - // We may need to move kill flags again. - memOps[i].Merged = true; - memOps[i].MBBI = Merges.back(); - memOps[i].Position = insertPos; + assert(ImpDefs.empty()); } - // Update memOps offsets, since they may have been modified by MergeOps. - for (auto &MemOp : memOps) { - MemOp.Offset = getMemoryOpOffset(MemOp.MBBI); - } + return Merged; } -/// Merge a number of load / store instructions into one or more load / store -/// multiple instructions. -void -ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, - unsigned Base, unsigned Opcode, unsigned Size, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, MemOpQueue &MemOps, - SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { - bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); - int Offset = MemOps[SIndex].Offset; - int SOffset = Offset; - unsigned insertAfter = SIndex; - MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; - DebugLoc dl = Loc->getDebugLoc(); - const MachineOperand &PMO = Loc->getOperand(0); - unsigned PReg = PMO.getReg(); - unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); - unsigned Count = 1; - unsigned Limit = ~0U; - bool BaseKill = false; - // vldm / vstm limit are 32 for S variants, 16 for D variants. +static bool isValidLSDoubleOffset(int Offset) { + unsigned Value = abs(Offset); + // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally + // multiplied by 4. + return (Value % 4) == 0 && Value < 1024; +} - switch (Opcode) { - default: break; - case ARM::VSTRS: - Limit = 32; - break; - case ARM::VSTRD: - Limit = 16; - break; - case ARM::VLDRD: - Limit = 16; - break; - case ARM::VLDRS: - Limit = 32; - break; - } +/// Find candidates for load/store multiple merge in list of MemOpQueueEntries. +void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { + const MachineInstr *FirstMI = MemOps[0].MI; + unsigned Opcode = FirstMI->getOpcode(); + bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); + unsigned Size = getLSMultipleTransferSize(FirstMI); + + unsigned SIndex = 0; + unsigned EIndex = MemOps.size(); + do { + // Look at the first instruction. + const MachineInstr *MI = MemOps[SIndex].MI; + int Offset = MemOps[SIndex].Offset; + const MachineOperand &PMO = getLoadStoreRegOp(*MI); + unsigned PReg = PMO.getReg(); + unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); + unsigned Latest = SIndex; + unsigned Earliest = SIndex; + unsigned Count = 1; + bool CanMergeToLSDouble = + STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset); + // ARM errata 602117: LDRD with base in list may result in incorrect base + // register when interrupted or faulted. + if (STI->isCortexM3() && isi32Load(Opcode) && + PReg == getLoadStoreBaseOp(*MI).getReg()) + CanMergeToLSDouble = false; + + bool CanMergeToLSMulti = true; + // On swift vldm/vstm starting with an odd register number as that needs + // more uops than single vldrs. + if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1) + CanMergeToLSMulti = false; + + // Merge following instructions where possible. + for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) { + int NewOffset = MemOps[I].Offset; + if (NewOffset != Offset + (int)Size) + break; + const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI); + unsigned Reg = MO.getReg(); + unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); + + // See if the current load/store may be part of a multi load/store. + bool PartOfLSMulti = CanMergeToLSMulti; + if (PartOfLSMulti) { + // Cannot load from SP + if (Reg == ARM::SP) + PartOfLSMulti = false; + // Register numbers must be in ascending order. + else if (RegNum <= PRegNum) + PartOfLSMulti = false; + // For VFP / NEON load/store multiples, the registers must be + // consecutive and within the limit on the number of registers per + // instruction. + else if (!isNotVFP && RegNum != PRegNum+1) + PartOfLSMulti = false; + } + // See if the current load/store may be part of a double load/store. + bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1; - for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { - int NewOffset = MemOps[i].Offset; - const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); - unsigned Reg = MO.getReg(); - unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); - // Register numbers must be in ascending order. For VFP / NEON load and - // store multiples, the registers must also be consecutive and within the - // limit on the number of registers per instruction. - if (Reg != ARM::SP && - NewOffset == Offset + (int)Size && - ((isNotVFP && RegNum > PRegNum) || - ((Count < Limit) && RegNum == PRegNum+1)) && - // On Swift we don't want vldm/vstm to start with a odd register num - // because Q register unaligned vldm/vstm need more uops. - (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) { + if (!PartOfLSMulti && !PartOfLSDouble) + break; + CanMergeToLSMulti &= PartOfLSMulti; + CanMergeToLSDouble &= PartOfLSDouble; + // Track MemOp with latest and earliest position (Positions are + // counted in reverse). + unsigned Position = MemOps[I].Position; + if (Position < MemOps[Latest].Position) + Latest = I; + else if (Position > MemOps[Earliest].Position) + Earliest = I; + // Prepare for next MemOp. Offset += Size; PRegNum = RegNum; - ++Count; - } else { - // Can't merge this in. Try merge the earlier ones first. - // We need to compute BaseKill here because the MemOps may have been - // reordered. - BaseKill = Loc->killsRegister(Base); - - MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base, - BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); - MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, - MemOps, Merges); - return; } - if (MemOps[i].Position > MemOps[insertAfter].Position) { - insertAfter = i; - Loc = MemOps[i].MBBI; - } - } - - BaseKill = Loc->killsRegister(Base); - MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, - Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); -} - -static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg) { - unsigned MyPredReg = 0; - if (!MI) - return false; - - bool CheckCPSRDef = false; - switch (MI->getOpcode()) { - default: return false; - case ARM::tSUBi8: - case ARM::t2SUBri: - case ARM::SUBri: - CheckCPSRDef = true; - break; - case ARM::tSUBspi: - break; - } - - // Make sure the offset fits in 8 bits. - if (Bytes == 0 || (Limit && Bytes >= Limit)) - return false; - - unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || - MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME - if (!(MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm() * Scale) == Bytes && - getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg)) - return false; - - return CheckCPSRDef ? !definesCPSR(MI) : true; -} - -static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg) { - unsigned MyPredReg = 0; - if (!MI) - return false; - - bool CheckCPSRDef = false; - switch (MI->getOpcode()) { - default: return false; - case ARM::tADDi8: - case ARM::t2ADDri: - case ARM::ADDri: - CheckCPSRDef = true; - break; - case ARM::tADDspi: - break; - } - - if (Bytes == 0 || (Limit && Bytes >= Limit)) - // Make sure the offset fits in 8 bits. - return false; - - unsigned Scale = (MI->getOpcode() == ARM::tADDspi || - MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME - if (!(MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm() * Scale) == Bytes && - getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg)) - return false; - - return CheckCPSRDef ? !definesCPSR(MI) : true; -} - -static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { - switch (MI->getOpcode()) { - default: return 0; - case ARM::LDRi12: - case ARM::STRi12: - case ARM::tLDRi: - case ARM::tSTRi: - case ARM::tLDRspi: - case ARM::tSTRspi: - case ARM::t2LDRi8: - case ARM::t2LDRi12: - case ARM::t2STRi8: - case ARM::t2STRi12: - case ARM::VLDRS: - case ARM::VSTRS: - return 4; - case ARM::VLDRD: - case ARM::VSTRD: - return 8; - case ARM::LDMIA: - case ARM::LDMDA: - case ARM::LDMDB: - case ARM::LDMIB: - case ARM::STMIA: - case ARM::STMDA: - case ARM::STMDB: - case ARM::STMIB: - case ARM::tLDMIA: - case ARM::tLDMIA_UPD: - case ARM::tSTMIA_UPD: - case ARM::t2LDMIA: - case ARM::t2LDMDB: - case ARM::t2STMIA: - case ARM::t2STMDB: - case ARM::VLDMSIA: - case ARM::VSTMSIA: - return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; - case ARM::VLDMDIA: - case ARM::VSTMDIA: - return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; - } + // Form a candidate from the Ops collected so far. + MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate; + for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C) + Candidate->Instrs.push_back(MemOps[C].MI); + Candidate->LatestMIIdx = Latest - SIndex; + Candidate->EarliestMIIdx = Earliest - SIndex; + Candidate->InsertPos = MemOps[Latest].Position; + if (Count == 1) + CanMergeToLSMulti = CanMergeToLSDouble = false; + Candidate->CanMergeToLSMulti = CanMergeToLSMulti; + Candidate->CanMergeToLSDouble = CanMergeToLSDouble; + Candidates.push_back(Candidate); + // Continue after the chain. + SIndex += Count; + } while (SIndex < EIndex); } static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, @@ -1081,6 +1060,75 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } } +/// Check if the given instruction increments or decrements a register and +/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags +/// generated by the instruction are possibly read as well. +static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg, + ARMCC::CondCodes Pred, unsigned PredReg) { + bool CheckCPSRDef; + int Scale; + switch (MI.getOpcode()) { + case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break; + case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break; + case ARM::t2SUBri: + case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break; + case ARM::t2ADDri: + case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break; + case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break; + case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break; + default: return 0; + } + + unsigned MIPredReg; + if (MI.getOperand(0).getReg() != Reg || + MI.getOperand(1).getReg() != Reg || + getInstrPredicate(&MI, MIPredReg) != Pred || + MIPredReg != PredReg) + return 0; + + if (CheckCPSRDef && definesCPSR(&MI)) + return 0; + return MI.getOperand(2).getImm() * Scale; +} + +/// Searches for an increment or decrement of \p Reg before \p MBBI. +static MachineBasicBlock::iterator +findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg, + ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { + Offset = 0; + MachineBasicBlock &MBB = *MBBI->getParent(); + MachineBasicBlock::iterator BeginMBBI = MBB.begin(); + MachineBasicBlock::iterator EndMBBI = MBB.end(); + if (MBBI == BeginMBBI) + return EndMBBI; + + // Skip debug values. + MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); + while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI) + --PrevMBBI; + + Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg); + return Offset == 0 ? EndMBBI : PrevMBBI; +} + +/// Searches for a increment or decrement of \p Reg after \p MBBI. +static MachineBasicBlock::iterator +findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg, + ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { + Offset = 0; + MachineBasicBlock &MBB = *MBBI->getParent(); + MachineBasicBlock::iterator EndMBBI = MBB.end(); + MachineBasicBlock::iterator NextMBBI = std::next(MBBI); + // Skip debug values. + while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) + ++NextMBBI; + if (NextMBBI == EndMBBI) + return EndMBBI; + + Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg); + return Offset == 0 ? EndMBBI : NextMBBI; +} + /// Fold proceeding/trailing inc/dec of base register into the /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: /// @@ -1093,21 +1141,17 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, /// ldmia rn, <ra, rb, rc> /// => /// ldmdb rn!, <ra, rb, rc> -bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - bool &Advance, - MachineBasicBlock::iterator &I) { +bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { // Thumb1 is already using updating loads/stores. if (isThumb1) return false; - MachineInstr *MI = MBBI; - unsigned Base = MI->getOperand(0).getReg(); - bool BaseKill = MI->getOperand(0).isKill(); - unsigned Bytes = getLSMultipleTransferSize(MI); + const MachineOperand &BaseOP = MI->getOperand(0); + unsigned Base = BaseOP.getReg(); + bool BaseKill = BaseOP.isKill(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); unsigned Opcode = MI->getOpcode(); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc DL = MI->getDebugLoc(); // Can't use an updating ld/st if the base register is also a dest // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. @@ -1115,55 +1159,27 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, if (MI->getOperand(i).getReg() == Base) return false; - bool DoMerge = false; + int Bytes = getLSMultipleTransferSize(MI); + MachineBasicBlock &MBB = *MI->getParent(); + MachineBasicBlock::iterator MBBI(MI); + int Offset; + MachineBasicBlock::iterator MergeInstr + = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode); - - // Try merging with the previous instruction. - MachineBasicBlock::iterator BeginMBBI = MBB.begin(); - if (MBBI != BeginMBBI) { - MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); - while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) - --PrevMBBI; - if (Mode == ARM_AM::ia && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - Mode = ARM_AM::db; - DoMerge = true; - } else if (Mode == ARM_AM::ib && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - Mode = ARM_AM::da; - DoMerge = true; - } - if (DoMerge) - MBB.erase(PrevMBBI); - } - - // Try merging with the next instruction. - MachineBasicBlock::iterator EndMBBI = MBB.end(); - if (!DoMerge && MBBI != EndMBBI) { - MachineBasicBlock::iterator NextMBBI = std::next(MBBI); - while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) - ++NextMBBI; - if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && - isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && - isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } - if (DoMerge) { - if (NextMBBI == I) { - Advance = true; - ++I; - } - MBB.erase(NextMBBI); - } + if (Mode == ARM_AM::ia && Offset == -Bytes) { + Mode = ARM_AM::db; + } else if (Mode == ARM_AM::ib && Offset == -Bytes) { + Mode = ARM_AM::da; + } else { + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) && + ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) + return false; } - - if (!DoMerge) - return false; + MBB.erase(MergeInstr); unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register .addReg(Base, getKillRegState(BaseKill)) .addImm(Pred).addReg(PredReg); @@ -1231,21 +1247,15 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, /// Fold proceeding/trailing inc/dec of base register into the /// LDR/STR/FLD{D|S}/FST{D|S} op when possible: -bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const TargetInstrInfo *TII, - bool &Advance, - MachineBasicBlock::iterator &I) { +bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { // Thumb1 doesn't have updating LDR/STR. // FIXME: Use LDM/STM with single register instead. if (isThumb1) return false; - MachineInstr *MI = MBBI; - unsigned Base = MI->getOperand(1).getReg(); - bool BaseKill = MI->getOperand(1).isKill(); - unsigned Bytes = getLSMultipleTransferSize(MI); + unsigned Base = getLoadStoreBaseOp(*MI).getReg(); + bool BaseKill = getLoadStoreBaseOp(*MI).isKill(); unsigned Opcode = MI->getOpcode(); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc DL = MI->getDebugLoc(); bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); @@ -1255,7 +1265,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) return false; - bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; // Can't do the merge if the destination register is the same as the would-be // writeback register. if (MI->getOperand(0).getReg() == Base) @@ -1263,64 +1272,38 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); - bool DoMerge = false; - ARM_AM::AddrOpc AddSub = ARM_AM::add; - unsigned NewOpc = 0; - // AM2 - 12 bits, thumb2 - 8 bits. - unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); - - // Try merging with the previous instruction. - MachineBasicBlock::iterator BeginMBBI = MBB.begin(); - if (MBBI != BeginMBBI) { - MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); - while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) - --PrevMBBI; - if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { - DoMerge = true; - AddSub = ARM_AM::sub; - } else if (!isAM5 && - isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { - DoMerge = true; - } - if (DoMerge) { - NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub); - MBB.erase(PrevMBBI); - } - } - - // Try merging with the next instruction. - MachineBasicBlock::iterator EndMBBI = MBB.end(); - if (!DoMerge && MBBI != EndMBBI) { - MachineBasicBlock::iterator NextMBBI = std::next(MBBI); - while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) - ++NextMBBI; - if (!isAM5 && - isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { - DoMerge = true; - AddSub = ARM_AM::sub; - } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) { - DoMerge = true; - } - if (DoMerge) { - NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub); - if (NextMBBI == I) { - Advance = true; - ++I; - } - MBB.erase(NextMBBI); - } + int Bytes = getLSMultipleTransferSize(MI); + MachineBasicBlock &MBB = *MI->getParent(); + MachineBasicBlock::iterator MBBI(MI); + int Offset; + MachineBasicBlock::iterator MergeInstr + = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); + unsigned NewOpc; + if (!isAM5 && Offset == Bytes) { + NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add); + } else if (Offset == -Bytes) { + NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); + } else { + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + if (Offset == Bytes) { + NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add); + } else if (!isAM5 && Offset == -Bytes) { + NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); + } else + return false; } + MBB.erase(MergeInstr); - if (!DoMerge) - return false; + ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add; + bool isLd = isLoadSingle(Opcode); if (isAM5) { // VLDM[SD]_UPD, VSTM[SD]_UPD // (There are no base-updating versions of VLDR/VSTR instructions, but the // updating load/store-multiple instructions can be used with only one // register.) MachineOperand &MO = MI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register .addReg(Base, getKillRegState(isLd ? BaseKill : false)) .addImm(Pred).addReg(PredReg) @@ -1330,20 +1313,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (isAM2) { // LDR_PRE, LDR_POST if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { - int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) .addReg(Base, RegState::Define) .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); } else { - int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) .addReg(Base, RegState::Define) - .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); } } else { - int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; // t2LDR_PRE, t2LDR_POST - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) .addReg(Base, RegState::Define) .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); } @@ -1353,15 +1334,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, // the vestigal zero-reg offset register. When that's fixed, this clause // can be removed entirely. if (isAM2 && NewOpc == ARM::STR_POST_IMM) { - int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); // STR_PRE, STR_POST - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) - .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); } else { - int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; // t2STR_PRE, t2STR_POST - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); } @@ -1371,6 +1351,66 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return true; } +bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) && + "Must have t2STRDi8 or t2LDRDi8"); + if (MI.getOperand(3).getImm() != 0) + return false; + + // Behaviour for writeback is undefined if base register is the same as one + // of the others. + const MachineOperand &BaseOp = MI.getOperand(2); + unsigned Base = BaseOp.getReg(); + const MachineOperand &Reg0Op = MI.getOperand(0); + const MachineOperand &Reg1Op = MI.getOperand(1); + if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base) + return false; + + unsigned PredReg; + ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg); + MachineBasicBlock::iterator MBBI(MI); + MachineBasicBlock &MBB = *MI.getParent(); + int Offset; + MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred, + PredReg, Offset); + unsigned NewOpc; + if (Offset == 8 || Offset == -8) { + NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE; + } else { + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + if (Offset == 8 || Offset == -8) { + NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST; + } else + return false; + } + MBB.erase(MergeInstr); + + DebugLoc DL = MI.getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); + if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) { + MIB.addOperand(Reg0Op).addOperand(Reg1Op) + .addReg(BaseOp.getReg(), RegState::Define); + } else { + assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST); + MIB.addReg(BaseOp.getReg(), RegState::Define) + .addOperand(Reg0Op).addOperand(Reg1Op); + } + MIB.addReg(BaseOp.getReg(), RegState::Kill) + .addImm(Offset).addImm(Pred).addReg(PredReg); + assert(TII->get(Opcode).getNumOperands() == 6 && + TII->get(NewOpc).getNumOperands() == 7 && + "Unexpected number of operands in Opcode specification."); + + // Transfer implicit operands. + for (const MachineOperand &MO : MI.implicit_operands()) + MIB.addOperand(MO); + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + + MBB.erase(MBBI); + return true; +} + /// Returns true if instruction is a memory operation that this pass is capable /// of operating on. static bool isMemoryOp(const MachineInstr *MI) { @@ -1426,26 +1466,10 @@ static bool isMemoryOp(const MachineInstr *MI) { return false; } -/// Advance register scavenger to just before the earliest memory op that is -/// being merged. -void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { - MachineBasicBlock::iterator Loc = MemOps[0].MBBI; - unsigned Position = MemOps[0].Position; - for (unsigned i = 1, e = MemOps.size(); i != e; ++i) { - if (MemOps[i].Position < Position) { - Position = MemOps[i].Position; - Loc = MemOps[i].MBBI; - } - } - - if (Loc != MBB.begin()) - RS->forward(std::prev(Loc)); -} - static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, - DebugLoc dl, unsigned NewOpc, + DebugLoc DL, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, bool OffKill, bool OffUndef, @@ -1491,7 +1515,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, if (!Errata602117 && !NonConsecutiveRegs) return false; - MachineBasicBlock::iterator NewBBI = MBBI; bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; bool EvenDeadKill = isLd ? @@ -1531,7 +1554,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); ++NumSTRD2STM; } - NewBBI = std::prev(MBBI); } else { // Split into two instructions. unsigned NewOpc = (isLd) @@ -1553,7 +1575,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, OddReg, OddDeadKill, false, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); - NewBBI = std::prev(MBBI); InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, @@ -1573,7 +1594,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); - NewBBI = std::prev(MBBI); InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, OddUndef, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, @@ -1585,191 +1605,160 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, ++NumSTRD2STR; } - MBB.erase(MI); - MBBI = NewBBI; + MBBI = MBB.erase(MBBI); return true; } /// An optimization pass to turn multiple LDR / STR ops of the same base and /// incrementing offset into LDM / STM ops. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { - unsigned NumMerges = 0; - unsigned NumMemOps = 0; MemOpQueue MemOps; unsigned CurrBase = 0; unsigned CurrOpc = ~0u; - unsigned CurrSize = 0; ARMCC::CondCodes CurrPred = ARMCC::AL; - unsigned CurrPredReg = 0; unsigned Position = 0; - SmallVector<MachineBasicBlock::iterator,4> Merges; - - RS->enterBasicBlock(&MBB); - MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - while (MBBI != E) { + assert(Candidates.size() == 0); + assert(MergeBaseCandidates.size() == 0); + LiveRegsValid = false; + + for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin(); + I = MBBI) { + // The instruction in front of the iterator is the one we look at. + MBBI = std::prev(I); if (FixInvalidRegPairOp(MBB, MBBI)) continue; + ++Position; - bool Advance = false; - bool TryMerge = false; - - bool isMemOp = isMemoryOp(MBBI); - if (isMemOp) { + if (isMemoryOp(MBBI)) { unsigned Opcode = MBBI->getOpcode(); - unsigned Size = getLSMultipleTransferSize(MBBI); const MachineOperand &MO = MBBI->getOperand(0); unsigned Reg = MO.getReg(); - bool isKill = MO.isDef() ? false : MO.isKill(); - unsigned Base = MBBI->getOperand(1).getReg(); + unsigned Base = getLoadStoreBaseOp(*MBBI).getReg(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); int Offset = getMemoryOpOffset(MBBI); - // Watch out for: - // r4 := ldr [r5] - // r5 := ldr [r5, #4] - // r6 := ldr [r5, #8] - // - // The second ldr has effectively broken the chain even though it - // looks like the later ldr(s) use the same base register. Try to - // merge the ldr's so far, including this one. But don't try to - // combine the following ldr(s). - bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg(); - - // Watch out for: - // r4 := ldr [r0, #8] - // r4 := ldr [r0, #4] - // - // The optimization may reorder the second ldr in front of the first - // ldr, which violates write after write(WAW) dependence. The same as - // str. Try to merge inst(s) already in MemOps. - bool Overlap = false; - for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { - if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) { - Overlap = true; - break; - } - } - - if (CurrBase == 0 && !Clobber) { + if (CurrBase == 0) { // Start of a new chain. CurrBase = Base; CurrOpc = Opcode; - CurrSize = Size; CurrPred = Pred; - CurrPredReg = PredReg; - MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); - ++NumMemOps; - Advance = true; - } else if (!Overlap) { - if (Clobber) { - TryMerge = true; - Advance = true; + MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); + continue; + } + // Note: No need to match PredReg in the next if. + if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { + // Watch out for: + // r4 := ldr [r0, #8] + // r4 := ldr [r0, #4] + // or + // r0 := ldr [r0] + // If a load overrides the base register or a register loaded by + // another load in our chain, we cannot take this instruction. + bool Overlap = false; + if (isLoadSingle(Opcode)) { + Overlap = (Base == Reg); + if (!Overlap) { + for (const MemOpQueueEntry &E : MemOps) { + if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) { + Overlap = true; + break; + } + } + } } - if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { - // No need to match PredReg. - // Continue adding to the queue. + if (!Overlap) { + // Check offset and sort memory operation into the current chain. if (Offset > MemOps.back().Offset) { - MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, - Position, MBBI)); - ++NumMemOps; - Advance = true; + MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); + continue; } else { - for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); - I != E; ++I) { - if (Offset < I->Offset) { - MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, - Position, MBBI)); - ++NumMemOps; - Advance = true; + MemOpQueue::iterator MI, ME; + for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) { + if (Offset < MI->Offset) { + // Found a place to insert. break; - } else if (Offset == I->Offset) { - // Collision! This can't be merged! + } + if (Offset == MI->Offset) { + // Collision, abort. + MI = ME; break; } } + if (MI != MemOps.end()) { + MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position)); + continue; + } } } } - } - if (MBBI->isDebugValue()) { - ++MBBI; - if (MBBI == E) - // Reach the end of the block, try merging the memory instructions. - TryMerge = true; - } else if (Advance) { - ++Position; - ++MBBI; - if (MBBI == E) - // Reach the end of the block, try merging the memory instructions. - TryMerge = true; - } else { - TryMerge = true; + // Don't advance the iterator; The op will start a new chain next. + MBBI = I; + --Position; + // Fallthrough to look into existing chain. + } else if (MBBI->isDebugValue()) { + continue; + } else if (MBBI->getOpcode() == ARM::t2LDRDi8 || + MBBI->getOpcode() == ARM::t2STRDi8) { + // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions + // remember them because we may still be able to merge add/sub into them. + MergeBaseCandidates.push_back(MBBI); } - if (TryMerge) { - if (NumMemOps > 1) { - // Try to find a free register to use as a new base in case it's needed. - // First advance to the instruction just before the start of the chain. - AdvanceRS(MBB, MemOps); - - // Find a scratch register. - unsigned Scratch = - RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); - - // Process the load / store instructions. - RS->forward(std::prev(MBBI)); - - // Merge ops. - Merges.clear(); - MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, - CurrPred, CurrPredReg, Scratch, MemOps, Merges); - - // Try folding preceding/trailing base inc/dec into the generated - // LDM/STM ops. - for (unsigned i = 0, e = Merges.size(); i < e; ++i) - if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI)) - ++NumMerges; - NumMerges += Merges.size(); - - // Try folding preceding/trailing base inc/dec into those load/store - // that were not merged to form LDM/STM ops. - for (unsigned i = 0; i != NumMemOps; ++i) - if (!MemOps[i].Merged) - if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI)) - ++NumMerges; - - // RS may be pointing to an instruction that's deleted. - RS->skipTo(std::prev(MBBI)); - } else if (NumMemOps == 1) { - // Try folding preceding/trailing base inc/dec into the single - // load/store. - if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) { - ++NumMerges; - RS->forward(std::prev(MBBI)); - } - } + // If we are here then the chain is broken; Extract candidates for a merge. + if (MemOps.size() > 0) { + FormCandidates(MemOps); + // Reset for the next chain. CurrBase = 0; CurrOpc = ~0u; - CurrSize = 0; CurrPred = ARMCC::AL; - CurrPredReg = 0; - if (NumMemOps) { - MemOps.clear(); - NumMemOps = 0; - } + MemOps.clear(); + } + } + if (MemOps.size() > 0) + FormCandidates(MemOps); - // If iterator hasn't been advanced and this is not a memory op, skip it. - // It can't start a new chain anyway. - if (!Advance && !isMemOp && MBBI != E) { - ++Position; - ++MBBI; + // Sort candidates so they get processed from end to begin of the basic + // block later; This is necessary for liveness calculation. + auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) { + return M0->InsertPos < M1->InsertPos; + }; + std::sort(Candidates.begin(), Candidates.end(), LessThan); + + // Go through list of candidates and merge. + bool Changed = false; + for (const MergeCandidate *Candidate : Candidates) { + if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) { + MachineInstr *Merged = MergeOpsUpdate(*Candidate); + // Merge preceding/trailing base inc/dec into the merged op. + if (Merged) { + Changed = true; + unsigned Opcode = Merged->getOpcode(); + if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8) + MergeBaseUpdateLSDouble(*Merged); + else + MergeBaseUpdateLSMultiple(Merged); + } else { + for (MachineInstr *MI : Candidate->Instrs) { + if (MergeBaseUpdateLoadStore(MI)) + Changed = true; + } } + } else { + assert(Candidate->Instrs.size() == 1); + if (MergeBaseUpdateLoadStore(Candidate->Instrs.front())) + Changed = true; } } - return NumMerges > 0; + Candidates.clear(); + // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt. + for (MachineInstr *MI : MergeBaseCandidates) + MergeBaseUpdateLSDouble(*MI); + MergeBaseCandidates.clear(); + + return Changed; } /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr") @@ -1814,12 +1803,14 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { } bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + MF = &Fn; STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); TL = STI->getTargetLowering(); AFI = Fn.getInfo<ARMFunctionInfo>(); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); - RS = new RegScavenger(); + MRI = &Fn.getRegInfo(); + RegClassInfoValid = false; isThumb2 = AFI->isThumb2Function(); isThumb1 = AFI->isThumbFunction() && !isThumb2; @@ -1832,7 +1823,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { Modified |= MergeReturnIntoLDM(MBB); } - delete RS; + Allocator.DestroyAll(); return Modified; } @@ -2219,7 +2210,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { continue; int Opc = MI->getOpcode(); - bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; + bool isLd = isLoadSingle(Opc); unsigned Base = MI->getOperand(1).getReg(); int Offset = getMemoryOpOffset(MI); diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index a59cf9851108..6cafbbb9f8eb 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -18,12 +18,6 @@ using namespace llvm; #define DEBUG_TYPE "arm-selectiondag-info" -ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { -} - // Emit, if possible, a specialized version of the given Libcall. Typically this // means selecting the appropriately aligned version, but we also convert memset // of 0 into memclr. @@ -83,7 +77,7 @@ EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI->getDataLayout()->getIntPtrType(*DAG.getContext()); + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Args.push_back(Entry); if (AEABILibcall == AEABI_MEMCLR) { @@ -121,12 +115,14 @@ EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } }; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI->getLibcallCallingConv(LC), - Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], - TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee( + TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], + TLI->getPointerTy(DAG.getDataLayout())), + std::move(Args), 0) + .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h index 1db190f41e1a..289879ee1d7e 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -37,8 +37,6 @@ namespace ARM_AM { class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit ARMSelectionDAGInfo(const DataLayout &DL); - ~ARMSelectionDAGInfo(); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 55808dfb9efe..002c3e9b6291 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -112,7 +112,6 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM), - TSInfo(*TM.getDataLayout()), FrameLowering(initializeFrameLowering(CPU, FS)), // At this point initializeSubtargetDependencies has been called so // we can query directly. @@ -172,6 +171,7 @@ void ARMSubtarget::initializeEnvironment() { AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; + GenLongCalls = false; UnsafeFPMath = false; } @@ -286,7 +286,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, if (RelocM == Reloc::Static) return false; - bool isDecl = GV->isDeclarationForLinker(); + bool isDef = GV->isStrongDefinitionForLinker(); if (!isTargetMachO()) { // Extra load is needed for all externally visible. @@ -294,34 +294,22 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, return false; return true; } else { - if (RelocM == Reloc::PIC_) { - // If this is a strong reference to a definition, it is definitely not - // through a stub. - if (!isDecl && !GV->isWeakForLinker()) - return false; - - // Unless we have a symbol with hidden visibility, we have to go through a - // normal $non_lazy_ptr stub because this symbol might be resolved late. - if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. - return true; + // If this is a strong reference to a definition, it is definitely not + // through a stub. + if (isDef) + return false; + + // Unless we have a symbol with hidden visibility, we have to go through a + // normal $non_lazy_ptr stub because this symbol might be resolved late. + if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. + return true; + if (RelocM == Reloc::PIC_) { // If symbol visibility is hidden, we have a stub for common symbol // references and external declarations. - if (isDecl || GV->hasCommonLinkage()) + if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) // Hidden $non_lazy_ptr reference. return true; - - return false; - } else { - // If this is a strong reference to a definition, it is definitely not - // through a stub. - if (!isDecl && !GV->isWeakForLinker()) - return false; - - // Unless we have a symbol with hidden visibility, we have to go through a - // normal $non_lazy_ptr stub because this symbol might be resolved late. - if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. - return true; } } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 9909a6a6d198..dd101df9b63d 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -206,6 +206,9 @@ protected: /// NaCl TRAP instruction is generated instead of the regular TRAP. bool UseNaClTrap; + /// Generate calls via indirect call instructions. + bool GenLongCalls; + /// Target machine allowed unsafe FP math (such as use of NEON fp) bool UnsafeFPMath; @@ -342,6 +345,7 @@ public: bool hasMPExtension() const { return HasMPExtension; } bool hasThumb2DSP() const { return Thumb2DSP; } bool useNaClTrap() const { return UseNaClTrap; } + bool genLongCalls() const { return GenLongCalls; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 6e81bd2d349d..93495d66ae70 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -80,8 +80,7 @@ computeTargetABI(const Triple &TT, StringRef CPU, // FIXME: This is duplicated code from the front end and should be unified. if (TT.isOSBinFormatMachO()) { if (TT.getEnvironment() == llvm::Triple::EABI || - (TT.getOS() == llvm::Triple::UnknownOS && - TT.getObjectFormat() == llvm::Triple::MachO) || + (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) || CPU.startswith("cortex-m")) { TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; } else { @@ -104,8 +103,8 @@ computeTargetABI(const Triple &TT, StringRef CPU, TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; break; default: - if (TT.getOS() == llvm::Triple::NetBSD) - TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; + if (TT.isOSNetBSD()) + TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; else TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; break; diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index f4901fc24e44..2f194cf7ae06 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -61,14 +61,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || ISD == ISD::FP_EXTEND)) { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second); if (Idx != -1) return LT.first * NEONFltDblTbl[Idx].Cost; } - EVT SrcTy = TLI->getValueType(Src); - EVT DstTy = TLI->getValueType(Dst); + EVT SrcTy = TLI->getValueType(DL, Src); + EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) return BaseT::getCastInstrCost(Opcode, Dst, Src); @@ -282,8 +282,8 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } }; - EVT SelCondTy = TLI->getValueType(CondTy); - EVT SelValTy = TLI->getValueType(ValTy); + EVT SelCondTy = TLI->getValueType(DL, CondTy); + EVT SelValTy = TLI->getValueType(DL, ValTy); if (SelCondTy.isSimple() && SelValTy.isSimple()) { int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD, SelCondTy.getSimpleVT(), @@ -292,7 +292,7 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return NEONVectorSelectTbl[Idx].Cost; } - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); return LT.first; } @@ -353,7 +353,7 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); if (Idx == -1) @@ -379,7 +379,7 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); int Idx = CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); if (Idx == -1) @@ -395,7 +395,7 @@ unsigned ARMTTIImpl::getArithmeticInstrCost( TTI::OperandValueProperties Opd2PropInfo) { int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); const unsigned FunctionCallDivCost = 20; const unsigned ReciprocalDivCost = 10; @@ -468,7 +468,7 @@ unsigned ARMTTIImpl::getArithmeticInstrCost( unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); if (Src->isVectorTy() && Alignment != 16 && Src->getVectorElementType()->isDoubleTy()) { @@ -488,12 +488,12 @@ unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, assert(isa<VectorType>(VecTy) && "Expect a vector type"); // vldN/vstN doesn't support vector types of i64/f64 element. - bool EltIs64Bits = DL->getTypeAllocSizeInBits(VecTy->getScalarType()) == 64; + bool EltIs64Bits = DL.getTypeAllocSizeInBits(VecTy->getScalarType()) == 64; if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) { unsigned NumElts = VecTy->getVectorNumElements(); Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy); + unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy); // vldN/vstN only support legal vector types of size 64 or 128 in bits. if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index f2e5db655ccf..84f256f73722 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -42,7 +42,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { public: explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, Function &F) - : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. ARMTTIImpl(const ARMTTIImpl &Arg) @@ -50,18 +51,6 @@ public: ARMTTIImpl(ARMTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - ARMTTIImpl &operator=(const ARMTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - ARMTTIImpl &operator=(ARMTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } /// \name Scalar TTI Implementations /// @{ diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index c2db74619871..f8f0eb2d4baa 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -189,9 +189,9 @@ class ARMAsmParser : public MCTargetAsmParser { return getParser().Error(L, Msg, Ranges); } - bool validatetLDMRegList(MCInst Inst, const OperandVector &Operands, + bool validatetLDMRegList(const MCInst &Inst, const OperandVector &Operands, unsigned ListNo, bool IsARPop = false); - bool validatetSTMRegList(MCInst Inst, const OperandVector &Operands, + bool validatetSTMRegList(const MCInst &Inst, const OperandVector &Operands, unsigned ListNo); int tryParseRegister(); @@ -242,6 +242,8 @@ class ARMAsmParser : public MCTargetAsmParser { bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode); + void tryConvertingToTwoOperandForm(StringRef Mnemonic, bool CarrySetting, + OperandVector &Operands); bool isThumb() const { // FIXME: Can tablegen auto-generate this? return STI.getFeatureBits()[ARM::ModeThumb]; @@ -5465,6 +5467,92 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, CanAcceptPredicationCode = true; } +// \brief Some Thumb instructions have two operand forms that are not +// available as three operand, convert to two operand form if possible. +// +// FIXME: We would really like to be able to tablegen'erate this. +void ARMAsmParser::tryConvertingToTwoOperandForm(StringRef Mnemonic, + bool CarrySetting, + OperandVector &Operands) { + if (Operands.size() != 6) + return; + + const auto &Op3 = static_cast<ARMOperand &>(*Operands[3]); + auto &Op4 = static_cast<ARMOperand &>(*Operands[4]); + if (!Op3.isReg() || !Op4.isReg()) + return; + + auto Op3Reg = Op3.getReg(); + auto Op4Reg = Op4.getReg(); + + // For most Thumb2 cases we just generate the 3 operand form and reduce + // it in processInstruction(), but the 3 operand form of ADD (t2ADDrr) + // won't accept SP or PC so we do the transformation here taking care + // with immediate range in the 'add sp, sp #imm' case. + auto &Op5 = static_cast<ARMOperand &>(*Operands[5]); + if (isThumbTwo()) { + if (Mnemonic != "add") + return; + bool TryTransform = Op3Reg == ARM::PC || Op4Reg == ARM::PC || + (Op5.isReg() && Op5.getReg() == ARM::PC); + if (!TryTransform) { + TryTransform = (Op3Reg == ARM::SP || Op4Reg == ARM::SP || + (Op5.isReg() && Op5.getReg() == ARM::SP)) && + !(Op3Reg == ARM::SP && Op4Reg == ARM::SP && + Op5.isImm() && !Op5.isImm0_508s4()); + } + if (!TryTransform) + return; + } else if (!isThumbOne()) + return; + + if (!(Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" || + Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" || + Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" || + Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic")) + return; + + // If first 2 operands of a 3 operand instruction are the same + // then transform to 2 operand version of the same instruction + // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1' + bool Transform = Op3Reg == Op4Reg; + + // For communtative operations, we might be able to transform if we swap + // Op4 and Op5. The 'ADD Rdm, SP, Rdm' form is already handled specially + // as tADDrsp. + const ARMOperand *LastOp = &Op5; + bool Swap = false; + if (!Transform && Op5.isReg() && Op3Reg == Op5.getReg() && + ((Mnemonic == "add" && Op4Reg != ARM::SP) || + Mnemonic == "and" || Mnemonic == "eor" || + Mnemonic == "adc" || Mnemonic == "orr")) { + Swap = true; + LastOp = &Op4; + Transform = true; + } + + // If both registers are the same then remove one of them from + // the operand list, with certain exceptions. + if (Transform) { + // Don't transform 'adds Rd, Rd, Rm' or 'sub{s} Rd, Rd, Rm' because the + // 2 operand forms don't exist. + if (((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub") && + LastOp->isReg()) + Transform = false; + + // Don't transform 'add/sub{s} Rd, Rd, #imm' if the immediate fits into + // 3-bits because the ARMARM says not to. + if ((Mnemonic == "add" || Mnemonic == "sub") && LastOp->isImm0_7()) + Transform = false; + } + + if (Transform) { + if (Swap) + std::swap(Op4, Op5); + Operands.erase(Operands.begin() + 3); + } +} + bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands) { // FIXME: This is all horribly hacky. We really need a better way to deal @@ -5838,6 +5926,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, "VFP/Neon double precision register expected"); } + tryConvertingToTwoOperandForm(Mnemonic, CarrySetting, Operands); + // Some instructions, mostly Thumb, have forms for the same mnemonic that // do and don't have a cc_out optional-def operand. With some spot-checks // of the operand list, we can figure out which variant we're trying to @@ -5901,48 +5991,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } } - // If first 2 operands of a 3 operand instruction are the same - // then transform to 2 operand version of the same instruction - // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1' - // FIXME: We would really like to be able to tablegen'erate this. - if (isThumbOne() && Operands.size() == 6 && - (Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" || - Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" || - Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" || - Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic")) { - ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); - ARMOperand &Op4 = static_cast<ARMOperand &>(*Operands[4]); - ARMOperand &Op5 = static_cast<ARMOperand &>(*Operands[5]); - - // If both registers are the same then remove one of them from - // the operand list. - if (Op3.isReg() && Op4.isReg() && Op3.getReg() == Op4.getReg()) { - // If 3rd operand (variable Op5) is a register and the instruction is adds/sub - // then do not transform as the backend already handles this instruction - // correctly. - if (!Op5.isReg() || !((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub")) { - Operands.erase(Operands.begin() + 3); - if (Mnemonic == "add" && !CarrySetting) { - // Special case for 'add' (not 'adds') instruction must - // remove the CCOut operand as well. - Operands.erase(Operands.begin() + 1); - } - } - } - } - - // If instruction is 'add' and first two register operands - // use SP register, then remove one of the SP registers from - // the instruction. - // FIXME: We would really like to be able to tablegen'erate this. - if (isThumbOne() && Operands.size() == 5 && Mnemonic == "add" && !CarrySetting) { - ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); - ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); - if (Op2.isReg() && Op3.isReg() && Op2.getReg() == ARM::SP && Op3.getReg() == ARM::SP) { - Operands.erase(Operands.begin() + 2); - } - } - // GNU Assembler extension (compatibility) if ((Mnemonic == "ldrd" || Mnemonic == "strd")) { ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); @@ -5985,8 +6033,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // return 'true' if register list contains non-low GPR registers, // 'false' otherwise. If Reg is in the register list or is HiReg, set // 'containsReg' to true. -static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg, - unsigned HiReg, bool &containsReg) { +static bool checkLowRegisterList(const MCInst &Inst, unsigned OpNo, + unsigned Reg, unsigned HiReg, + bool &containsReg) { containsReg = false; for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) { unsigned OpReg = Inst.getOperand(i).getReg(); @@ -6001,8 +6050,8 @@ static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg, // Check if the specified regisgter is in the register list of the inst, // starting at the indicated operand number. -static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) { - for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) { +static bool listContainsReg(const MCInst &Inst, unsigned OpNo, unsigned Reg) { + for (unsigned i = OpNo, e = Inst.getNumOperands(); i < e; ++i) { unsigned OpReg = Inst.getOperand(i).getReg(); if (OpReg == Reg) return true; @@ -6020,7 +6069,7 @@ static bool instIsBreakpoint(const MCInst &Inst) { } -bool ARMAsmParser::validatetLDMRegList(MCInst Inst, +bool ARMAsmParser::validatetLDMRegList(const MCInst &Inst, const OperandVector &Operands, unsigned ListNo, bool IsARPop) { const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]); @@ -6043,7 +6092,7 @@ bool ARMAsmParser::validatetLDMRegList(MCInst Inst, return false; } -bool ARMAsmParser::validatetSTMRegList(MCInst Inst, +bool ARMAsmParser::validatetSTMRegList(const MCInst &Inst, const OperandVector &Operands, unsigned ListNo) { const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]); @@ -8167,8 +8216,16 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // If the destination and first source operand are the same, and // there's no setting of the flags, use encoding T2 instead of T3. // Note that this is only for ADD, not SUB. This mirrors the system - // 'as' behaviour. Make sure the wide encoding wasn't explicit. - if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || + // 'as' behaviour. Also take advantage of ADD being commutative. + // Make sure the wide encoding wasn't explicit. + bool Swap = false; + auto DestReg = Inst.getOperand(0).getReg(); + bool Transform = DestReg == Inst.getOperand(1).getReg(); + if (!Transform && DestReg == Inst.getOperand(2).getReg()) { + Transform = true; + Swap = true; + } + if (!Transform || Inst.getOperand(5).getReg() != 0 || (static_cast<ARMOperand &>(*Operands[3]).isToken() && static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) @@ -8177,7 +8234,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, TmpInst.setOpcode(ARM::tADDhirr); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(0)); - TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(Swap ? 1 : 2)); TmpInst.addOperand(Inst.getOperand(3)); TmpInst.addOperand(Inst.getOperand(4)); Inst = TmpInst; @@ -9176,8 +9233,7 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { return false; } - STI.InitMCProcessorInfo(CPU, ""); - STI.InitCPUSchedModel(CPU); + STI.setDefaultFeatures(CPU); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); return false; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 4d12bfb5d60f..d17fdb95dbdf 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -1362,7 +1362,7 @@ MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) { MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { const Triple &TT = STI.getTargetTriple(); - if (TT.getObjectFormat() == Triple::ELF) + if (TT.isOSBinFormatELF()) return new ARMTargetELFStreamer(S); return new ARMTargetStreamer(S); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index fafe25ae5be5..21c9fc1e58b2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -31,7 +31,7 @@ using namespace llvm; #define GET_REGINFO_MC_DESC #include "ARMGenRegisterInfo.inc" -static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, +static bool getMCRDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { if (STI.getFeatureBits()[llvm::ARM::HasV7Ops] && (MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) && @@ -63,7 +63,7 @@ static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, return false; } -static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, +static bool getITDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { if (STI.getFeatureBits()[llvm::ARM::HasV8Ops] && MI.getOperand(1).isImm() && MI.getOperand(1).getImm() != 8) { @@ -75,7 +75,7 @@ static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, return false; } -static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, +static bool getARMStoreDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] && "cannot predicate thumb instructions"); @@ -92,7 +92,7 @@ static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, return false; } -static bool getARMLoadDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, +static bool getARMLoadDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] && "cannot predicate thumb instructions"); @@ -257,9 +257,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT, ArchFS = FS; } - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitARMMCSubtargetInfo(X, TT, CPU, ArchFS); - return X; + return createARMMCSubtargetInfoImpl(TT, CPU, ArchFS); } static MCInstrInfo *createARMMCInstrInfo() { @@ -268,7 +266,7 @@ static MCInstrInfo *createARMMCInstrInfo() { return X; } -static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { +static MCRegisterInfo *createARMMCRegisterInfo(const Triple &Triple) { MCRegisterInfo *X = new MCRegisterInfo(); InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC); return X; @@ -279,10 +277,10 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, MCAsmInfo *MAI; if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO()) MAI = new ARMMCAsmInfoDarwin(TheTriple); - else if (TheTriple.isWindowsItaniumEnvironment()) - MAI = new ARMCOFFMCAsmInfoGNU(); else if (TheTriple.isWindowsMSVCEnvironment()) MAI = new ARMCOFFMCAsmInfoMicrosoft(); + else if (TheTriple.isOSWindows()) + MAI = new ARMCOFFMCAsmInfoGNU(); else MAI = new ARMELFMCAsmInfo(TheTriple); @@ -292,14 +290,13 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createARMMCCodeGenInfo(const Triple &TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) { - Triple TheTriple(TT); // Default relocation model on Darwin is PIC, not DynamicNoPIC. - RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC; + RM = TT.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC; } X->initMCCodeGenInfo(RM, CM, OL); return X; diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 77cd890e4cad..3b4358b5d9bf 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -365,7 +365,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, // frame pointer stack slot, the target is ELF and the function has FP, or // the target uses var sized objects. if (NumBytes) { - assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && + assert(!MFI->getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); |