src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2015-08-07 23:01:33 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2015-08-07 23:01:33 +0000
commit	ee8648bdac07986a0f1ec897b02ec82a2f144d46 (patch)
tree	52d1861acda1205241ee35a94aa63129c604d469 /lib/Target/ARM
parent	1a82d4c088707c791c792f6822f611b47a12bdfe (diff)
download	src-ee8648bdac07986a0f1ec897b02ec82a2f144d46.tar.gz src-ee8648bdac07986a0f1ec897b02ec82a2f144d46.zip

Vendor import of llvm trunk r242221 (tag: vendor/llvm/llvm-trunk-r242221)

https://llvm.org/svn/llvm-project/llvm/trunk@242221

Notes

Notes: svn path=/vendor/llvm/dist/; revision=286425
Notes: svn path=/vendor/llvm/llvm-trunk-r242221/; revision=286426; tag=vendor/llvm/llvm-trunk-r242221

Diffstat (limited to 'lib/Target/ARM')

-rw-r--r--

lib/Target/ARM/ARM.td

-rw-r--r--

lib/Target/ARM/ARMBaseInstrInfo.cpp

-rw-r--r--

lib/Target/ARM/ARMBaseRegisterInfo.cpp

-rw-r--r--

lib/Target/ARM/ARMCallingConv.td

-rw-r--r--

lib/Target/ARM/ARMFastISel.cpp

-rw-r--r--

lib/Target/ARM/ARMFrameLowering.cpp

-rw-r--r--

lib/Target/ARM/ARMFrameLowering.h

-rw-r--r--

lib/Target/ARM/ARMISelDAGToDAG.cpp

-rw-r--r--

lib/Target/ARM/ARMISelLowering.cpp

328

-rw-r--r--

lib/Target/ARM/ARMISelLowering.h

-rw-r--r--

lib/Target/ARM/ARMInstrInfo.td

-rw-r--r--

lib/Target/ARM/ARMLoadStoreOptimizer.cpp

1453

-rw-r--r--

lib/Target/ARM/ARMSelectionDAGInfo.cpp

-rw-r--r--

lib/Target/ARM/ARMSelectionDAGInfo.h

-rw-r--r--

lib/Target/ARM/ARMSubtarget.cpp

-rw-r--r--

lib/Target/ARM/ARMSubtarget.h

-rw-r--r--

lib/Target/ARM/ARMTargetMachine.cpp

-rw-r--r--

lib/Target/ARM/ARMTargetTransformInfo.cpp

-rw-r--r--

lib/Target/ARM/ARMTargetTransformInfo.h

-rw-r--r--

lib/Target/ARM/AsmParser/ARMAsmParser.cpp

166

-rw-r--r--

lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp

-rw-r--r--

lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp

-rw-r--r--

lib/Target/ARM/Thumb1FrameLowering.cpp

23 files changed, 1209 insertions, 1070 deletions

diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 96b4742da2bb..ef609a66d032 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td

@@ -150,6 +150,10 @@ def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",

def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",

"NaCl trap">;

+def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",

+ "Generate calls via indirect call "

+ "instructions">;

// ARM ISAs.

def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",

"Support ARM v4T instructions">;

diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index b1a11d626bda..9f43e732bd73 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp

@@ -1230,8 +1230,7 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {

Reloc::Model RM = MF.getTarget().getRelocationModel();

if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {

- assert(getSubtarget().getTargetTriple().getObjectFormat() ==

- Triple::MachO &&

+ assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&

"LOAD_STACK_GUARD currently supported only for MachO.");

expandLoadStackGuard(MI, RM);

MI->getParent()->erase(MI);

diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 3f79a9b53d70..e7d5be7753e4 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp

@@ -127,7 +127,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,

BitVector ARMBaseRegisterInfo::

getReservedRegs(const MachineFunction &MF) const {

const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();

- const TargetFrameLowering *TFI = STI.getFrameLowering();

+ const ARMFrameLowering *TFI = getFrameLowering(MF);

// FIXME: avoid re-calculating this every time.

BitVector Reserved(getNumRegs());

@@ -194,7 +194,7 @@ unsigned

ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,

MachineFunction &MF) const {

const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();

- const TargetFrameLowering *TFI = STI.getFrameLowering();

+ const ARMFrameLowering *TFI = getFrameLowering(MF);

switch (RC->getID()) {

default:

@@ -302,7 +302,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,

bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {

const MachineFrameInfo *MFI = MF.getFrameInfo();

const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

+ const ARMFrameLowering *TFI = getFrameLowering(MF);

// When outgoing call frames are so large that we adjust the stack pointer

// around the call, we can no longer use the stack pointer to reach the

@@ -333,6 +333,7 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {

bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {

const MachineRegisterInfo *MRI = &MF.getRegInfo();

const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

+ const ARMFrameLowering *TFI = getFrameLowering(MF);

// We can't realign the stack if:

// 1. Dynamic stack realignment is explicitly disabled,

// 2. This is a Thumb1 function (it's not useful, so we don't bother), or

@@ -347,7 +348,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {

return false;

// We may also need a base pointer if there are dynamic allocas or stack

// pointer adjustments around calls.

- if (MF.getSubtarget().getFrameLowering()->hasReservedCallFrame(MF))

+ if (TFI->hasReservedCallFrame(MF))

return true;

// A base pointer is required and allowed. Check that it isn't too late to

// reserve it.

@@ -357,9 +358,9 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {

bool ARMBaseRegisterInfo::

needsStackRealignment(const MachineFunction &MF) const {

const MachineFrameInfo *MFI = MF.getFrameInfo();

+ const ARMFrameLowering *TFI = getFrameLowering(MF);

const Function *F = MF.getFunction();

- unsigned StackAlign =

- MF.getSubtarget().getFrameLowering()->getStackAlignment();

+ unsigned StackAlign = TFI->getStackAlignment();

bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||

F->hasFnAttribute(Attribute::StackAlignment));

@@ -378,7 +379,7 @@ cannotEliminateFrame(const MachineFunction &MF) const {

unsigned

ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {

const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();

- const TargetFrameLowering *TFI = STI.getFrameLowering();

+ const ARMFrameLowering *TFI = getFrameLowering(MF);

if (TFI->hasFP(MF))

return getFramePointerReg(STI);

@@ -517,7 +518,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {

// Note that the incoming offset is based on the SP value at function entry,

// so it'll be negative.

MachineFunction &MF = *MI->getParent()->getParent();

- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

+ const ARMFrameLowering *TFI = getFrameLowering(MF);

MachineFrameInfo *MFI = MF.getFrameInfo();

ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

@@ -694,8 +695,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,

MachineFunction &MF = *MBB.getParent();

const ARMBaseInstrInfo &TII =

*static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());

- const ARMFrameLowering *TFI = static_cast<const ARMFrameLowering *>(

- MF.getSubtarget().getFrameLowering());

+ const ARMFrameLowering *TFI = getFrameLowering(MF);

ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

assert(!AFI->isThumb1OnlyFunction() &&

"This eliminateFrameIndex does not support Thumb1!");

diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 7dd21ecbe91b..27cf06b995a0 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td

@@ -142,6 +142,9 @@ def CC_ARM_AAPCS : CallingConv<[

// Handles byval parameters.

CCIfByVal<CCPassByVal<4, 4>>,

+ // The 'nest' parameter, if any, is passed in R12.

+ CCIfNest<CCAssignToReg<[R12]>>,

// Handle all vector types as either f64 or v2f64.

CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,

CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,

diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 4175b4af86e6..fdd0763ea608 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp

@@ -49,8 +49,6 @@

#include "llvm/Target/TargetOptions.h"

using namespace llvm;

-extern cl::opt<bool> EnableARMLongCalls;

namespace {

// All possible address modes, plus some.

@@ -685,7 +683,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {

}

unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {

- EVT CEVT = TLI.getValueType(C->getType(), true);

+ EVT CEVT = TLI.getValueType(DL, C->getType(), true);

// Only handle simple types.

if (!CEVT.isSimple()) return 0;

@@ -732,7 +730,7 @@ unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {

}

bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {

- EVT evt = TLI.getValueType(Ty, true);

+ EVT evt = TLI.getValueType(DL, Ty, true);

// Only handle simple types.

if (evt == MVT::Other || !evt.isSimple()) return false;

@@ -786,12 +784,13 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {

return ARMComputeAddress(U->getOperand(0), Addr);

case Instruction::IntToPtr:

// Look past no-op inttoptrs.

- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())

+ if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==

+ TLI.getPointerTy(DL))

return ARMComputeAddress(U->getOperand(0), Addr);

break;

case Instruction::PtrToInt:

// Look past no-op ptrtoints.

- if (TLI.getValueType(U->getType()) == TLI.getPointerTy())

+ if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))

return ARMComputeAddress(U->getOperand(0), Addr);

break;

case Instruction::GetElementPtr: {

@@ -1365,7 +1364,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {

bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,

bool isZExt) {

Type *Ty = Src1Value->getType();

- EVT SrcEVT = TLI.getValueType(Ty, true);

+ EVT SrcEVT = TLI.getValueType(DL, Ty, true);

if (!SrcEVT.isSimple()) return false;

MVT SrcVT = SrcEVT.getSimpleVT();

@@ -1557,7 +1556,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {

return false;

Value *Src = I->getOperand(0);

- EVT SrcEVT = TLI.getValueType(Src->getType(), true);

+ EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);

if (!SrcEVT.isSimple())

return false;

MVT SrcVT = SrcEVT.getSimpleVT();

@@ -1750,7 +1749,7 @@ bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {

}

bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {

- EVT DestVT = TLI.getValueType(I->getType(), true);

+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);

// We can get here in the case when we have a binary operation on a non-legal

// type and the target independent selector doesn't know how to handle it.

@@ -1790,7 +1789,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {

}

bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {

- EVT FPVT = TLI.getValueType(I->getType(), true);

+ EVT FPVT = TLI.getValueType(DL, I->getType(), true);

if (!FPVT.isSimple()) return false;

MVT VT = FPVT.getSimpleVT();

@@ -2095,7 +2094,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {

CallingConv::ID CC = F.getCallingConv();

if (Ret->getNumOperands() > 0) {

SmallVector<ISD::OutputArg, 4> Outs;

- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);

+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

// Analyze operands of the call, assigning locations to each operand.

SmallVector<CCValAssign, 16> ValLocs;

@@ -2122,7 +2121,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {

return false;

unsigned SrcReg = Reg + VA.getValNo();

- EVT RVEVT = TLI.getValueType(RV->getType());

+ EVT RVEVT = TLI.getValueType(DL, RV->getType());

if (!RVEVT.isSimple()) return false;

MVT RVVT = RVEVT.getSimpleVT();

MVT DestVT = VA.getValVT();

@@ -2173,7 +2172,7 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {

unsigned ARMFastISel::getLibcallReg(const Twine &Name) {

// Manually compute the global's type to avoid building it when unnecessary.

Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);

- EVT LCREVT = TLI.getValueType(GVTy);

+ EVT LCREVT = TLI.getValueType(DL, GVTy);

if (!LCREVT.isSimple()) return 0;

GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,

@@ -2246,19 +2245,19 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {

return false;

unsigned CalleeReg = 0;

- if (EnableARMLongCalls) {

+ if (Subtarget->genLongCalls()) {

CalleeReg = getLibcallReg(TLI.getLibcallName(Call));

if (CalleeReg == 0) return false;

}

// Issue the call.

- unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);

+ unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls());

MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,

DbgLoc, TII.get(CallOpc));

// BL / BLX don't take a predicate, but tBL / tBLX do.

if (isThumb2)

AddDefaultPred(MIB);

- if (EnableARMLongCalls)

+ if (Subtarget->genLongCalls())

MIB.addReg(CalleeReg);

else

MIB.addExternalSymbol(TLI.getLibcallName(Call));

@@ -2380,7 +2379,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,

bool UseReg = false;

const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);

- if (!GV || EnableARMLongCalls) UseReg = true;

+ if (!GV || Subtarget->genLongCalls()) UseReg = true;

unsigned CalleeReg = 0;

if (UseReg) {

@@ -2576,8 +2575,8 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) {

Value *Op = I->getOperand(0);

EVT SrcVT, DestVT;

- SrcVT = TLI.getValueType(Op->getType(), true);

- DestVT = TLI.getValueType(I->getType(), true);

+ SrcVT = TLI.getValueType(DL, Op->getType(), true);

+ DestVT = TLI.getValueType(DL, I->getType(), true);

if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)

return false;

@@ -2742,8 +2741,8 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {

if (!SrcReg) return false;

EVT SrcEVT, DestEVT;

- SrcEVT = TLI.getValueType(SrcTy, true);

- DestEVT = TLI.getValueType(DestTy, true);

+ SrcEVT = TLI.getValueType(DL, SrcTy, true);

+ DestEVT = TLI.getValueType(DL, DestTy, true);

if (!SrcEVT.isSimple()) return false;

if (!DestEVT.isSimple()) return false;

@@ -2763,7 +2762,7 @@ bool ARMFastISel::SelectShift(const Instruction *I,

return false;

// Only handle i32 now.

- EVT DestVT = TLI.getValueType(I->getType(), true);

+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);

if (DestVT != MVT::i32)

return false;

@@ -3026,7 +3025,7 @@ bool ARMFastISel::fastLowerArguments() {

if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())

return false;

- EVT ArgVT = TLI.getValueType(ArgTy);

+ EVT ArgVT = TLI.getValueType(DL, ArgTy);

if (!ArgVT.isSimple()) return false;

switch (ArgVT.getSimpleVT().SimpleTy) {

case MVT::i8:

diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index a52e49780e27..6744000afe2b 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp

@@ -800,7 +800,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,

// This is bad, if an interrupt is taken after the mov, sp is in an

// inconsistent state.

// Use the first callee-saved register as a scratch register.

- assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&

+ assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&

"No scratch register to restore SP from FP!");

emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,

ARMCC::AL, 0, TII);

@@ -1470,7 +1470,8 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,

// callee-saved vector registers after realigning the stack. The vst1 and vld1

// instructions take alignment hints that can improve performance.

-static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {

+static void

+checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {

MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);

if (!SpillAlignedNEONRegs)

return;

@@ -1497,10 +1498,9 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {

// callee-saved registers in order, but it can happen that there are holes in

// the range. Registers above the hole will be spilled to the standard DPRCS

// area.

- MachineRegisterInfo &MRI = MF.getRegInfo();

unsigned NumSpills = 0;

for (; NumSpills < 8; ++NumSpills)

- if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills))

+ if (!SavedRegs.test(ARM::D8 + NumSpills))

break;

// Don't do this for just one d-register. It's not worth it.

@@ -1511,12 +1511,13 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {

MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);

// A scratch register is required for the vst1 / vld1 instructions.

- MF.getRegInfo().setPhysRegUsed(ARM::R4);

+ SavedRegs.set(ARM::R4);

}

-void

-ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,

- RegScavenger *RS) const {

+void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,

+ BitVector &SavedRegs,

+ RegScavenger *RS) const {

+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

// This tells PEI to spill the FP as if it is any other callee-save register

// to take advantage the eliminateFrameIndex machinery. This also ensures it

// is spilled in the order specified by getCalleeSavedRegs() to make it easier

@@ -1543,12 +1544,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,

// FIXME: It will be better just to find spare register here.

if (AFI->isThumb2Function() &&

(MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))

- MRI.setPhysRegUsed(ARM::R4);

+ SavedRegs.set(ARM::R4);

if (AFI->isThumb1OnlyFunction()) {

// Spill LR if Thumb1 function uses variable length argument lists.

if (AFI->getArgRegsSaveSize() > 0)

- MRI.setPhysRegUsed(ARM::LR);

+ SavedRegs.set(ARM::LR);

// Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know

// for sure what the stack size will be, but for this, an estimate is good

@@ -1558,23 +1559,23 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,

// FIXME: It will be better just to find spare register here.

unsigned StackSize = MFI->estimateStackSize(MF);

if (MFI->hasVarSizedObjects() || StackSize > 508)

- MRI.setPhysRegUsed(ARM::R4);

+ SavedRegs.set(ARM::R4);

}

// See if we can spill vector registers to aligned stack.

- checkNumAlignedDPRCS2Regs(MF);

+ checkNumAlignedDPRCS2Regs(MF, SavedRegs);

// Spill the BasePtr if it's used.

if (RegInfo->hasBasePointer(MF))

- MRI.setPhysRegUsed(RegInfo->getBaseRegister());

+ SavedRegs.set(RegInfo->getBaseRegister());

// Don't spill FP if the frame can be eliminated. This is determined

- // by scanning the callee-save registers to see if any is used.

+ // by scanning the callee-save registers to see if any is modified.

const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);

for (unsigned i = 0; CSRegs[i]; ++i) {

unsigned Reg = CSRegs[i];

bool Spilled = false;

- if (MRI.isPhysRegUsed(Reg)) {

+ if (SavedRegs.test(Reg)) {

Spilled = true;

CanEliminateFrame = false;

}

@@ -1668,7 +1669,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,

// If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.

// Spill LR as well so we can fold BX_RET to the registers restore (LDM).

if (!LRSpilled && CS1Spilled) {

- MRI.setPhysRegUsed(ARM::LR);

+ SavedRegs.set(ARM::LR);

NumGPRSpills++;

SmallVectorImpl<unsigned>::iterator LRPos;

LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),

@@ -1681,7 +1682,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,

}

if (hasFP(MF)) {

- MRI.setPhysRegUsed(FramePtr);

+ SavedRegs.set(FramePtr);

auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),

FramePtr);

if (FPPos != UnspilledCS1GPRs.end())

@@ -1700,7 +1701,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,

// Don't spill high register if the function is thumb

if (!AFI->isThumbFunction() ||

isARMLowRegister(Reg) || Reg == ARM::LR) {

- MRI.setPhysRegUsed(Reg);

+ SavedRegs.set(Reg);

if (!MRI.isReserved(Reg))

ExtraCSSpill = true;

break;

@@ -1708,7 +1709,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,

}

} else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {

unsigned Reg = UnspilledCS2GPRs.front();

- MRI.setPhysRegUsed(Reg);

+ SavedRegs.set(Reg);

if (!MRI.isReserved(Reg))

ExtraCSSpill = true;

}

@@ -1747,7 +1748,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,

}

if (Extras.size() && NumExtras == 0) {

for (unsigned i = 0, e = Extras.size(); i != e; ++i) {

- MRI.setPhysRegUsed(Extras[i]);

+ SavedRegs.set(Extras[i]);

}

} else if (!AFI->isThumb1OnlyFunction()) {

// note: Thumb1 functions spill to R12, not the stack. Reserve a slot

@@ -1761,7 +1762,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,

}

if (ForceLRSpill) {

- MRI.setPhysRegUsed(ARM::LR);

+ SavedRegs.set(ARM::LR);

AFI->setLRIsSpilledForFarJump(true);

}

diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index d763d17a506f..6fdc5eff5e47 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h

@@ -54,8 +54,8 @@ public:

unsigned &FrameReg, int SPAdj) const;

int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;

- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,

- RegScavenger *RS) const override;

+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,

+ RegScavenger *RS) const override;

void adjustForSegmentedStacks(MachineFunction &MF,

MachineBasicBlock &MBB) const override;

diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 50afb192b331..b110628a0a86 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp

@@ -533,7 +533,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,

if (N.getOpcode() == ISD::FrameIndex) {

// Match frame index.

int FI = cast<FrameIndexSDNode>(N)->getIndex();

- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ Base = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

return true;

}

@@ -556,7 +557,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,

Base = N.getOperand(0);

if (Base.getOpcode() == ISD::FrameIndex) {

int FI = cast<FrameIndexSDNode>(Base)->getIndex();

- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ Base = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

}

OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);

return true;

@@ -702,7 +704,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,

Base = N;

if (N.getOpcode() == ISD::FrameIndex) {

int FI = cast<FrameIndexSDNode>(N)->getIndex();

- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ Base = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

} else if (N.getOpcode() == ARMISD::Wrapper &&

N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {

Base = N.getOperand(0);

@@ -722,7 +725,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,

Base = N.getOperand(0);

if (Base.getOpcode() == ISD::FrameIndex) {

int FI = cast<FrameIndexSDNode>(Base)->getIndex();

- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ Base = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

}

Offset = CurDAG->getRegister(0, MVT::i32);

@@ -900,7 +904,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,

Base = N;

if (N.getOpcode() == ISD::FrameIndex) {

int FI = cast<FrameIndexSDNode>(N)->getIndex();

- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ Base = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

}

Offset = CurDAG->getRegister(0, MVT::i32);

Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),

@@ -915,7 +920,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,

Base = N.getOperand(0);

if (Base.getOpcode() == ISD::FrameIndex) {

int FI = cast<FrameIndexSDNode>(Base)->getIndex();

- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ Base = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

}

Offset = CurDAG->getRegister(0, MVT::i32);

@@ -964,7 +970,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,

Base = N;

if (N.getOpcode() == ISD::FrameIndex) {

int FI = cast<FrameIndexSDNode>(N)->getIndex();

- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ Base = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

} else if (N.getOpcode() == ARMISD::Wrapper &&

N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {

Base = N.getOperand(0);

@@ -981,7 +988,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,

Base = N.getOperand(0);

if (Base.getOpcode() == ISD::FrameIndex) {

int FI = cast<FrameIndexSDNode>(Base)->getIndex();

- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ Base = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

}

ARM_AM::AddrOpc AddSub = ARM_AM::add;

@@ -1215,7 +1223,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,

MachineFrameInfo *MFI = MF->getFrameInfo();

if (MFI->getObjectAlignment(FI) < 4)

MFI->setObjectAlignment(FI, 4);

- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ Base = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

return true;

}

@@ -1237,7 +1246,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,

MachineFrameInfo *MFI = MF->getFrameInfo();

if (MFI->getObjectAlignment(FI) < 4)

MFI->setObjectAlignment(FI, 4);

- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ Base = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

}

OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);

return true;

@@ -1285,7 +1295,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,

if (N.getOpcode() == ISD::FrameIndex) {

// Match frame index.

int FI = cast<FrameIndexSDNode>(N)->getIndex();

- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ Base = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

return true;

}

@@ -1314,7 +1325,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,

Base = N.getOperand(0);

if (Base.getOpcode() == ISD::FrameIndex) {

int FI = cast<FrameIndexSDNode>(Base)->getIndex();

- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ Base = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

}

OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);

return true;

@@ -1343,7 +1355,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,

Base = N.getOperand(0);

if (Base.getOpcode() == ISD::FrameIndex) {

int FI = cast<FrameIndexSDNode>(Base)->getIndex();

- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ Base = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

}

OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);

return true;

@@ -1438,7 +1451,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,

Base = N.getOperand(0);

if (Base.getOpcode() == ISD::FrameIndex) {

int FI = cast<FrameIndexSDNode>(Base)->getIndex();

- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ Base = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

}

OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);

@@ -2510,7 +2524,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {

if (UseCP) {

SDValue CPIdx = CurDAG->getTargetConstantPool(

ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),

- TLI->getPointerTy());

+ TLI->getPointerTy(CurDAG->getDataLayout()));

SDNode *ResNode;

if (Subtarget->isThumb()) {

@@ -2540,7 +2554,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {

case ISD::FrameIndex: {

// Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.

int FI = cast<FrameIndexSDNode>(N)->getIndex();

- SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());

+ SDValue TFI = CurDAG->getTargetFrameIndex(

+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));

if (Subtarget->isThumb1Only()) {

// Set the alignment of the frame object to 4, to avoid having to generate

// more than one ADD

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 4b2105b7442f..e335784f6d87 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp

@@ -60,11 +60,6 @@ STATISTIC(NumTailCalls, "Number of tail calls");

STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");

STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");

-cl::opt<bool>

-EnableARMLongCalls("arm-long-calls", cl::Hidden,

- cl::desc("Generate calls via indirect call instructions"),

- cl::init(false));

static cl::opt<bool>

ARMInterworking("arm-interworking", cl::Hidden,

cl::desc("Enable / disable ARM interworking (for debugging only)"),

@@ -548,6 +543,27 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,

setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);

setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);

+ // NEON does not have single instruction CTTZ for vectors.

+ setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);

+ setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);

+ setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);

+ setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);

+ setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);

+ setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);

+ setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);

+ setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);

+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);

+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);

+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);

+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);

+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);

+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);

+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);

+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);

// NEON only has FMA instructions as of VFP4.

if (!Subtarget->hasVFP4()) {

setOperationAction(ISD::FMA, MVT::v2f32, Expand);

@@ -1149,8 +1165,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {

return nullptr;

}

-EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {

- if (!VT.isVector()) return getPointerTy();

+EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,

+ EVT VT) const {

+ if (!VT.isVector())

+ return getPointerTy(DL);

return VT.changeVectorElementTypeToInteger();

}

@@ -1429,7 +1447,8 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,

ISD::ArgFlagsTy Flags) const {

unsigned LocMemOffset = VA.getLocMemOffset();

SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);

- PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);

+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),

+ StackPtr, PtrOff);

return DAG.getStore(Chain, dl, Arg, PtrOff,

MachinePointerInfo::getStack(LocMemOffset),

false, false, 0);

@@ -1453,7 +1472,8 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,

else {

assert(NextVA.isMemLoc());

if (!StackPtr.getNode())

- StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,

+ getPointerTy(DAG.getDataLayout()));

MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),

dl, DAG, NextVA,

@@ -1526,7 +1546,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,

Chain = DAG.getCALLSEQ_START(Chain,

DAG.getIntPtrConstant(NumBytes, dl, true), dl);

- SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

+ SDValue StackPtr =

+ DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));

RegsToPassVector RegsToPass;

SmallVector<SDValue, 8> MemOpChains;

@@ -1607,7 +1628,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,

unsigned RegBegin, RegEnd;

CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);

- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

+ EVT PtrVT =

+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());

unsigned int i, j;

for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {

SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);

@@ -1628,12 +1650,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,

}

if (Flags.getByValSize() > 4*offset) {

+ auto PtrVT = getPointerTy(DAG.getDataLayout());

unsigned LocMemOffset = VA.getLocMemOffset();

SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);

- SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,

- StkPtrOff);

+ SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);

SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);

- SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);

+ SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);

SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,

MVT::i32);

SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,

@@ -1693,8 +1715,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,

bool isARMFunc = false;

bool isLocalARMFunc = false;

ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

+ auto PtrVt = getPointerTy(DAG.getDataLayout());

- if (EnableARMLongCalls) {

+ if (Subtarget->genLongCalls()) {

assert((Subtarget->isTargetWindows() ||

getTargetMachine().getRelocationModel() == Reloc::Static) &&

"long-calls with non-static relocation model!");

@@ -1709,12 +1732,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,

ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);

// Get the address of the callee into a register

- SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);

+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);

CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

- Callee = DAG.getLoad(getPointerTy(), dl,

- DAG.getEntryNode(), CPAddr,

- MachinePointerInfo::getConstantPool(),

- false, false, false, 0);

+ Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,

+ MachinePointerInfo::getConstantPool(), false, false,

+ false, 0);

} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {

const char *Sym = S->getSymbol();

@@ -1724,29 +1746,28 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,

ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,

ARMPCLabelIndex, 0);

// Get the address of the callee into a register

- SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);

+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);

CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

- Callee = DAG.getLoad(getPointerTy(), dl,

- DAG.getEntryNode(), CPAddr,

- MachinePointerInfo::getConstantPool(),

- false, false, false, 0);

+ Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,

+ MachinePointerInfo::getConstantPool(), false, false,

+ false, 0);

}

} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {

const GlobalValue *GV = G->getGlobal();

isDirect = true;

- bool isExt = GV->isDeclaration() || GV->isWeakForLinker();

- bool isStub = (isExt && Subtarget->isTargetMachO()) &&

+ bool isDef = GV->isStrongDefinitionForLinker();

+ bool isStub = (!isDef && Subtarget->isTargetMachO()) &&

getTargetMachine().getRelocationModel() != Reloc::Static;

isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());

// ARM call to a local ARM function is predicable.

- isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);

+ isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);

// tBX takes a register source operand.

if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {

assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");

- Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),

- DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),

- 0, ARMII::MO_NONLAZY));

- Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,

+ Callee = DAG.getNode(

+ ARMISD::WrapperPIC, dl, PtrVt,

+ DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));

+ Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,

MachinePointerInfo::getGOT(), false, false, true, 0);

} else if (Subtarget->isTargetCOFF()) {

assert(Subtarget->isTargetWindows() &&

@@ -1754,20 +1775,20 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,

unsigned TargetFlags = GV->hasDLLImportStorageClass()

? ARMII::MO_DLLIMPORT

: ARMII::MO_NO_FLAG;

- Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0,

- TargetFlags);

+ Callee =

+ DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags);

if (GV->hasDLLImportStorageClass())

- Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),

- DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(),

- Callee), MachinePointerInfo::getGOT(),

- false, false, false, 0);

+ Callee =

+ DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),

+ DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),

+ MachinePointerInfo::getGOT(), false, false, false, 0);

} else {

// On ELF targets for PIC code, direct calls should go through the PLT

unsigned OpFlags = 0;

if (Subtarget->isTargetELF() &&

getTargetMachine().getRelocationModel() == Reloc::PIC_)

OpFlags = ARMII::MO_PLT;

- Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);

+ Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, OpFlags);

}

} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {

isDirect = true;

@@ -1781,22 +1802,20 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,

ARMConstantPoolValue *CPV =

ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,

ARMPCLabelIndex, 4);

- SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);

+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);

CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

- Callee = DAG.getLoad(getPointerTy(), dl,

- DAG.getEntryNode(), CPAddr,

- MachinePointerInfo::getConstantPool(),

- false, false, false, 0);

+ Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,

+ MachinePointerInfo::getConstantPool(), false, false,

+ false, 0);

SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);

- Callee = DAG.getNode(ARMISD::PIC_ADD, dl,

- getPointerTy(), Callee, PICLabel);

+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);

} else {

unsigned OpFlags = 0;

// On ELF targets for PIC code, direct calls should go through the PLT

if (Subtarget->isTargetELF() &&

getTargetMachine().getRelocationModel() == Reloc::PIC_)

OpFlags = ARMII::MO_PLT;

- Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);

+ Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, OpFlags);

}

@@ -2433,7 +2452,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,

ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

unsigned ARMPCLabelIndex = 0;

SDLoc DL(Op);

- EVT PtrVT = getPointerTy();

+ EVT PtrVT = getPointerTy(DAG.getDataLayout());

const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();

Reloc::Model RelocM = getTargetMachine().getRelocationModel();

SDValue CPAddr;

@@ -2462,7 +2481,7 @@ SDValue

ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,

SelectionDAG &DAG) const {

SDLoc dl(GA);

- EVT PtrVT = getPointerTy();

+ EVT PtrVT = getPointerTy(DAG.getDataLayout());

unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;

MachineFunction &MF = DAG.getMachineFunction();

ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

@@ -2508,7 +2527,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,

SDLoc dl(GA);

SDValue Offset;

SDValue Chain = DAG.getEntryNode();

- EVT PtrVT = getPointerTy();

+ EVT PtrVT = getPointerTy(DAG.getDataLayout());

// Get the Thread Pointer

SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

@@ -2574,7 +2593,7 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {

SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,

SelectionDAG &DAG) const {

- EVT PtrVT = getPointerTy();

+ EVT PtrVT = getPointerTy(DAG.getDataLayout());

SDLoc dl(Op);

const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();

if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {

@@ -2617,7 +2636,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,

SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,

SelectionDAG &DAG) const {

- EVT PtrVT = getPointerTy();

+ EVT PtrVT = getPointerTy(DAG.getDataLayout());

SDLoc dl(Op);

const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();

Reloc::Model RelocM = getTargetMachine().getRelocationModel();

@@ -2648,7 +2667,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,

const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();

const ARMII::TOF TargetFlags =

(GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);

- EVT PtrVT = getPointerTy();

+ EVT PtrVT = getPointerTy(DAG.getDataLayout());

SDValue Result;

SDLoc DL(Op);

@@ -2672,7 +2691,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,

MachineFunction &MF = DAG.getMachineFunction();

ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

unsigned ARMPCLabelIndex = AFI->createPICLabelUId();

- EVT PtrVT = getPointerTy();

+ EVT PtrVT = getPointerTy(DAG.getDataLayout());

SDLoc dl(Op);

unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;

ARMConstantPoolValue *CPV =

@@ -2716,14 +2735,14 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,

return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1));

}

case Intrinsic::arm_thread_pointer: {

- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

+ EVT PtrVT = getPointerTy(DAG.getDataLayout());

return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

}

case Intrinsic::eh_sjlj_lsda: {

MachineFunction &MF = DAG.getMachineFunction();

ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

unsigned ARMPCLabelIndex = AFI->createPICLabelUId();

- EVT PtrVT = getPointerTy();

+ EVT PtrVT = getPointerTy(DAG.getDataLayout());

Reloc::Model RelocM = getTargetMachine().getRelocationModel();

SDValue CPAddr;

unsigned PCAdj = (RelocM != Reloc::PIC_)

@@ -2820,7 +2839,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {

// vastart just stores the address of the VarArgsFrameIndex slot into the

// memory location argument.

SDLoc dl(Op);

- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());

SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),

@@ -2850,7 +2869,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,

int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);

// Create load node to retrieve arguments from the stack.

- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());

+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));

ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,

MachinePointerInfo::getFixedStack(FI),

false, false, false, 0);

@@ -2904,8 +2923,9 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,

if (REnd != RBegin)

ArgOffset = -4 * (ARM::R4 - RBegin);

+ auto PtrVT = getPointerTy(DAG.getDataLayout());

int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);

- SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());

+ SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);

SmallVector<SDValue, 4> MemOps;

const TargetRegisterClass *RC =

@@ -2918,8 +2938,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,

DAG.getStore(Val.getValue(1), dl, Val, FIN,

MachinePointerInfo(OrigArg, 4 * i), false, false, 0);

MemOps.push_back(Store);

- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,

- DAG.getConstant(4, dl, getPointerTy()));

+ FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));

}

if (!MemOps.empty())

@@ -3013,6 +3032,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,

unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);

AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);

+ auto PtrVT = getPointerTy(DAG.getDataLayout());

for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {

CCValAssign &VA = ArgLocs[i];

@@ -3035,7 +3055,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,

SDValue ArgValue2;

if (VA.isMemLoc()) {

int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);

- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());

+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,

MachinePointerInfo::getFixedStack(FI),

false, false, false, 0);

@@ -3122,7 +3142,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,

int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,

CurByValIndex, VA.getLocMemOffset(),

Flags.getByValSize());

- InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));

+ InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));

CCInfo.nextInRegsParam();

} else {

unsigned FIOffset = VA.getLocMemOffset();

@@ -3130,7 +3150,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,

FIOffset, true);

// Create load nodes to retrieve arguments from the stack.

- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());

+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,

MachinePointerInfo::getFixedStack(FI),

false, false, false, 0));

@@ -3855,7 +3875,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {

SDValue Index = Op.getOperand(2);

SDLoc dl(Op);

- EVT PTy = getPointerTy();

+ EVT PTy = getPointerTy(DAG.getDataLayout());

JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);

SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);

Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);

@@ -4102,8 +4122,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {

// FIXME? Maybe this could be a TableGen attribute on some registers and

// this table could be generated automatically from RegInfo.

-unsigned ARMTargetLowering::getRegisterByName(const char* RegName,

- EVT VT) const {

+unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,

+ SelectionDAG &DAG) const {

unsigned Reg = StringSwitch<unsigned>(RegName)

.Case("sp", ARM::SP)

.Default(0);

@@ -4163,7 +4183,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {

// Turn f64->i64 into VMOVRRD.

if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {

SDValue Cvt;

- if (TLI.isBigEndian() && SrcVT.isVector() &&

+ if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&

SrcVT.getVectorNumElements() > 1)

Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,

DAG.getVTList(MVT::i32, MVT::i32),

@@ -4283,8 +4303,82 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,

static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,

const ARMSubtarget *ST) {

- EVT VT = N->getValueType(0);

SDLoc dl(N);

+ EVT VT = N->getValueType(0);

+ if (VT.isVector()) {

+ assert(ST->hasNEON());

+ // Compute the least significant set bit: LSB = X & -X

+ SDValue X = N->getOperand(0);

+ SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);

+ SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);

+ EVT ElemTy = VT.getVectorElementType();

+ if (ElemTy == MVT::i8) {

+ // Compute with: cttz(x) = ctpop(lsb - 1)

+ SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,

+ DAG.getTargetConstant(1, dl, ElemTy));

+ SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);

+ return DAG.getNode(ISD::CTPOP, dl, VT, Bits);

+ }

+ if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&

+ (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {

+ // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0

+ unsigned NumBits = ElemTy.getSizeInBits();

+ SDValue WidthMinus1 =

+ DAG.getNode(ARMISD::VMOVIMM, dl, VT,

+ DAG.getTargetConstant(NumBits - 1, dl, ElemTy));

+ SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);

+ return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);

+ }

+ // Compute with: cttz(x) = ctpop(lsb - 1)

+ // Since we can only compute the number of bits in a byte with vcnt.8, we

+ // have to gather the result with pairwise addition (vpaddl) for i16, i32,

+ // and i64.

+ // Compute LSB - 1.

+ SDValue Bits;

+ if (ElemTy == MVT::i64) {

+ // Load constant 0xffff'ffff'ffff'ffff to register.

+ SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,

+ DAG.getTargetConstant(0x1eff, dl, MVT::i32));

+ Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);

+ } else {

+ SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,

+ DAG.getTargetConstant(1, dl, ElemTy));

+ Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);

+ }

+ // Count #bits with vcnt.8.

+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;

+ SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);

+ SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);

+ // Gather the #bits with vpaddl (pairwise add.)

+ EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;

+ SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,

+ DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),

+ Cnt8);

+ if (ElemTy == MVT::i16)

+ return Cnt16;

+ EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;

+ SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,

+ DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),

+ Cnt16);

+ if (ElemTy == MVT::i32)

+ return Cnt32;

+ assert(ElemTy == MVT::i64);

+ SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,

+ DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),

+ Cnt32);

+ return Cnt64;

+ }

if (!ST->hasV6T2Ops())

return SDValue();

@@ -4730,7 +4824,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,

ImmMask <<= 1;

}

- if (DAG.getTargetLoweringInfo().isBigEndian())

+ if (DAG.getDataLayout().isBigEndian())

// swap higher and lower 32 bit word

Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);

@@ -5868,7 +5962,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,

if (BVN->getValueType(0) != MVT::v4i32 ||

BVN->getOpcode() != ISD::BUILD_VECTOR)

return false;

- unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;

+ unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;

unsigned HiElt = 1 - LoElt;

ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));

ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));

@@ -6013,7 +6107,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {

SDNode *BVN = N->getOperand(0).getNode();

assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&

BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");

- unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;

+ unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;

return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32,

BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));

}

@@ -6342,18 +6436,19 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {

SDValue Arg = Op.getOperand(0);

EVT ArgVT = Arg.getValueType();

Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());

+ auto PtrVT = getPointerTy(DAG.getDataLayout());

MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();

- const TargetLowering &TLI = DAG.getTargetLoweringInfo();

// Pair of floats / doubles used to pass the result.

StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);

// Create stack object for sret.

- const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);

- const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);

+ auto &DL = DAG.getDataLayout();

+ const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);

+ const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);

int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);

- SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());

+ SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL));

ArgListTy Args;

ArgListEntry Entry;

@@ -6373,7 +6468,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {

const char *LibcallName = (ArgVT == MVT::f64)

? "__sincos_stret" : "__sincosf_stret";

- SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());

+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));

TargetLowering::CallLoweringInfo CLI(DAG);

CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())

@@ -6387,7 +6482,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {

MachinePointerInfo(), false, false, false, 0);

// Address of cos field.

- SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,

+ SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,

DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));

SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,

MachinePointerInfo(), false, false, false, 0);

@@ -6487,7 +6582,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {

case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);

case ISD::SRL_PARTS:

case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);

- case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);

+ case ISD::CTTZ:

+ case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);

case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);

case ISD::SETCC: return LowerVSETCC(Op, DAG);

case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);

@@ -6845,9 +6941,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,

const Constant *C = ConstantInt::get(Int32Ty, NumLPads);

// MachineConstantPool wants an explicit alignment.

- unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);

+ unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);

if (Align == 0)

- Align = getDataLayout()->getTypeAllocSize(C->getType());

+ Align = MF->getDataLayout().getTypeAllocSize(C->getType());

unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);

unsigned VReg1 = MRI->createVirtualRegister(TRC);

@@ -6935,9 +7031,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,

const Constant *C = ConstantInt::get(Int32Ty, NumLPads);

// MachineConstantPool wants an explicit alignment.

- unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);

+ unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);

if (Align == 0)

- Align = getDataLayout()->getTypeAllocSize(C->getType());

+ Align = MF->getDataLayout().getTypeAllocSize(C->getType());

unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);

unsigned VReg1 = MRI->createVirtualRegister(TRC);

@@ -7313,9 +7409,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,

const Constant *C = ConstantInt::get(Int32Ty, LoopSize);

// MachineConstantPool wants an explicit alignment.

- unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);

+ unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);

if (Align == 0)

- Align = getDataLayout()->getTypeAllocSize(C->getType());

+ Align = MF->getDataLayout().getTypeAllocSize(C->getType());

unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);

if (IsThumb1)

@@ -8001,7 +8097,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,

// Build operand list.

SmallVector<SDValue, 8> Ops;

Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,

- TLI.getPointerTy()));

+ TLI.getPointerTy(DAG.getDataLayout())));

// Input is the vector.

Ops.push_back(Vec);

@@ -8681,7 +8777,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,

std::min(4U, LD->getAlignment() / 2));

DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));

- if (DCI.DAG.getTargetLoweringInfo().isBigEndian())

+ if (DCI.DAG.getDataLayout().isBigEndian())

std::swap (NewLD1, NewLD2);

SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);

return Result;

@@ -9312,7 +9408,9 @@ static SDValue PerformSTORECombine(SDNode *N,

SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);

SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);

for (unsigned i = 0; i < NumElems; ++i)

- ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;

+ ShuffleVec[i] = DAG.getDataLayout().isBigEndian()

+ ? (i + 1) * SizeRatio - 1

+ : i * SizeRatio;

// Can't shuffle using an illegal type.

if (!TLI.isTypeLegal(WideVecVT)) return SDValue();

@@ -9339,8 +9437,8 @@ static SDValue PerformSTORECombine(SDNode *N,

assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());

SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);

SmallVector<SDValue, 8> Chains;

- SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, DL,

- TLI.getPointerTy());

+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,

+ TLI.getPointerTy(DAG.getDataLayout()));

SDValue BasePtr = St->getBasePtr();

// Perform one or more big stores into memory.

@@ -9367,7 +9465,7 @@ static SDValue PerformSTORECombine(SDNode *N,

if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&

StVal.getNode()->hasOneUse()) {

SelectionDAG &DAG = DCI.DAG;

- bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();

+ bool isBigEndian = DAG.getDataLayout().isBigEndian();

SDLoc DL(St);

SDValue BasePtr = St->getBasePtr();

SDValue NewST1 = DAG.getStore(St->getChain(), DL,

@@ -10078,7 +10176,7 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,

// For any little-endian targets with neon, we can support unaligned ld/st

// of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.

// A big-endian target may also explicitly support unaligned accesses

- if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {

+ if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {

if (Fast)

*Fast = true;

return true;

@@ -10317,10 +10415,10 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,

/// isLegalAddressingMode - Return true if the addressing mode represented

/// by AM is legal for this target, for a load/store of the specified type.

-bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,

- Type *Ty,

+bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,

+ const AddrMode &AM, Type *Ty,

unsigned AS) const {

- EVT VT = getValueType(Ty, true);

+ EVT VT = getValueType(DL, Ty, true);

if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))

return false;

@@ -10664,7 +10762,7 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {

/// getConstraintType - Given a constraint letter, return the type of

/// constraint it is for this target.

ARMTargetLowering::ConstraintType

-ARMTargetLowering::getConstraintType(const std::string &Constraint) const {

+ARMTargetLowering::getConstraintType(StringRef Constraint) const {

if (Constraint.size() == 1) {

switch (Constraint[0]) {

default: break;

@@ -10723,10 +10821,8 @@ ARMTargetLowering::getSingleConstraintMatchWeight(

}

typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;

-RCPair

-ARMTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,

- const std::string &Constraint,

- MVT VT) const {

+RCPair ARMTargetLowering::getRegForInlineAsmConstraint(

+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {

if (Constraint.size() == 1) {

// GCC ARM Constraint Letters

switch (Constraint[0]) {

@@ -10974,7 +11070,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {

}

SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),

- getPointerTy());

+ getPointerTy(DAG.getDataLayout()));

Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);

@@ -11083,7 +11179,8 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,

case Intrinsic::arm_neon_vld4lane: {

Info.opc = ISD::INTRINSIC_W_CHAIN;

// Conservatively set memVT to the entire set of vectors loaded.

- uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;

+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();

+ uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;

Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);

Info.ptrVal = I.getArgOperand(0);

Info.offset = 0;

@@ -11103,12 +11200,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,

case Intrinsic::arm_neon_vst4lane: {

Info.opc = ISD::INTRINSIC_VOID;

// Conservatively set memVT to the entire set of vectors stored.

+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();

unsigned NumElts = 0;

for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {

Type *ArgTy = I.getArgOperand(ArgI)->getType();

if (!ArgTy->isVectorTy())

break;

- NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;

+ NumElts += DL.getTypeAllocSize(ArgTy) / 8;

}

Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);

Info.ptrVal = I.getArgOperand(0);

@@ -11122,12 +11220,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,

}

case Intrinsic::arm_ldaex:

case Intrinsic::arm_ldrex: {

+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();

PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());

Info.opc = ISD::INTRINSIC_W_CHAIN;

Info.memVT = MVT::getVT(PtrTy->getElementType());

Info.ptrVal = I.getArgOperand(0);

Info.offset = 0;

- Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());

+ Info.align = DL.getABITypeAlignment(PtrTy->getElementType());

Info.vol = true;

Info.readMem = true;

Info.writeMem = false;

@@ -11135,12 +11234,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,

}

case Intrinsic::arm_stlex:

case Intrinsic::arm_strex: {

+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();

PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());

Info.opc = ISD::INTRINSIC_W_CHAIN;

Info.memVT = MVT::getVT(PtrTy->getElementType());

Info.ptrVal = I.getArgOperand(1);

Info.offset = 0;

- Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());

+ Info.align = DL.getABITypeAlignment(PtrTy->getElementType());

Info.vol = true;

Info.readMem = false;

Info.writeMem = true;

@@ -11427,9 +11527,9 @@ bool ARMTargetLowering::lowerInterleavedLoad(

VectorType *VecTy = Shuffles[0]->getType();

Type *EltTy = VecTy->getVectorElementType();

- const DataLayout *DL = getDataLayout();

- unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy);

- bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64;

+ const DataLayout &DL = LI->getModule()->getDataLayout();

+ unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);

+ bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;

// Skip illegal vector types and vector types of i64/f64 element (vldN doesn't

// support i64/f64 element).

@@ -11439,8 +11539,8 @@ bool ARMTargetLowering::lowerInterleavedLoad(

// A pointer vector can not be the return type of the ldN intrinsics. Need to

// load integer vectors first and then convert to pointer vectors.

if (EltTy->isPointerTy())

- VecTy = VectorType::get(DL->getIntPtrType(EltTy),

- VecTy->getVectorNumElements());

+ VecTy =

+ VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());

static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,

Intrinsic::arm_neon_vld3,

@@ -11517,9 +11617,9 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,

Type *EltTy = VecTy->getVectorElementType();

VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);

- const DataLayout *DL = getDataLayout();

- unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy);

- bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64;

+ const DataLayout &DL = SI->getModule()->getDataLayout();

+ unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);

+ bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;

// Skip illegal sub vector types and vector types of i64/f64 element (vstN

// doesn't support i64/f64 element).

@@ -11533,7 +11633,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,

// StN intrinsics don't support pointer vectors as arguments. Convert pointer

// vectors to integer vectors.

if (EltTy->isPointerTy()) {

- Type *IntTy = DL->getIntPtrType(EltTy);

+ Type *IntTy = DL.getIntPtrType(EltTy);

// Convert to the corresponding integer vector.

Type *IntVecTy =

diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 74396392f8e3..efc9020c193a 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h

@@ -249,7 +249,8 @@ namespace llvm {

}

/// getSetCCResultType - Return the value type to use for ISD::SETCC.

- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;

+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,

+ EVT VT) const override;

MachineBasicBlock *

EmitInstrWithCustomInserter(MachineInstr *MI,

@@ -286,8 +287,8 @@ namespace llvm {

/// isLegalAddressingMode - Return true if the addressing mode represented

/// by AM is legal for this target, for a load/store of the specified type.

- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,

- unsigned AS) const override;

+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,

+ Type *Ty, unsigned AS) const override;

bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;

/// isLegalICmpImmediate - Return true if the specified immediate is legal

@@ -324,8 +325,7 @@ namespace llvm {

bool ExpandInlineAsm(CallInst *CI) const override;

- ConstraintType

- getConstraintType(const std::string &Constraint) const override;

+ ConstraintType getConstraintType(StringRef Constraint) const override;

/// Examine constraint string and operand type and determine a weight value.

/// The operand object must already have been set up with the operand type.

@@ -334,8 +334,7 @@ namespace llvm {

std::pair<unsigned, const TargetRegisterClass *>

getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,

- const std::string &Constraint,

- MVT VT) const override;

+ StringRef Constraint, MVT VT) const override;

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops

/// vector. If it is invalid, don't add anything to Ops. If hasMemory is

@@ -345,8 +344,8 @@ namespace llvm {

std::vector<SDValue> &Ops,

SelectionDAG &DAG) const override;

- unsigned getInlineAsmMemConstraint(

- const std::string &ConstraintCode) const override {

+ unsigned

+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {

if (ConstraintCode == "Q")

return InlineAsm::Constraint_Q;

else if (ConstraintCode.size() == 2) {

@@ -533,7 +532,8 @@ namespace llvm {

SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;

- unsigned getRegisterByName(const char* RegName, EVT VT) const override;

+ unsigned getRegisterByName(const char* RegName, EVT VT,

+ SelectionDAG &DAG) const override;

/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster

/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be

diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index b8cac135baf6..61c45af26fe1 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td

@@ -306,8 +306,8 @@ def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;

def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;

def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;

-def IsLE : Predicate<"getTargetLowering()->isLittleEndian()">;

-def IsBE : Predicate<"getTargetLowering()->isBigEndian()">;

+def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;

+def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;

//===----------------------------------------------------------------------===//

// ARM Flag Definitions.

diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 245c9e869bf6..37352810c99f 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp

@@ -31,11 +31,13 @@

#include "llvm/CodeGen/MachineInstr.h"

#include "llvm/CodeGen/MachineInstrBuilder.h"

#include "llvm/CodeGen/MachineRegisterInfo.h"

-#include "llvm/CodeGen/RegisterScavenging.h"

+#include "llvm/CodeGen/RegisterClassInfo.h"

#include "llvm/CodeGen/SelectionDAGNodes.h"

+#include "llvm/CodeGen/LivePhysRegs.h"

#include "llvm/IR/DataLayout.h"

#include "llvm/IR/DerivedTypes.h"

#include "llvm/IR/Function.h"

+#include "llvm/Support/Allocator.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/raw_ostream.h"

@@ -65,12 +67,18 @@ namespace {

static char ID;

ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

+ const MachineFunction *MF;

const TargetInstrInfo *TII;

const TargetRegisterInfo *TRI;

+ const MachineRegisterInfo *MRI;

const ARMSubtarget *STI;

const TargetLowering *TL;

ARMFunctionInfo *AFI;

- RegScavenger *RS;

+ LivePhysRegs LiveRegs;

+ RegisterClassInfo RegClassInfo;

+ MachineBasicBlock::const_iterator LiveRegPos;

+ bool LiveRegsValid;

+ bool RegClassInfoValid;

bool isThumb1, isThumb2;

bool runOnMachineFunction(MachineFunction &Fn) override;

@@ -80,64 +88,60 @@ namespace {

}

private:

+ /// A set of load/store MachineInstrs with same base register sorted by

+ /// offset.

struct MemOpQueueEntry {

- int Offset;

- unsigned Reg;

- bool isKill;

- unsigned Position;

- MachineBasicBlock::iterator MBBI;

- bool Merged;

- MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,

- MachineBasicBlock::iterator i)

- : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}

+ MachineInstr *MI;

+ int Offset; ///< Load/Store offset.

+ unsigned Position; ///< Position as counted from end of basic block.

+ MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)

+ : MI(MI), Offset(Offset), Position(Position) {}

};

typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

- typedef MemOpQueue::iterator MemOpQueueIter;

- void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,

- const MemOpQueue &MemOps, unsigned DefReg,

- unsigned RangeBegin, unsigned RangeEnd);

+ /// A set of MachineInstrs that fulfill (nearly all) conditions to get

+ /// merged into a LDM/STM.

+ struct MergeCandidate {

+ /// List of instructions ordered by load/store offset.

+ SmallVector<MachineInstr*, 4> Instrs;

+ /// Index in Instrs of the instruction being latest in the schedule.

+ unsigned LatestMIIdx;

+ /// Index in Instrs of the instruction being earliest in the schedule.

+ unsigned EarliestMIIdx;

+ /// Index into the basic block where the merged instruction will be

+ /// inserted. (See MemOpQueueEntry.Position)

+ unsigned InsertPos;

+ /// Whether the instructions can be merged into a ldm/stm instruction.

+ bool CanMergeToLSMulti;

+ /// Whether the instructions can be merged into a ldrd/strd instruction.

+ bool CanMergeToLSDouble;

+ };

+ SpecificBumpPtrAllocator<MergeCandidate> Allocator;

+ SmallVector<const MergeCandidate*,4> Candidates;

+ SmallVector<MachineInstr*,4> MergeBaseCandidates;

+ void moveLiveRegsBefore(const MachineBasicBlock &MBB,

+ MachineBasicBlock::const_iterator Before);

+ unsigned findFreeReg(const TargetRegisterClass &RegClass);

void UpdateBaseRegUses(MachineBasicBlock &MBB,

MachineBasicBlock::iterator MBBI,

- DebugLoc dl, unsigned Base, unsigned WordOffset,

+ DebugLoc DL, unsigned Base, unsigned WordOffset,

ARMCC::CondCodes Pred, unsigned PredReg);

- bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,

- int Offset, unsigned Base, bool BaseKill, unsigned Opcode,

- ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,

- DebugLoc dl,

- ArrayRef<std::pair<unsigned, bool> > Regs,

- ArrayRef<unsigned> ImpDefs);

- void MergeOpsUpdate(MachineBasicBlock &MBB,

- MemOpQueue &MemOps,

- unsigned memOpsBegin,

- unsigned memOpsEnd,

- unsigned insertAfter,

- int Offset,

- unsigned Base,

- bool BaseKill,

- unsigned Opcode,

- ARMCC::CondCodes Pred,

- unsigned PredReg,

- unsigned Scratch,

- DebugLoc dl,

- SmallVectorImpl<MachineBasicBlock::iterator> &Merges);

- void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,

- unsigned Opcode, unsigned Size,

- ARMCC::CondCodes Pred, unsigned PredReg,

- unsigned Scratch, MemOpQueue &MemOps,

- SmallVectorImpl<MachineBasicBlock::iterator> &Merges);

- void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);

+ MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,

+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,

+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,

+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);

+ MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,

+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,

+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,

+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;

+ void FormCandidates(const MemOpQueue &MemOps);

+ MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);

bool FixInvalidRegPairOp(MachineBasicBlock &MBB,

MachineBasicBlock::iterator &MBBI);

- bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,

- MachineBasicBlock::iterator MBBI,

- const TargetInstrInfo *TII,

- bool &Advance,

- MachineBasicBlock::iterator &I);

- bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,

- MachineBasicBlock::iterator MBBI,

- bool &Advance,

- MachineBasicBlock::iterator &I);

+ bool MergeBaseUpdateLoadStore(MachineInstr *MI);

+ bool MergeBaseUpdateLSMultiple(MachineInstr *MI);

+ bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;

bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);

bool MergeReturnIntoLDM(MachineBasicBlock &MBB);

};

@@ -185,6 +189,14 @@ static int getMemoryOpOffset(const MachineInstr *MI) {

return Offset;

}

+static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {

+ return MI.getOperand(1);

+static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {

+ return MI.getOperand(0);

static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {

switch (Opcode) {

default: llvm_unreachable("Unhandled opcode!");

@@ -348,6 +360,10 @@ static bool isi32Store(unsigned Opc) {

return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);

}

+static bool isLoadSingle(unsigned Opc) {

+ return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;

static unsigned getImmScale(unsigned Opc) {

switch (Opc) {

default: llvm_unreachable("Unhandled opcode!");

@@ -365,12 +381,55 @@ static unsigned getImmScale(unsigned Opc) {

}

+static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {

+ switch (MI->getOpcode()) {

+ default: return 0;

+ case ARM::LDRi12:

+ case ARM::STRi12:

+ case ARM::tLDRi:

+ case ARM::tSTRi:

+ case ARM::tLDRspi:

+ case ARM::tSTRspi:

+ case ARM::t2LDRi8:

+ case ARM::t2LDRi12:

+ case ARM::t2STRi8:

+ case ARM::t2STRi12:

+ case ARM::VLDRS:

+ case ARM::VSTRS:

+ return 4;

+ case ARM::VLDRD:

+ case ARM::VSTRD:

+ return 8;

+ case ARM::LDMIA:

+ case ARM::LDMDA:

+ case ARM::LDMDB:

+ case ARM::LDMIB:

+ case ARM::STMIA:

+ case ARM::STMDA:

+ case ARM::STMDB:

+ case ARM::STMIB:

+ case ARM::tLDMIA:

+ case ARM::tLDMIA_UPD:

+ case ARM::tSTMIA_UPD:

+ case ARM::t2LDMIA:

+ case ARM::t2LDMDB:

+ case ARM::t2STMIA:

+ case ARM::t2STMDB:

+ case ARM::VLDMSIA:

+ case ARM::VSTMSIA:

+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;

+ case ARM::VLDMDIA:

+ case ARM::VSTMDIA:

+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;

+ }

/// Update future uses of the base register with the offset introduced

/// due to writeback. This function only works on Thumb1.

void

ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,

MachineBasicBlock::iterator MBBI,

- DebugLoc dl, unsigned Base,

+ DebugLoc DL, unsigned Base,

unsigned WordOffset,

ARMCC::CondCodes Pred, unsigned PredReg) {

assert(isThumb1 && "Can only update base register uses for Thumb1!");

@@ -398,7 +457,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,

Offset = MO.getImm() - WordOffset * getImmScale(Opc);

// If storing the base register, it needs to be reset first.

- unsigned InstrSrcReg = MBBI->getOperand(0).getReg();

+ unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))

MO.setImm(Offset);

@@ -439,7 +498,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,

if (InsertSub) {

// An instruction above couldn't be updated, so insert a sub.

- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)

+ AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)

.addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);

return;

}

@@ -457,31 +516,65 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,

// See PR21029.

if (MBBI != MBB.end()) --MBBI;

AddDefaultT1CC(

- BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)

+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)

.addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);

}

+/// Return the first register of class \p RegClass not in LiveRegs, or 0 if none.

+unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {

+ if (!RegClassInfoValid) {

+ RegClassInfo.runOnMachineFunction(*MF);

+ RegClassInfoValid = true;

+ }

+ for (unsigned Reg : RegClassInfo.getOrder(&RegClass))

+ if (!LiveRegs.contains(Reg))

+ return Reg;

+ return 0;

+/// Compute live registers just before instruction \p Before (in normal schedule

+/// direction). Computes backwards so multiple queries in the same block must

+/// come in reverse order.

+void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,

+ MachineBasicBlock::const_iterator Before) {

+ // Initialize if we never queried in this block.

+ if (!LiveRegsValid) {

+ LiveRegs.init(TRI);

+ LiveRegs.addLiveOuts(&MBB, true);

+ LiveRegPos = MBB.end();

+ LiveRegsValid = true;

+ }

+ // Move backward just before the "Before" position.

+ while (LiveRegPos != Before) {

+ --LiveRegPos;

+ LiveRegs.stepBackward(*LiveRegPos);

+ }

+static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,

+ unsigned Reg) {

+ for (const std::pair<unsigned, bool> &R : Regs)

+ if (R.first == Reg)

+ return true;

+ return false;

/// Create and insert a LDM or STM with Base as base register and registers in

/// Regs as the register operands that would be loaded / stored. It returns

/// true if the transformation is done.

-bool

-ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,

- MachineBasicBlock::iterator MBBI,

- int Offset, unsigned Base, bool BaseKill,

- unsigned Opcode, ARMCC::CondCodes Pred,

- unsigned PredReg, unsigned Scratch, DebugLoc dl,

- ArrayRef<std::pair<unsigned, bool> > Regs,

- ArrayRef<unsigned> ImpDefs) {

- // Only a single register to load / store. Don't bother.

+MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,

+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,

+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,

+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {

unsigned NumRegs = Regs.size();

- if (NumRegs <= 1)

- return false;

+ assert(NumRegs > 1);

// For Thumb1 targets, it might be necessary to clobber the CPSR to merge.

// Compute liveness information for that register to make the decision.

bool SafeToClobberCPSR = !isThumb1 ||

- (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==

+ (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==

MachineBasicBlock::LQR_Dead);

bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

@@ -489,17 +582,14 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,

// Exception: If the base register is in the input reglist, Thumb1 LDM is

// non-writeback.

// It's also not possible to merge an STR of the base register in Thumb1.

- if (isThumb1)

- for (const std::pair<unsigned, bool> &R : Regs)

- if (Base == R.first) {

- assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");

- if (Opcode == ARM::tLDRi) {

- Writeback = false;

- break;

- } else if (Opcode == ARM::tSTRi) {

- return false;

- }

+ if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {

+ assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");

+ if (Opcode == ARM::tLDRi) {

+ Writeback = false;

+ } else if (Opcode == ARM::tSTRi) {

+ return nullptr;

+ }

ARM_AM::AMSubMode Mode = ARM_AM::ia;

// VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.

@@ -516,18 +606,18 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,

} else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {

// Check if this is a supported opcode before inserting instructions to

// calculate a new base register.

- if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

+ if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

// If starting offset isn't zero, insert a MI to materialize a new base.

// But only do so if it is cost effective, i.e. merging more than two

// loads / stores.

if (NumRegs <= 2)

- return false;

+ return nullptr;

// On Thumb1, it's not worth materializing a new base register without

// clobbering the CPSR (i.e. not using ADDS/SUBS).

if (!SafeToClobberCPSR)

- return false;

+ return nullptr;

unsigned NewBase;

if (isi32Load(Opcode)) {

@@ -535,10 +625,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,

// use as the new base.

NewBase = Regs[NumRegs-1].first;

} else {

- // Use the scratch register to use as a new base.

- NewBase = Scratch;

+ // Find a free register that we can use as scratch register.

+ moveLiveRegsBefore(MBB, InsertBefore);

+ // The merged instruction does not exist yet but will use several Regs if

+ // it is a Store.

+ if (!isLoadSingle(Opcode))

+ for (const std::pair<unsigned, bool> &R : Regs)

+ LiveRegs.addReg(R.first);

+ NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);

if (NewBase == 0)

- return false;

+ return nullptr;

}

int BaseOpc =

@@ -557,7 +654,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,

if (!TL->isLegalAddImmediate(Offset))

// FIXME: Try add with register operand?

- return false; // Probably not worth it then.

+ return nullptr; // Probably not worth it then.

+ // We can only append a kill flag to the add/sub input if the value is not

+ // used in the register list of the stm as well.

+ bool KillOldBase = BaseKill &&

+ (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

if (isThumb1) {

// Thumb1: depending on immediate size, use either

@@ -572,43 +674,44 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,

!STI->hasV6Ops()) {

// thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr

if (Pred != ARMCC::AL)

- return false;

- BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)

- .addReg(Base, getKillRegState(BaseKill));

+ return nullptr;

+ BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)

+ .addReg(Base, getKillRegState(KillOldBase));

} else

- BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)

- .addReg(Base, getKillRegState(BaseKill))

+ BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)

+ .addReg(Base, getKillRegState(KillOldBase))

.addImm(Pred).addReg(PredReg);

- // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.

+ // The following ADDS/SUBS becomes an update.

Base = NewBase;

- BaseKill = false;

+ KillOldBase = true;

}

if (BaseOpc == ARM::tADDrSPi) {

assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");

- BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)

- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)

+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)

+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)

.addImm(Pred).addReg(PredReg);

} else

- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)

- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)

+ AddDefaultT1CC(

+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)

+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)

.addImm(Pred).addReg(PredReg);

} else {

- BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)

- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)

+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)

+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)

.addImm(Pred).addReg(PredReg).addReg(0);

}

Base = NewBase;

BaseKill = true; // New base is always killed straight away.

}

- bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||

- Opcode == ARM::VLDRD);

+ bool isDef = isLoadSingle(Opcode);

// Get LS multiple opcode. Note that for Thumb1 this might be an opcode with

// base register writeback.

Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);

- if (!Opcode) return false;

+ if (!Opcode)

+ return nullptr;

// Check if a Thumb1 LDM/STM merge is safe. This is the case if:

// - There is no writeback (LDM of base register),

@@ -619,7 +722,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,

// It's safe to return here since the code to materialize a new base register

// above is also conditional on SafeToClobberCPSR.

if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)

- return false;

+ return nullptr;

MachineInstrBuilder MIB;

@@ -628,7 +731,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,

// Update tLDMIA with writeback if necessary.

Opcode = ARM::tLDMIA_UPD;

- MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

+ MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

// Thumb1: we might need to set base writeback when building the MI.

MIB.addReg(Base, getDefRegState(true))

@@ -637,381 +740,257 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,

// The base isn't dead after a merged instruction with writeback.

// Insert a sub instruction after the newly formed instruction to reset.

if (!BaseKill)

- UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);

+ UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

} else {

// No writeback, simply build the MachineInstr.

- MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

+ MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

MIB.addReg(Base, getKillRegState(BaseKill));

}

MIB.addImm(Pred).addReg(PredReg);

for (const std::pair<unsigned, bool> &R : Regs)

- MIB = MIB.addReg(R.first, getDefRegState(isDef)

- | getKillRegState(R.second));

+ MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

- // Add implicit defs for super-registers.

- for (unsigned ImpDef : ImpDefs)

- MIB.addReg(ImpDef, RegState::ImplicitDefine);

- return true;

+ return MIB.getInstr();

}

-/// Find all instructions using a given imp-def within a range.

-///

-/// We are trying to combine a range of instructions, one of which (located at

-/// position RangeBegin) implicitly defines a register. The final LDM/STM will

-/// be placed at RangeEnd, and so any uses of this definition between RangeStart

-/// and RangeEnd must be modified to use an undefined value.

-///

-/// The live range continues until we find a second definition or one of the

-/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so

-/// we must consider all uses and decide which are relevant in a second pass.

-void ARMLoadStoreOpt::findUsesOfImpDef(

- SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,

- unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {

- std::map<unsigned, MachineOperand *> Uses;

- unsigned LastLivePos = RangeEnd;

- // First we find all uses of this register with Position between RangeBegin

- // and RangeEnd, any or all of these could be uses of a definition at

- // RangeBegin. We also record the latest position a definition at RangeBegin

- // would be considered live.

- for (unsigned i = 0; i < MemOps.size(); ++i) {

- MachineInstr &MI = *MemOps[i].MBBI;

- unsigned MIPosition = MemOps[i].Position;

- if (MIPosition <= RangeBegin || MIPosition > RangeEnd)

- continue;

- // If this instruction defines the register, then any later use will be of

- // that definition rather than ours.

- if (MI.definesRegister(DefReg))

- LastLivePos = std::min(LastLivePos, MIPosition);

- MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);

- if (!UseOp)

- continue;

- // If this instruction kills the register then (assuming liveness is

- // correct when we start) we don't need to think about anything after here.

- if (UseOp->isKill())

- LastLivePos = std::min(LastLivePos, MIPosition);

- Uses[MIPosition] = UseOp;

- }

- // Now we traverse the list of all uses, and append the ones that actually use

- // our definition to the requested list.

- for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),

- E = Uses.end();

- I != E; ++I) {

- // List is sorted by position so once we've found one out of range there

- // will be no more to consider.

- if (I->first > LastLivePos)

- break;

- UsesOfImpDefs.push_back(I->second);

+MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,

+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,

+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,

+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {

+ bool IsLoad = isi32Load(Opcode);

+ assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");

+ unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

+ assert(Regs.size() == 2);

+ MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,

+ TII->get(LoadStoreOpcode));

+ if (IsLoad) {

+ MIB.addReg(Regs[0].first, RegState::Define)

+ .addReg(Regs[1].first, RegState::Define);

+ } else {

+ MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))

+ .addReg(Regs[1].first, getKillRegState(Regs[1].second));

}

+ MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);

+ return MIB.getInstr();

}

/// Call MergeOps and update MemOps and merges accordingly on success.

-void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,

- MemOpQueue &memOps,

- unsigned memOpsBegin, unsigned memOpsEnd,

- unsigned insertAfter, int Offset,

- unsigned Base, bool BaseKill,

- unsigned Opcode,

- ARMCC::CondCodes Pred, unsigned PredReg,

- unsigned Scratch,

- DebugLoc dl,

- SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {

- // First calculate which of the registers should be killed by the merged

- // instruction.

- const unsigned insertPos = memOps[insertAfter].Position;

- SmallSet<unsigned, 4> KilledRegs;

- DenseMap<unsigned, unsigned> Killer;

- for (unsigned i = 0, e = memOps.size(); i != e; ++i) {

- if (i == memOpsBegin) {

- i = memOpsEnd;

- if (i == e)

- break;

- }

- if (memOps[i].Position < insertPos && memOps[i].isKill) {

- unsigned Reg = memOps[i].Reg;

+MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {

+ const MachineInstr *First = Cand.Instrs.front();

+ unsigned Opcode = First->getOpcode();

+ bool IsLoad = isLoadSingle(Opcode);

+ SmallVector<std::pair<unsigned, bool>, 8> Regs;

+ SmallVector<unsigned, 4> ImpDefs;

+ DenseSet<unsigned> KilledRegs;

+ // Determine list of registers and list of implicit super-register defs.

+ for (const MachineInstr *MI : Cand.Instrs) {

+ const MachineOperand &MO = getLoadStoreRegOp(*MI);

+ unsigned Reg = MO.getReg();

+ bool IsKill = MO.isKill();

+ if (IsKill)

KilledRegs.insert(Reg);

- Killer[Reg] = i;

+ Regs.push_back(std::make_pair(Reg, IsKill));

+ if (IsLoad) {

+ // Collect any implicit defs of super-registers, after merging we can't

+ // be sure anymore that we properly preserved these live ranges and must

+ // remove these implicit operands.

+ for (const MachineOperand &MO : MI->implicit_operands()) {

+ if (!MO.isReg() || !MO.isDef() || MO.isDead())

+ continue;

+ assert(MO.isImplicit());

+ unsigned DefReg = MO.getReg();

+ if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())

+ continue;

+ // We can ignore cases where the super-reg is read and written.

+ if (MI->readsRegister(DefReg))

+ continue;

+ ImpDefs.push_back(DefReg);

+ }

}

- for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {

- MachineOperand &TransferOp = memOps[i].MBBI->getOperand(0);

- if (TransferOp.isUse() && TransferOp.getReg() == Base)

- BaseKill = false;

+ // Attempt the merge.

+ typedef MachineBasicBlock::iterator iterator;

+ MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];

+ iterator InsertBefore = std::next(iterator(LatestMI));

+ MachineBasicBlock &MBB = *LatestMI->getParent();

+ unsigned Offset = getMemoryOpOffset(First);

+ unsigned Base = getLoadStoreBaseOp(*First).getReg();

+ bool BaseKill = LatestMI->killsRegister(Base);

+ unsigned PredReg = 0;

+ ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);

+ DebugLoc DL = First->getDebugLoc();

+ MachineInstr *Merged = nullptr;

+ if (Cand.CanMergeToLSDouble)

+ Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,

+ Opcode, Pred, PredReg, DL, Regs);

+ if (!Merged && Cand.CanMergeToLSMulti)

+ Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,

+ Opcode, Pred, PredReg, DL, Regs);

+ if (!Merged)

+ return nullptr;

+ // Determine earliest instruction that will get removed. We then keep an

+ // iterator just above it so the following erases don't invalidate it.

+ iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);

+ bool EarliestAtBegin = false;

+ if (EarliestI == MBB.begin()) {

+ EarliestAtBegin = true;

+ } else {

+ EarliestI = std::prev(EarliestI);

}

- SmallVector<std::pair<unsigned, bool>, 8> Regs;

- SmallVector<unsigned, 8> ImpDefs;

- SmallVector<MachineOperand *, 8> UsesOfImpDefs;

- for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {

- unsigned Reg = memOps[i].Reg;

- // If we are inserting the merged operation after an operation that

- // uses the same register, make sure to transfer any kill flag.

- bool isKill = memOps[i].isKill || KilledRegs.count(Reg);

- Regs.push_back(std::make_pair(Reg, isKill));

- // Collect any implicit defs of super-registers. They must be preserved.

- for (const MachineOperand &MO : memOps[i].MBBI->operands()) {

- if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead())

- continue;

- unsigned DefReg = MO.getReg();

- if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())

- ImpDefs.push_back(DefReg);

- // There may be other uses of the definition between this instruction and

- // the eventual LDM/STM position. These should be marked undef if the

- // merge takes place.

- findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,

- insertPos);

+ // Remove instructions which have been merged.

+ for (MachineInstr *MI : Cand.Instrs)

+ MBB.erase(MI);

+ // Determine range between the earliest removed instruction and the new one.

+ if (EarliestAtBegin)

+ EarliestI = MBB.begin();

+ else

+ EarliestI = std::next(EarliestI);

+ auto FixupRange = make_range(EarliestI, iterator(Merged));

+ if (isLoadSingle(Opcode)) {

+ // If the previous loads defined a super-reg, then we have to mark earlier

+ // operands undef; Replicate the super-reg def on the merged instruction.

+ for (MachineInstr &MI : FixupRange) {

+ for (unsigned &ImpDefReg : ImpDefs) {

+ for (MachineOperand &MO : MI.implicit_operands()) {

+ if (!MO.isReg() || MO.getReg() != ImpDefReg)

+ continue;

+ if (MO.readsReg())

+ MO.setIsUndef();

+ else if (MO.isDef())

+ ImpDefReg = 0;

+ }

}

- }

- // Try to do the merge.

- MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;

- ++Loc;

- if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,

- Pred, PredReg, Scratch, dl, Regs, ImpDefs))

- return;

- // Merge succeeded, update records.

- Merges.push_back(std::prev(Loc));

- // In gathering loads together, we may have moved the imp-def of a register

- // past one of its uses. This is OK, since we know better than the rest of

- // LLVM what's OK with ARM loads and stores; but we still have to adjust the

- // affected uses.

- for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),

- E = UsesOfImpDefs.end();

- I != E; ++I)

- (*I)->setIsUndef();

- for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {

- // Remove kill flags from any memops that come before insertPos.

- if (Regs[i-memOpsBegin].second) {

- unsigned Reg = Regs[i-memOpsBegin].first;

- if (KilledRegs.count(Reg)) {

- unsigned j = Killer[Reg];

- int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);

- assert(Idx >= 0 && "Cannot find killing operand");

- memOps[j].MBBI->getOperand(Idx).setIsKill(false);

- memOps[j].isKill = false;

+ MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);

+ for (unsigned ImpDef : ImpDefs)

+ MIB.addReg(ImpDef, RegState::ImplicitDefine);

+ } else {

+ // Remove kill flags: We are possibly storing the values later now.

+ assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);

+ for (MachineInstr &MI : FixupRange) {

+ for (MachineOperand &MO : MI.uses()) {

+ if (!MO.isReg() || !MO.isKill())

+ continue;

+ if (KilledRegs.count(MO.getReg()))

+ MO.setIsKill(false);

}

- memOps[i].isKill = true;

}

- MBB.erase(memOps[i].MBBI);

- // Update this memop to refer to the merged instruction.

- // We may need to move kill flags again.

- memOps[i].Merged = true;

- memOps[i].MBBI = Merges.back();

- memOps[i].Position = insertPos;

+ assert(ImpDefs.empty());

}

- // Update memOps offsets, since they may have been modified by MergeOps.

- for (auto &MemOp : memOps) {

- MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);

- }

+ return Merged;

}

-/// Merge a number of load / store instructions into one or more load / store

-/// multiple instructions.

-void

-ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,

- unsigned Base, unsigned Opcode, unsigned Size,

- ARMCC::CondCodes Pred, unsigned PredReg,

- unsigned Scratch, MemOpQueue &MemOps,

- SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {

- bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);

- int Offset = MemOps[SIndex].Offset;

- int SOffset = Offset;

- unsigned insertAfter = SIndex;

- MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;

- DebugLoc dl = Loc->getDebugLoc();

- const MachineOperand &PMO = Loc->getOperand(0);

- unsigned PReg = PMO.getReg();

- unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);

- unsigned Count = 1;

- unsigned Limit = ~0U;

- bool BaseKill = false;

- // vldm / vstm limit are 32 for S variants, 16 for D variants.

+static bool isValidLSDoubleOffset(int Offset) {

+ unsigned Value = abs(Offset);

+ // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally

+ // multiplied by 4.

+ return (Value % 4) == 0 && Value < 1024;

- switch (Opcode) {

- default: break;

- case ARM::VSTRS:

- Limit = 32;

- break;

- case ARM::VSTRD:

- Limit = 16;

- break;

- case ARM::VLDRD:

- Limit = 16;

- break;

- case ARM::VLDRS:

- Limit = 32;

- break;

- }

+/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.

+void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {

+ const MachineInstr *FirstMI = MemOps[0].MI;

+ unsigned Opcode = FirstMI->getOpcode();

+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);

+ unsigned Size = getLSMultipleTransferSize(FirstMI);

+ unsigned SIndex = 0;

+ unsigned EIndex = MemOps.size();

+ do {

+ // Look at the first instruction.

+ const MachineInstr *MI = MemOps[SIndex].MI;

+ int Offset = MemOps[SIndex].Offset;

+ const MachineOperand &PMO = getLoadStoreRegOp(*MI);

+ unsigned PReg = PMO.getReg();

+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);

+ unsigned Latest = SIndex;

+ unsigned Earliest = SIndex;

+ unsigned Count = 1;

+ bool CanMergeToLSDouble =

+ STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);

+ // ARM errata 602117: LDRD with base in list may result in incorrect base

+ // register when interrupted or faulted.

+ if (STI->isCortexM3() && isi32Load(Opcode) &&

+ PReg == getLoadStoreBaseOp(*MI).getReg())

+ CanMergeToLSDouble = false;

+ bool CanMergeToLSMulti = true;

+ // On Swift, avoid vldm/vstm starting with an odd register number, as that

+ // needs more uops than single vldrs.

+ if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)

+ CanMergeToLSMulti = false;

+ // Merge following instructions where possible.

+ for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {

+ int NewOffset = MemOps[I].Offset;

+ if (NewOffset != Offset + (int)Size)

+ break;

+ const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);

+ unsigned Reg = MO.getReg();

+ unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);

+ // See if the current load/store may be part of a multi load/store.

+ bool PartOfLSMulti = CanMergeToLSMulti;

+ if (PartOfLSMulti) {

+ // Cannot load from SP

+ if (Reg == ARM::SP)

+ PartOfLSMulti = false;

+ // Register numbers must be in ascending order.

+ else if (RegNum <= PRegNum)

+ PartOfLSMulti = false;

+ // For VFP / NEON load/store multiples, the registers must be

+ // consecutive and within the limit on the number of registers per

+ // instruction.

+ else if (!isNotVFP && RegNum != PRegNum+1)

+ PartOfLSMulti = false;

+ }

+ // See if the current load/store may be part of a double load/store.

+ bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

- for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {

- int NewOffset = MemOps[i].Offset;

- const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);

- unsigned Reg = MO.getReg();

- unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);

- // Register numbers must be in ascending order. For VFP / NEON load and

- // store multiples, the registers must also be consecutive and within the

- // limit on the number of registers per instruction.

- if (Reg != ARM::SP &&

- NewOffset == Offset + (int)Size &&

- ((isNotVFP && RegNum > PRegNum) ||

- ((Count < Limit) && RegNum == PRegNum+1)) &&

- // On Swift we don't want vldm/vstm to start with a odd register num

- // because Q register unaligned vldm/vstm need more uops.

- (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {

+ if (!PartOfLSMulti && !PartOfLSDouble)

+ break;

+ CanMergeToLSMulti &= PartOfLSMulti;

+ CanMergeToLSDouble &= PartOfLSDouble;

+ // Track MemOp with latest and earliest position (Positions are

+ // counted in reverse).

+ unsigned Position = MemOps[I].Position;

+ if (Position < MemOps[Latest].Position)

+ Latest = I;

+ else if (Position > MemOps[Earliest].Position)

+ Earliest = I;

+ // Prepare for next MemOp.

Offset += Size;

PRegNum = RegNum;

- ++Count;

- } else {

- // Can't merge this in. Try merge the earlier ones first.

- // We need to compute BaseKill here because the MemOps may have been

- // reordered.

- BaseKill = Loc->killsRegister(Base);

- MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,

- BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);

- MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,

- MemOps, Merges);

- return;

}

- if (MemOps[i].Position > MemOps[insertAfter].Position) {

- insertAfter = i;

- Loc = MemOps[i].MBBI;

- }

- BaseKill = Loc->killsRegister(Base);

- MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,

- Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);

-static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,

- unsigned Bytes, unsigned Limit,

- ARMCC::CondCodes Pred, unsigned PredReg) {

- unsigned MyPredReg = 0;

- if (!MI)

- return false;

- bool CheckCPSRDef = false;

- switch (MI->getOpcode()) {

- default: return false;

- case ARM::tSUBi8:

- case ARM::t2SUBri:

- case ARM::SUBri:

- CheckCPSRDef = true;

- break;

- case ARM::tSUBspi:

- break;

- }

- // Make sure the offset fits in 8 bits.

- if (Bytes == 0 || (Limit && Bytes >= Limit))

- return false;

- unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||

- MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME

- if (!(MI->getOperand(0).getReg() == Base &&

- MI->getOperand(1).getReg() == Base &&

- (MI->getOperand(2).getImm() * Scale) == Bytes &&

- getInstrPredicate(MI, MyPredReg) == Pred &&

- MyPredReg == PredReg))

- return false;

- return CheckCPSRDef ? !definesCPSR(MI) : true;

-static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,

- unsigned Bytes, unsigned Limit,

- ARMCC::CondCodes Pred, unsigned PredReg) {

- unsigned MyPredReg = 0;

- if (!MI)

- return false;

- bool CheckCPSRDef = false;

- switch (MI->getOpcode()) {

- default: return false;

- case ARM::tADDi8:

- case ARM::t2ADDri:

- case ARM::ADDri:

- CheckCPSRDef = true;

- break;

- case ARM::tADDspi:

- break;

- }

- if (Bytes == 0 || (Limit && Bytes >= Limit))

- // Make sure the offset fits in 8 bits.

- return false;

- unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||

- MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME

- if (!(MI->getOperand(0).getReg() == Base &&

- MI->getOperand(1).getReg() == Base &&

- (MI->getOperand(2).getImm() * Scale) == Bytes &&

- getInstrPredicate(MI, MyPredReg) == Pred &&

- MyPredReg == PredReg))

- return false;

- return CheckCPSRDef ? !definesCPSR(MI) : true;

-static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {

- switch (MI->getOpcode()) {

- default: return 0;

- case ARM::LDRi12:

- case ARM::STRi12:

- case ARM::tLDRi:

- case ARM::tSTRi:

- case ARM::tLDRspi:

- case ARM::tSTRspi:

- case ARM::t2LDRi8:

- case ARM::t2LDRi12:

- case ARM::t2STRi8:

- case ARM::t2STRi12:

- case ARM::VLDRS:

- case ARM::VSTRS:

- return 4;

- case ARM::VLDRD:

- case ARM::VSTRD:

- return 8;

- case ARM::LDMIA:

- case ARM::LDMDA:

- case ARM::LDMDB:

- case ARM::LDMIB:

- case ARM::STMIA:

- case ARM::STMDA:

- case ARM::STMDB:

- case ARM::STMIB:

- case ARM::tLDMIA:

- case ARM::tLDMIA_UPD:

- case ARM::tSTMIA_UPD:

- case ARM::t2LDMIA:

- case ARM::t2LDMDB:

- case ARM::t2STMIA:

- case ARM::t2STMDB:

- case ARM::VLDMSIA:

- case ARM::VSTMSIA:

- return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;

- case ARM::VLDMDIA:

- case ARM::VSTMDIA:

- return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;

- }

+ // Form a candidate from the Ops collected so far.

+ MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;

+ for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)

+ Candidate->Instrs.push_back(MemOps[C].MI);

+ Candidate->LatestMIIdx = Latest - SIndex;

+ Candidate->EarliestMIIdx = Earliest - SIndex;

+ Candidate->InsertPos = MemOps[Latest].Position;

+ if (Count == 1)

+ CanMergeToLSMulti = CanMergeToLSDouble = false;

+ Candidate->CanMergeToLSMulti = CanMergeToLSMulti;

+ Candidate->CanMergeToLSDouble = CanMergeToLSDouble;

+ Candidates.push_back(Candidate);

+ // Continue after the chain.

+ SIndex += Count;

+ } while (SIndex < EIndex);

}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,

@@ -1081,6 +1060,75 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,

}

+/// Check if the given instruction increments or decrements a register and

+/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags

+/// generated by the instruction are possibly read as well.

+static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,

+ ARMCC::CondCodes Pred, unsigned PredReg) {

+ bool CheckCPSRDef;

+ int Scale;

+ switch (MI.getOpcode()) {

+ case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;

+ case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;

+ case ARM::t2SUBri:

+ case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;

+ case ARM::t2ADDri:

+ case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;

+ case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;

+ case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;

+ default: return 0;

+ }

+ unsigned MIPredReg;

+ if (MI.getOperand(0).getReg() != Reg ||

+ MI.getOperand(1).getReg() != Reg ||

+ getInstrPredicate(&MI, MIPredReg) != Pred ||

+ MIPredReg != PredReg)

+ return 0;

+ if (CheckCPSRDef && definesCPSR(&MI))

+ return 0;

+ return MI.getOperand(2).getImm() * Scale;

+/// Searches for an increment or decrement of \p Reg before \p MBBI.

+static MachineBasicBlock::iterator

+findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,

+ ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {

+ Offset = 0;

+ MachineBasicBlock &MBB = *MBBI->getParent();

+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();

+ MachineBasicBlock::iterator EndMBBI = MBB.end();

+ if (MBBI == BeginMBBI)

+ return EndMBBI;

+ // Skip debug values.

+ MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);

+ while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)

+ --PrevMBBI;

+ Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);

+ return Offset == 0 ? EndMBBI : PrevMBBI;

+/// Searches for an increment or decrement of \p Reg after \p MBBI.

+static MachineBasicBlock::iterator

+findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,

+ ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {

+ Offset = 0;

+ MachineBasicBlock &MBB = *MBBI->getParent();

+ MachineBasicBlock::iterator EndMBBI = MBB.end();

+ MachineBasicBlock::iterator NextMBBI = std::next(MBBI);

+ // Skip debug values.

+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())

+ ++NextMBBI;

+ if (NextMBBI == EndMBBI)

+ return EndMBBI;

+ Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);

+ return Offset == 0 ? EndMBBI : NextMBBI;

/// Fold preceding/trailing inc/dec of base register into the

/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:

///

@@ -1093,21 +1141,17 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,

/// ldmia rn, <ra, rb, rc>

/// =>

/// ldmdb rn!, <ra, rb, rc>

-bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,

- MachineBasicBlock::iterator MBBI,

- bool &Advance,

- MachineBasicBlock::iterator &I) {

+bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {

// Thumb1 is already using updating loads/stores.

if (isThumb1) return false;

- MachineInstr *MI = MBBI;

- unsigned Base = MI->getOperand(0).getReg();

- bool BaseKill = MI->getOperand(0).isKill();

- unsigned Bytes = getLSMultipleTransferSize(MI);

+ const MachineOperand &BaseOP = MI->getOperand(0);

+ unsigned Base = BaseOP.getReg();

+ bool BaseKill = BaseOP.isKill();

unsigned PredReg = 0;

ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

unsigned Opcode = MI->getOpcode();

- DebugLoc dl = MI->getDebugLoc();

+ DebugLoc DL = MI->getDebugLoc();

// Can't use an updating ld/st if the base register is also a dest

// register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.

@@ -1115,55 +1159,27 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,

if (MI->getOperand(i).getReg() == Base)

return false;

- bool DoMerge = false;

+ int Bytes = getLSMultipleTransferSize(MI);

+ MachineBasicBlock &MBB = *MI->getParent();

+ MachineBasicBlock::iterator MBBI(MI);

+ int Offset;

+ MachineBasicBlock::iterator MergeInstr

+ = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);

ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);

- // Try merging with the previous instruction.

- MachineBasicBlock::iterator BeginMBBI = MBB.begin();

- if (MBBI != BeginMBBI) {

- MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);

- while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())

- --PrevMBBI;

- if (Mode == ARM_AM::ia &&

- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {

- Mode = ARM_AM::db;

- DoMerge = true;

- } else if (Mode == ARM_AM::ib &&

- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {

- Mode = ARM_AM::da;

- DoMerge = true;

- }

- if (DoMerge)

- MBB.erase(PrevMBBI);

- }

- // Try merging with the next instruction.

- MachineBasicBlock::iterator EndMBBI = MBB.end();

- if (!DoMerge && MBBI != EndMBBI) {

- MachineBasicBlock::iterator NextMBBI = std::next(MBBI);

- while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())

- ++NextMBBI;

- if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&

- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {

- DoMerge = true;

- } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&

- isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {

- DoMerge = true;

- }

- if (DoMerge) {

- if (NextMBBI == I) {

- Advance = true;

- ++I;

- }

- MBB.erase(NextMBBI);

- }

+ if (Mode == ARM_AM::ia && Offset == -Bytes) {

+ Mode = ARM_AM::db;

+ } else if (Mode == ARM_AM::ib && Offset == -Bytes) {

+ Mode = ARM_AM::da;

+ } else {

+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);

+ if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&

+ ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))

+ return false;

}

- if (!DoMerge)

- return false;

+ MBB.erase(MergeInstr);

unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);

- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))

+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))

.addReg(Base, getDefRegState(true)) // WB base register

.addReg(Base, getKillRegState(BaseKill))

.addImm(Pred).addReg(PredReg);

@@ -1231,21 +1247,15 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,

/// Fold preceding/trailing inc/dec of base register into the

/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:

-bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,

- MachineBasicBlock::iterator MBBI,

- const TargetInstrInfo *TII,

- bool &Advance,

- MachineBasicBlock::iterator &I) {

+bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {

// Thumb1 doesn't have updating LDR/STR.

// FIXME: Use LDM/STM with single register instead.

if (isThumb1) return false;

- MachineInstr *MI = MBBI;

- unsigned Base = MI->getOperand(1).getReg();

- bool BaseKill = MI->getOperand(1).isKill();

- unsigned Bytes = getLSMultipleTransferSize(MI);

+ unsigned Base = getLoadStoreBaseOp(*MI).getReg();

+ bool BaseKill = getLoadStoreBaseOp(*MI).isKill();

unsigned Opcode = MI->getOpcode();

- DebugLoc dl = MI->getDebugLoc();

+ DebugLoc DL = MI->getDebugLoc();

bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||

Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);

bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);

@@ -1255,7 +1265,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,

if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)

return false;

- bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;

// Can't do the merge if the destination register is the same as the would-be

// writeback register.

if (MI->getOperand(0).getReg() == Base)

@@ -1263,64 +1272,38 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,

unsigned PredReg = 0;

ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

- bool DoMerge = false;

- ARM_AM::AddrOpc AddSub = ARM_AM::add;

- unsigned NewOpc = 0;

- // AM2 - 12 bits, thumb2 - 8 bits.

- unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

- // Try merging with the previous instruction.

- MachineBasicBlock::iterator BeginMBBI = MBB.begin();

- if (MBBI != BeginMBBI) {

- MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);

- while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())

- --PrevMBBI;

- if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {

- DoMerge = true;

- AddSub = ARM_AM::sub;

- } else if (!isAM5 &&

- isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {

- DoMerge = true;

- }

- if (DoMerge) {

- NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);

- MBB.erase(PrevMBBI);

- }

- // Try merging with the next instruction.

- MachineBasicBlock::iterator EndMBBI = MBB.end();

- if (!DoMerge && MBBI != EndMBBI) {

- MachineBasicBlock::iterator NextMBBI = std::next(MBBI);

- while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())

- ++NextMBBI;

- if (!isAM5 &&

- isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {

- DoMerge = true;

- AddSub = ARM_AM::sub;

- } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {

- DoMerge = true;

- }

- if (DoMerge) {

- NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);

- if (NextMBBI == I) {

- Advance = true;

- ++I;

- }

- MBB.erase(NextMBBI);

- }

+ int Bytes = getLSMultipleTransferSize(MI);

+ MachineBasicBlock &MBB = *MI->getParent();

+ MachineBasicBlock::iterator MBBI(MI);

+ int Offset;

+ MachineBasicBlock::iterator MergeInstr

+ = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);

+ unsigned NewOpc;

+ if (!isAM5 && Offset == Bytes) {

+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);

+ } else if (Offset == -Bytes) {

+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);

+ } else {

+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);

+ if (Offset == Bytes) {

+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);

+ } else if (!isAM5 && Offset == -Bytes) {

+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);

+ } else

+ return false;

}

+ MBB.erase(MergeInstr);

- if (!DoMerge)

- return false;

+ ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

+ bool isLd = isLoadSingle(Opcode);

if (isAM5) {

// VLDM[SD]_UPD, VSTM[SD]_UPD

// (There are no base-updating versions of VLDR/VSTR instructions, but the

// updating load/store-multiple instructions can be used with only one

// register.)

MachineOperand &MO = MI->getOperand(0);

- BuildMI(MBB, MBBI, dl, TII->get(NewOpc))

+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc))

.addReg(Base, getDefRegState(true)) // WB base register

.addReg(Base, getKillRegState(isLd ? BaseKill : false))

.addImm(Pred).addReg(PredReg)

@@ -1330,20 +1313,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,

if (isAM2) {

// LDR_PRE, LDR_POST

if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {

- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())

+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())

.addReg(Base, RegState::Define)

.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);

} else {

- int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);

- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())

+ int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);

+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())

.addReg(Base, RegState::Define)

- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);

+ .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);

}

} else {

- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

// t2LDR_PRE, t2LDR_POST

- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())

+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())

.addReg(Base, RegState::Define)

.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);

}

@@ -1353,15 +1334,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,

// the vestigial zero-reg offset register. When that's fixed, this clause

// can be removed entirely.

if (isAM2 && NewOpc == ARM::STR_POST_IMM) {

- int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);

+ int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);

// STR_PRE, STR_POST

- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)

+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)

.addReg(MO.getReg(), getKillRegState(MO.isKill()))

- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);

+ .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);

} else {

- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

// t2STR_PRE, t2STR_POST

- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)

+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)

.addReg(MO.getReg(), getKillRegState(MO.isKill()))

.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);

}

@@ -1371,6 +1351,66 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,

return true;

}

+bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {

+ unsigned Opcode = MI.getOpcode();

+ assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&

+ "Must have t2STRDi8 or t2LDRDi8");

+ if (MI.getOperand(3).getImm() != 0)

+ return false;

+ // Behaviour for writeback is undefined if base register is the same as one

+ // of the others.

+ const MachineOperand &BaseOp = MI.getOperand(2);

+ unsigned Base = BaseOp.getReg();

+ const MachineOperand &Reg0Op = MI.getOperand(0);

+ const MachineOperand &Reg1Op = MI.getOperand(1);

+ if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)

+ return false;

+ unsigned PredReg;

+ ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);

+ MachineBasicBlock::iterator MBBI(MI);

+ MachineBasicBlock &MBB = *MI.getParent();

+ int Offset;

+ MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,

+ PredReg, Offset);

+ unsigned NewOpc;

+ if (Offset == 8 || Offset == -8) {

+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;

+ } else {

+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);

+ if (Offset == 8 || Offset == -8) {

+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;

+ } else

+ return false;

+ }

+ MBB.erase(MergeInstr);

+ DebugLoc DL = MI.getDebugLoc();

+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));

+ if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {

+ MIB.addOperand(Reg0Op).addOperand(Reg1Op)

+ .addReg(BaseOp.getReg(), RegState::Define);

+ } else {

+ assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);

+ MIB.addReg(BaseOp.getReg(), RegState::Define)

+ .addOperand(Reg0Op).addOperand(Reg1Op);

+ }

+ MIB.addReg(BaseOp.getReg(), RegState::Kill)

+ .addImm(Offset).addImm(Pred).addReg(PredReg);

+ assert(TII->get(Opcode).getNumOperands() == 6 &&

+ TII->get(NewOpc).getNumOperands() == 7 &&

+ "Unexpected number of operands in Opcode specification.");

+ // Transfer implicit operands.

+ for (const MachineOperand &MO : MI.implicit_operands())

+ MIB.addOperand(MO);

+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

+ MBB.erase(MBBI);

+ return true;

/// Returns true if instruction is a memory operation that this pass is capable

/// of operating on.

static bool isMemoryOp(const MachineInstr *MI) {

@@ -1426,26 +1466,10 @@ static bool isMemoryOp(const MachineInstr *MI) {

return false;

}

-/// Advance register scavenger to just before the earliest memory op that is

-/// being merged.

-void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {

- MachineBasicBlock::iterator Loc = MemOps[0].MBBI;

- unsigned Position = MemOps[0].Position;

- for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {

- if (MemOps[i].Position < Position) {

- Position = MemOps[i].Position;

- Loc = MemOps[i].MBBI;

- }

- if (Loc != MBB.begin())

- RS->forward(std::prev(Loc));

static void InsertLDR_STR(MachineBasicBlock &MBB,

MachineBasicBlock::iterator &MBBI,

int Offset, bool isDef,

- DebugLoc dl, unsigned NewOpc,

+ DebugLoc DL, unsigned NewOpc,

unsigned Reg, bool RegDeadKill, bool RegUndef,

unsigned BaseReg, bool BaseKill, bool BaseUndef,

bool OffKill, bool OffUndef,

@@ -1491,7 +1515,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,

if (!Errata602117 && !NonConsecutiveRegs)

return false;

- MachineBasicBlock::iterator NewBBI = MBBI;

bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;

bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;

bool EvenDeadKill = isLd ?

@@ -1531,7 +1554,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,

getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));

++NumSTRD2STM;

}

- NewBBI = std::prev(MBBI);

} else {

// Split into two instructions.

unsigned NewOpc = (isLd)

@@ -1553,7 +1575,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,

OddReg, OddDeadKill, false,

BaseReg, false, BaseUndef, false, OffUndef,

Pred, PredReg, TII, isT2);

- NewBBI = std::prev(MBBI);

InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,

EvenReg, EvenDeadKill, false,

BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,

@@ -1573,7 +1594,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,

EvenReg, EvenDeadKill, EvenUndef,

BaseReg, false, BaseUndef, false, OffUndef,

Pred, PredReg, TII, isT2);

- NewBBI = std::prev(MBBI);

InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,

OddReg, OddDeadKill, OddUndef,

BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,

@@ -1585,191 +1605,160 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,

++NumSTRD2STR;

}

- MBB.erase(MI);

- MBBI = NewBBI;

+ MBBI = MBB.erase(MBBI);

return true;

}

/// An optimization pass to turn multiple LDR / STR ops of the same base and

/// incrementing offset into LDM / STM ops.

bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {

- unsigned NumMerges = 0;

- unsigned NumMemOps = 0;

MemOpQueue MemOps;

unsigned CurrBase = 0;

unsigned CurrOpc = ~0u;

- unsigned CurrSize = 0;

ARMCC::CondCodes CurrPred = ARMCC::AL;

- unsigned CurrPredReg = 0;

unsigned Position = 0;

- SmallVector<MachineBasicBlock::iterator,4> Merges;

- RS->enterBasicBlock(&MBB);

- MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();

- while (MBBI != E) {

+ assert(Candidates.size() == 0);

+ assert(MergeBaseCandidates.size() == 0);

+ LiveRegsValid = false;

+ for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();

+ I = MBBI) {

+ // The instruction in front of the iterator is the one we look at.

+ MBBI = std::prev(I);

if (FixInvalidRegPairOp(MBB, MBBI))

continue;

+ ++Position;

- bool Advance = false;

- bool TryMerge = false;

- bool isMemOp = isMemoryOp(MBBI);

- if (isMemOp) {

+ if (isMemoryOp(MBBI)) {

unsigned Opcode = MBBI->getOpcode();

- unsigned Size = getLSMultipleTransferSize(MBBI);

const MachineOperand &MO = MBBI->getOperand(0);

unsigned Reg = MO.getReg();

- bool isKill = MO.isDef() ? false : MO.isKill();

- unsigned Base = MBBI->getOperand(1).getReg();

+ unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();

unsigned PredReg = 0;

ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);

int Offset = getMemoryOpOffset(MBBI);

- // Watch out for:

- // r4 := ldr [r5]

- // r5 := ldr [r5, #4]

- // r6 := ldr [r5, #8]

- //

- // The second ldr has effectively broken the chain even though it

- // looks like the later ldr(s) use the same base register. Try to

- // merge the ldr's so far, including this one. But don't try to

- // combine the following ldr(s).

- bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg();

- // Watch out for:

- // r4 := ldr [r0, #8]

- // r4 := ldr [r0, #4]

- //

- // The optimization may reorder the second ldr in front of the first

- // ldr, which violates write after write(WAW) dependence. The same as

- // str. Try to merge inst(s) already in MemOps.

- bool Overlap = false;

- for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {

- if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {

- Overlap = true;

- break;

- }

- if (CurrBase == 0 && !Clobber) {

+ if (CurrBase == 0) {

// Start of a new chain.

CurrBase = Base;

CurrOpc = Opcode;

- CurrSize = Size;

CurrPred = Pred;

- CurrPredReg = PredReg;

- MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));

- ++NumMemOps;

- Advance = true;

- } else if (!Overlap) {

- if (Clobber) {

- TryMerge = true;

- Advance = true;

+ MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));

+ continue;

+ }

+ // Note: No need to match PredReg in the next if.

+ if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {

+ // Watch out for:

+ // r4 := ldr [r0, #8]

+ // r4 := ldr [r0, #4]

+ // or

+ // r0 := ldr [r0]

+ // If a load overrides the base register or a register loaded by

+ // another load in our chain, we cannot take this instruction.

+ bool Overlap = false;

+ if (isLoadSingle(Opcode)) {

+ Overlap = (Base == Reg);

+ if (!Overlap) {

+ for (const MemOpQueueEntry &E : MemOps) {

+ if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {

+ Overlap = true;

+ break;

+ }

}

- if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {

- // No need to match PredReg.

- // Continue adding to the queue.

+ if (!Overlap) {

+ // Check offset and sort memory operation into the current chain.

if (Offset > MemOps.back().Offset) {

- MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,

- Position, MBBI));

- ++NumMemOps;

- Advance = true;

+ MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));

+ continue;

} else {

- for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();

- I != E; ++I) {

- if (Offset < I->Offset) {

- MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,

- Position, MBBI));

- ++NumMemOps;

- Advance = true;

+ MemOpQueue::iterator MI, ME;

+ for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {

+ if (Offset < MI->Offset) {

+ // Found a place to insert.

break;

- } else if (Offset == I->Offset) {

- // Collision! This can't be merged!

+ }

+ if (Offset == MI->Offset) {

+ // Collision, abort.

+ MI = ME;

break;

}

+ if (MI != MemOps.end()) {

+ MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));

+ continue;

+ }

}

- }

- if (MBBI->isDebugValue()) {

- ++MBBI;

- if (MBBI == E)

- // Reach the end of the block, try merging the memory instructions.

- TryMerge = true;

- } else if (Advance) {

- ++Position;

- ++MBBI;

- if (MBBI == E)

- // Reach the end of the block, try merging the memory instructions.

- TryMerge = true;

- } else {

- TryMerge = true;

+ // Don't advance the iterator; The op will start a new chain next.

+ MBBI = I;

+ --Position;

+ // Fallthrough to look into existing chain.

+ } else if (MBBI->isDebugValue()) {

+ continue;

+ } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||

+ MBBI->getOpcode() == ARM::t2STRDi8) {

+ // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions

+ // remember them because we may still be able to merge add/sub into them.

+ MergeBaseCandidates.push_back(MBBI);

}

- if (TryMerge) {

- if (NumMemOps > 1) {

- // Try to find a free register to use as a new base in case it's needed.

- // First advance to the instruction just before the start of the chain.

- AdvanceRS(MBB, MemOps);

- // Find a scratch register.

- unsigned Scratch =

- RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

- // Process the load / store instructions.

- RS->forward(std::prev(MBBI));

- // Merge ops.

- Merges.clear();

- MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,

- CurrPred, CurrPredReg, Scratch, MemOps, Merges);

- // Try folding preceding/trailing base inc/dec into the generated

- // LDM/STM ops.

- for (unsigned i = 0, e = Merges.size(); i < e; ++i)

- if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))

- ++NumMerges;

- NumMerges += Merges.size();

- // Try folding preceding/trailing base inc/dec into those load/store

- // that were not merged to form LDM/STM ops.

- for (unsigned i = 0; i != NumMemOps; ++i)

- if (!MemOps[i].Merged)

- if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))

- ++NumMerges;

- // RS may be pointing to an instruction that's deleted.

- RS->skipTo(std::prev(MBBI));

- } else if (NumMemOps == 1) {

- // Try folding preceding/trailing base inc/dec into the single

- // load/store.

- if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {

- ++NumMerges;

- RS->forward(std::prev(MBBI));

- }

+ // If we are here then the chain is broken; Extract candidates for a merge.

+ if (MemOps.size() > 0) {

+ FormCandidates(MemOps);

+ // Reset for the next chain.

CurrBase = 0;

CurrOpc = ~0u;

- CurrSize = 0;

CurrPred = ARMCC::AL;

- CurrPredReg = 0;

- if (NumMemOps) {

- MemOps.clear();

- NumMemOps = 0;

- }

+ MemOps.clear();

+ }

+ if (MemOps.size() > 0)

+ FormCandidates(MemOps);

- // If iterator hasn't been advanced and this is not a memory op, skip it.

- // It can't start a new chain anyway.

- if (!Advance && !isMemOp && MBBI != E) {

- ++Position;

- ++MBBI;

+ // Sort candidates so they get processed from end to begin of the basic

+ // block later; This is necessary for liveness calculation.

+ auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {

+ return M0->InsertPos < M1->InsertPos;

+ };

+ std::sort(Candidates.begin(), Candidates.end(), LessThan);

+ // Go through list of candidates and merge.

+ bool Changed = false;

+ for (const MergeCandidate *Candidate : Candidates) {

+ if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {

+ MachineInstr *Merged = MergeOpsUpdate(*Candidate);

+ // Merge preceding/trailing base inc/dec into the merged op.

+ if (Merged) {

+ Changed = true;

+ unsigned Opcode = Merged->getOpcode();

+ if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)

+ MergeBaseUpdateLSDouble(*Merged);

+ else

+ MergeBaseUpdateLSMultiple(Merged);

+ } else {

+ for (MachineInstr *MI : Candidate->Instrs) {

+ if (MergeBaseUpdateLoadStore(MI))

+ Changed = true;

+ }

}

+ } else {

+ assert(Candidate->Instrs.size() == 1);

+ if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))

+ Changed = true;

}

- return NumMerges > 0;

+ Candidates.clear();

+ // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.

+ for (MachineInstr *MI : MergeBaseCandidates)

+ MergeBaseUpdateLSDouble(*MI);

+ MergeBaseCandidates.clear();

+ return Changed;

}

/// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")

@@ -1814,12 +1803,14 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {

}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {

+ MF = &Fn;

STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());

TL = STI->getTargetLowering();

AFI = Fn.getInfo<ARMFunctionInfo>();

TII = STI->getInstrInfo();

TRI = STI->getRegisterInfo();

- RS = new RegScavenger();

+ MRI = &Fn.getRegInfo();

+ RegClassInfoValid = false;

isThumb2 = AFI->isThumb2Function();

isThumb1 = AFI->isThumbFunction() && !isThumb2;

@@ -1832,7 +1823,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {

Modified |= MergeReturnIntoLDM(MBB);

}

- delete RS;

+ Allocator.DestroyAll();

return Modified;

}

@@ -2219,7 +2210,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {

continue;

int Opc = MI->getOpcode();

- bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;

+ bool isLd = isLoadSingle(Opc);

unsigned Base = MI->getOperand(1).getReg();

int Offset = getMemoryOpOffset(MI);

diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index a59cf9851108..6cafbbb9f8eb 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp

@@ -18,12 +18,6 @@ using namespace llvm;

#define DEBUG_TYPE "arm-selectiondag-info"

-ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL)

- : TargetSelectionDAGInfo(&DL) {}

-ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {

// Emit, if possible, a specialized version of the given Libcall. Typically this

// means selecting the appropriately aligned version, but we also convert memset

// of 0 into memclr.

@@ -83,7 +77,7 @@ EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,

TargetLowering::ArgListTy Args;

TargetLowering::ArgListEntry Entry;

- Entry.Ty = TLI->getDataLayout()->getIntPtrType(*DAG.getContext());

+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

Entry.Node = Dst;

Args.push_back(Entry);

if (AEABILibcall == AEABI_MEMCLR) {

@@ -121,12 +115,14 @@ EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,

{ "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }

};

TargetLowering::CallLoweringInfo CLI(DAG);

- CLI.setDebugLoc(dl).setChain(Chain)

- .setCallee(TLI->getLibcallCallingConv(LC),

- Type::getVoidTy(*DAG.getContext()),

- DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],

- TLI->getPointerTy()), std::move(Args), 0)

- .setDiscardResult();

+ CLI.setDebugLoc(dl)

+ .setChain(Chain)

+ .setCallee(

+ TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),

+ DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],

+ TLI->getPointerTy(DAG.getDataLayout())),

+ std::move(Args), 0)

+ .setDiscardResult();

std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);

return CallResult.second;

diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index 1db190f41e1a..289879ee1d7e 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h

@@ -37,8 +37,6 @@ namespace ARM_AM {

class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {

public:

- explicit ARMSelectionDAGInfo(const DataLayout &DL);

- ~ARMSelectionDAGInfo();

SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,

SDValue Chain,

diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 55808dfb9efe..002c3e9b6291 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp

@@ -112,7 +112,6 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,

: ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),

ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),

TargetTriple(TT), Options(TM.Options), TM(TM),

- TSInfo(*TM.getDataLayout()),

FrameLowering(initializeFrameLowering(CPU, FS)),

// At this point initializeSubtargetDependencies has been called so

// we can query directly.

@@ -172,6 +171,7 @@ void ARMSubtarget::initializeEnvironment() {

AllowsUnalignedMem = false;

Thumb2DSP = false;

UseNaClTrap = false;

+ GenLongCalls = false;

UnsafeFPMath = false;

}

@@ -286,7 +286,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,

if (RelocM == Reloc::Static)

return false;

- bool isDecl = GV->isDeclarationForLinker();

+ bool isDef = GV->isStrongDefinitionForLinker();

if (!isTargetMachO()) {

// Extra load is needed for all externally visible.

@@ -294,34 +294,22 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,

return false;

return true;

} else {

- if (RelocM == Reloc::PIC_) {

- // If this is a strong reference to a definition, it is definitely not

- // through a stub.

- if (!isDecl && !GV->isWeakForLinker())

- return false;

- // Unless we have a symbol with hidden visibility, we have to go through a

- // normal $non_lazy_ptr stub because this symbol might be resolved late.

- if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.

- return true;

+ // If this is a strong reference to a definition, it is definitely not

+ // through a stub.

+ if (isDef)

+ return false;

+ // Unless we have a symbol with hidden visibility, we have to go through a

+ // normal $non_lazy_ptr stub because this symbol might be resolved late.

+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.

+ return true;

+ if (RelocM == Reloc::PIC_) {

// If symbol visibility is hidden, we have a stub for common symbol

// references and external declarations.

- if (isDecl || GV->hasCommonLinkage())

+ if (GV->isDeclarationForLinker() || GV->hasCommonLinkage())

// Hidden $non_lazy_ptr reference.

return true;

- return false;

- } else {

- // If this is a strong reference to a definition, it is definitely not

- // through a stub.

- if (!isDecl && !GV->isWeakForLinker())

- return false;

- // Unless we have a symbol with hidden visibility, we have to go through a

- // normal $non_lazy_ptr stub because this symbol might be resolved late.

- if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.

- return true;

}

diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 9909a6a6d198..dd101df9b63d 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h

@@ -206,6 +206,9 @@ protected:

/// NaCl TRAP instruction is generated instead of the regular TRAP.

bool UseNaClTrap;

+ /// Generate calls via indirect call instructions.

+ bool GenLongCalls;

/// Target machine allowed unsafe FP math (such as use of NEON fp)

bool UnsafeFPMath;

@@ -342,6 +345,7 @@ public:

bool hasMPExtension() const { return HasMPExtension; }

bool hasThumb2DSP() const { return Thumb2DSP; }

bool useNaClTrap() const { return UseNaClTrap; }

+ bool genLongCalls() const { return GenLongCalls; }

bool hasFP16() const { return HasFP16; }

bool hasD16() const { return HasD16; }

diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 6e81bd2d349d..93495d66ae70 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp

@@ -80,8 +80,7 @@ computeTargetABI(const Triple &TT, StringRef CPU,

// FIXME: This is duplicated code from the front end and should be unified.

if (TT.isOSBinFormatMachO()) {

if (TT.getEnvironment() == llvm::Triple::EABI ||

- (TT.getOS() == llvm::Triple::UnknownOS &&

- TT.getObjectFormat() == llvm::Triple::MachO) ||

+ (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||

CPU.startswith("cortex-m")) {

TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;

} else {

@@ -104,8 +103,8 @@ computeTargetABI(const Triple &TT, StringRef CPU,

TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;

break;

default:

- if (TT.getOS() == llvm::Triple::NetBSD)

- TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;

+ if (TT.isOSNetBSD())

+ TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;

else

TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;

break;

diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index f4901fc24e44..2f194cf7ae06 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp

@@ -61,14 +61,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {

if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||

ISD == ISD::FP_EXTEND)) {

- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);

+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);

int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second);

if (Idx != -1)

return LT.first * NEONFltDblTbl[Idx].Cost;

}

- EVT SrcTy = TLI->getValueType(Src);

- EVT DstTy = TLI->getValueType(Dst);

+ EVT SrcTy = TLI->getValueType(DL, Src);

+ EVT DstTy = TLI->getValueType(DL, Dst);

if (!SrcTy.isSimple() || !DstTy.isSimple())

return BaseT::getCastInstrCost(Opcode, Dst, Src);

@@ -282,8 +282,8 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,

{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }

};

- EVT SelCondTy = TLI->getValueType(CondTy);

- EVT SelValTy = TLI->getValueType(ValTy);

+ EVT SelCondTy = TLI->getValueType(DL, CondTy);

+ EVT SelValTy = TLI->getValueType(DL, ValTy);

if (SelCondTy.isSimple() && SelValTy.isSimple()) {

int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,

SelCondTy.getSimpleVT(),

@@ -292,7 +292,7 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,

return NEONVectorSelectTbl[Idx].Cost;

}

- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);

+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);

return LT.first;

}

@@ -353,7 +353,7 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,

{ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},

{ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};

- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);

+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);

if (Idx == -1)

@@ -379,7 +379,7 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,

{ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};

- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);

+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

int Idx =

CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);

if (Idx == -1)

@@ -395,7 +395,7 @@ unsigned ARMTTIImpl::getArithmeticInstrCost(

TTI::OperandValueProperties Opd2PropInfo) {

int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);

- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);

+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

const unsigned FunctionCallDivCost = 20;

const unsigned ReciprocalDivCost = 10;

@@ -468,7 +468,7 @@ unsigned ARMTTIImpl::getArithmeticInstrCost(

unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,

unsigned Alignment,

unsigned AddressSpace) {

- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);

+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);

if (Src->isVectorTy() && Alignment != 16 &&

Src->getVectorElementType()->isDoubleTy()) {

@@ -488,12 +488,12 @@ unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,

assert(isa<VectorType>(VecTy) && "Expect a vector type");

// vldN/vstN doesn't support vector types of i64/f64 element.

- bool EltIs64Bits = DL->getTypeAllocSizeInBits(VecTy->getScalarType()) == 64;

+ bool EltIs64Bits = DL.getTypeAllocSizeInBits(VecTy->getScalarType()) == 64;

if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {

unsigned NumElts = VecTy->getVectorNumElements();

Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);

- unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy);

+ unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy);

// vldN/vstN only support legal vector types of size 64 or 128 in bits.

if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))

diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index f2e5db655ccf..84f256f73722 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h

@@ -42,7 +42,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {

public:

explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, Function &F)

- : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}

+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),

+ TLI(ST->getTargetLowering()) {}

// Provide value semantics. MSVC requires that we spell all of these out.

ARMTTIImpl(const ARMTTIImpl &Arg)

@@ -50,18 +51,6 @@ public:

ARMTTIImpl(ARMTTIImpl &&Arg)

: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),

TLI(std::move(Arg.TLI)) {}

- ARMTTIImpl &operator=(const ARMTTIImpl &RHS) {

- BaseT::operator=(static_cast<const BaseT &>(RHS));

- ST = RHS.ST;

- TLI = RHS.TLI;

- return *this;

- }

- ARMTTIImpl &operator=(ARMTTIImpl &&RHS) {

- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));

- ST = std::move(RHS.ST);

- TLI = std::move(RHS.TLI);

- return *this;

- }

/// \name Scalar TTI Implementations

/// @{

diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c2db74619871..f8f0eb2d4baa 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp

@@ -189,9 +189,9 @@ class ARMAsmParser : public MCTargetAsmParser {

return getParser().Error(L, Msg, Ranges);

}

- bool validatetLDMRegList(MCInst Inst, const OperandVector &Operands,

+ bool validatetLDMRegList(const MCInst &Inst, const OperandVector &Operands,

unsigned ListNo, bool IsARPop = false);

- bool validatetSTMRegList(MCInst Inst, const OperandVector &Operands,

+ bool validatetSTMRegList(const MCInst &Inst, const OperandVector &Operands,

unsigned ListNo);

int tryParseRegister();

@@ -242,6 +242,8 @@ class ARMAsmParser : public MCTargetAsmParser {

bool &CanAcceptCarrySet,

bool &CanAcceptPredicationCode);

+ void tryConvertingToTwoOperandForm(StringRef Mnemonic, bool CarrySetting,

+ OperandVector &Operands);

bool isThumb() const {

// FIXME: Can tablegen auto-generate this?

return STI.getFeatureBits()[ARM::ModeThumb];

@@ -5465,6 +5467,92 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,

CanAcceptPredicationCode = true;

}

+// \brief Some Thumb instructions have two operand forms that are not

+// available as three operand, convert to two operand form if possible.

+//

+// FIXME: We would really like to be able to tablegen'erate this.

+void ARMAsmParser::tryConvertingToTwoOperandForm(StringRef Mnemonic,

+ bool CarrySetting,

+ OperandVector &Operands) {

+ if (Operands.size() != 6)

+ return;

+ const auto &Op3 = static_cast<ARMOperand &>(*Operands[3]);

+ auto &Op4 = static_cast<ARMOperand &>(*Operands[4]);

+ if (!Op3.isReg() || !Op4.isReg())

+ return;

+ auto Op3Reg = Op3.getReg();

+ auto Op4Reg = Op4.getReg();

+ // For most Thumb2 cases we just generate the 3 operand form and reduce

+ // it in processInstruction(), but the 3 operand form of ADD (t2ADDrr)

+ // won't accept SP or PC so we do the transformation here taking care

+ // with immediate range in the 'add sp, sp, #imm' case.

+ auto &Op5 = static_cast<ARMOperand &>(*Operands[5]);

+ if (isThumbTwo()) {

+ if (Mnemonic != "add")

+ return;

+ bool TryTransform = Op3Reg == ARM::PC || Op4Reg == ARM::PC ||

+ (Op5.isReg() && Op5.getReg() == ARM::PC);

+ if (!TryTransform) {

+ TryTransform = (Op3Reg == ARM::SP || Op4Reg == ARM::SP ||

+ (Op5.isReg() && Op5.getReg() == ARM::SP)) &&

+ !(Op3Reg == ARM::SP && Op4Reg == ARM::SP &&

+ Op5.isImm() && !Op5.isImm0_508s4());

+ }

+ if (!TryTransform)

+ return;

+ } else if (!isThumbOne())

+ return;

+ if (!(Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" ||

+ Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" ||

+ Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" ||

+ Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic"))

+ return;

+ // If first 2 operands of a 3 operand instruction are the same

+ // then transform to 2 operand version of the same instruction

+ // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1'

+ bool Transform = Op3Reg == Op4Reg;

+ // For commutative operations, we might be able to transform if we swap

+ // Op4 and Op5. The 'ADD Rdm, SP, Rdm' form is already handled specially

+ // as tADDrsp.

+ const ARMOperand *LastOp = &Op5;

+ bool Swap = false;

+ if (!Transform && Op5.isReg() && Op3Reg == Op5.getReg() &&

+ ((Mnemonic == "add" && Op4Reg != ARM::SP) ||

+ Mnemonic == "and" || Mnemonic == "eor" ||

+ Mnemonic == "adc" || Mnemonic == "orr")) {

+ Swap = true;

+ LastOp = &Op4;

+ Transform = true;

+ }

+ // If both registers are the same then remove one of them from

+ // the operand list, with certain exceptions.

+ if (Transform) {

+ // Don't transform 'adds Rd, Rd, Rm' or 'sub{s} Rd, Rd, Rm' because the

+ // 2 operand forms don't exist.

+ if (((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub") &&

+ LastOp->isReg())

+ Transform = false;

+ // Don't transform 'add/sub{s} Rd, Rd, #imm' if the immediate fits into

+ // 3-bits because the ARMARM says not to.

+ if ((Mnemonic == "add" || Mnemonic == "sub") && LastOp->isImm0_7())

+ Transform = false;

+ }

+ if (Transform) {

+ if (Swap)

+ std::swap(Op4, Op5);

+ Operands.erase(Operands.begin() + 3);

+ }

bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,

OperandVector &Operands) {

// FIXME: This is all horribly hacky. We really need a better way to deal

@@ -5838,6 +5926,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,

"VFP/Neon double precision register expected");

}

+ tryConvertingToTwoOperandForm(Mnemonic, CarrySetting, Operands);

// Some instructions, mostly Thumb, have forms for the same mnemonic that

// do and don't have a cc_out optional-def operand. With some spot-checks

// of the operand list, we can figure out which variant we're trying to

@@ -5901,48 +5991,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,

}

- // If first 2 operands of a 3 operand instruction are the same

- // then transform to 2 operand version of the same instruction

- // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1'

- // FIXME: We would really like to be able to tablegen'erate this.

- if (isThumbOne() && Operands.size() == 6 &&

- (Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" ||

- Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" ||

- Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" ||

- Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic")) {

- ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]);

- ARMOperand &Op4 = static_cast<ARMOperand &>(*Operands[4]);

- ARMOperand &Op5 = static_cast<ARMOperand &>(*Operands[5]);

- // If both registers are the same then remove one of them from

- // the operand list.

- if (Op3.isReg() && Op4.isReg() && Op3.getReg() == Op4.getReg()) {

- // If 3rd operand (variable Op5) is a register and the instruction is adds/sub

- // then do not transform as the backend already handles this instruction

- // correctly.

- if (!Op5.isReg() || !((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub")) {

- Operands.erase(Operands.begin() + 3);

- if (Mnemonic == "add" && !CarrySetting) {

- // Special case for 'add' (not 'adds') instruction must

- // remove the CCOut operand as well.

- Operands.erase(Operands.begin() + 1);

- }

- // If instruction is 'add' and first two register operands

- // use SP register, then remove one of the SP registers from

- // the instruction.

- // FIXME: We would really like to be able to tablegen'erate this.

- if (isThumbOne() && Operands.size() == 5 && Mnemonic == "add" && !CarrySetting) {

- ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]);

- ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]);

- if (Op2.isReg() && Op3.isReg() && Op2.getReg() == ARM::SP && Op3.getReg() == ARM::SP) {

- Operands.erase(Operands.begin() + 2);

- }

// GNU Assembler extension (compatibility)

if ((Mnemonic == "ldrd" || Mnemonic == "strd")) {

ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]);

@@ -5985,8 +6033,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,

// return 'true' if register list contains non-low GPR registers,

// 'false' otherwise. If Reg is in the register list or is HiReg, set

// 'containsReg' to true.

-static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg,

- unsigned HiReg, bool &containsReg) {

+static bool checkLowRegisterList(const MCInst &Inst, unsigned OpNo,

+ unsigned Reg, unsigned HiReg,

+ bool &containsReg) {

containsReg = false;

for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {

unsigned OpReg = Inst.getOperand(i).getReg();

@@ -6001,8 +6050,8 @@ static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg,

// Check if the specified register is in the register list of the inst,

// starting at the indicated operand number.

-static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {

- for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {

+static bool listContainsReg(const MCInst &Inst, unsigned OpNo, unsigned Reg) {

+ for (unsigned i = OpNo, e = Inst.getNumOperands(); i < e; ++i) {

unsigned OpReg = Inst.getOperand(i).getReg();

if (OpReg == Reg)

return true;

@@ -6020,7 +6069,7 @@ static bool instIsBreakpoint(const MCInst &Inst) {

}

-bool ARMAsmParser::validatetLDMRegList(MCInst Inst,

+bool ARMAsmParser::validatetLDMRegList(const MCInst &Inst,

const OperandVector &Operands,

unsigned ListNo, bool IsARPop) {

const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]);

@@ -6043,7 +6092,7 @@ bool ARMAsmParser::validatetLDMRegList(MCInst Inst,

return false;

}

-bool ARMAsmParser::validatetSTMRegList(MCInst Inst,

+bool ARMAsmParser::validatetSTMRegList(const MCInst &Inst,

const OperandVector &Operands,

unsigned ListNo) {

const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]);

@@ -8167,8 +8216,16 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,

// If the destination and first source operand are the same, and

// there's no setting of the flags, use encoding T2 instead of T3.

// Note that this is only for ADD, not SUB. This mirrors the system

- // 'as' behaviour. Make sure the wide encoding wasn't explicit.

- if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||

+ // 'as' behaviour. Also take advantage of ADD being commutative.

+ // Make sure the wide encoding wasn't explicit.

+ bool Swap = false;

+ auto DestReg = Inst.getOperand(0).getReg();

+ bool Transform = DestReg == Inst.getOperand(1).getReg();

+ if (!Transform && DestReg == Inst.getOperand(2).getReg()) {

+ Transform = true;

+ Swap = true;

+ }

+ if (!Transform ||

Inst.getOperand(5).getReg() != 0 ||

(static_cast<ARMOperand &>(*Operands[3]).isToken() &&

static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))

@@ -8177,7 +8234,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,

TmpInst.setOpcode(ARM::tADDhirr);

TmpInst.addOperand(Inst.getOperand(0));

- TmpInst.addOperand(Inst.getOperand(2));

+ TmpInst.addOperand(Inst.getOperand(Swap ? 1 : 2));

TmpInst.addOperand(Inst.getOperand(3));

TmpInst.addOperand(Inst.getOperand(4));

Inst = TmpInst;

@@ -9176,8 +9233,7 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {

return false;

}

- STI.InitMCProcessorInfo(CPU, "");

- STI.InitCPUSchedModel(CPU);

+ STI.setDefaultFeatures(CPU);

setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));

return false;

diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 4d12bfb5d60f..d17fdb95dbdf 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp

@@ -1362,7 +1362,7 @@ MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) {

MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,

const MCSubtargetInfo &STI) {

const Triple &TT = STI.getTargetTriple();

- if (TT.getObjectFormat() == Triple::ELF)

+ if (TT.isOSBinFormatELF())

return new ARMTargetELFStreamer(S);

return new ARMTargetStreamer(S);

}

diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index fafe25ae5be5..21c9fc1e58b2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp

@@ -31,7 +31,7 @@ using namespace llvm;

#define GET_REGINFO_MC_DESC

#include "ARMGenRegisterInfo.inc"

-static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,

+static bool getMCRDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,

std::string &Info) {

if (STI.getFeatureBits()[llvm::ARM::HasV7Ops] &&

(MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) &&

@@ -63,7 +63,7 @@ static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,

return false;

}

-static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,

+static bool getITDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,

std::string &Info) {

if (STI.getFeatureBits()[llvm::ARM::HasV8Ops] && MI.getOperand(1).isImm() &&

MI.getOperand(1).getImm() != 8) {

@@ -75,7 +75,7 @@ static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,

return false;

}

-static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,

+static bool getARMStoreDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,

std::string &Info) {

assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&

"cannot predicate thumb instructions");

@@ -92,7 +92,7 @@ static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,

return false;

}

-static bool getARMLoadDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,

+static bool getARMLoadDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,

std::string &Info) {

assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&

"cannot predicate thumb instructions");

@@ -257,9 +257,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT,

ArchFS = FS;

}

- MCSubtargetInfo *X = new MCSubtargetInfo();

- InitARMMCSubtargetInfo(X, TT, CPU, ArchFS);

- return X;

+ return createARMMCSubtargetInfoImpl(TT, CPU, ArchFS);

}

static MCInstrInfo *createARMMCInstrInfo() {

@@ -268,7 +266,7 @@ static MCInstrInfo *createARMMCInstrInfo() {

return X;

}

-static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {

+static MCRegisterInfo *createARMMCRegisterInfo(const Triple &Triple) {

MCRegisterInfo *X = new MCRegisterInfo();

InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC);

return X;

@@ -279,10 +277,10 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI,

MCAsmInfo *MAI;

if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO())

MAI = new ARMMCAsmInfoDarwin(TheTriple);

- else if (TheTriple.isWindowsItaniumEnvironment())

- MAI = new ARMCOFFMCAsmInfoGNU();

else if (TheTriple.isWindowsMSVCEnvironment())

MAI = new ARMCOFFMCAsmInfoMicrosoft();

+ else if (TheTriple.isOSWindows())

+ MAI = new ARMCOFFMCAsmInfoGNU();

else

MAI = new ARMELFMCAsmInfo(TheTriple);

@@ -292,14 +290,13 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI,

return MAI;

}

-static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,

+static MCCodeGenInfo *createARMMCCodeGenInfo(const Triple &TT, Reloc::Model RM,

CodeModel::Model CM,

CodeGenOpt::Level OL) {

MCCodeGenInfo *X = new MCCodeGenInfo();

if (RM == Reloc::Default) {

- Triple TheTriple(TT);

// Default relocation model on Darwin is PIC, not DynamicNoPIC.

- RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;

+ RM = TT.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;

}

X->initMCCodeGenInfo(RM, CM, OL);

return X;

diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 77cd890e4cad..3b4358b5d9bf 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp

@@ -365,7 +365,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,

// frame pointer stack slot, the target is ELF and the function has FP, or

// the target uses var sized objects.

if (NumBytes) {

- assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&

+ assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&

"No scratch register to restore SP from FP!");

emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,

TII, *RegInfo);