aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/ARM
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2015-08-07 23:01:33 +0000
committerDimitry Andric <dim@FreeBSD.org>2015-08-07 23:01:33 +0000
commitee8648bdac07986a0f1ec897b02ec82a2f144d46 (patch)
tree52d1861acda1205241ee35a94aa63129c604d469 /lib/Target/ARM
parent1a82d4c088707c791c792f6822f611b47a12bdfe (diff)
downloadsrc-ee8648bdac07986a0f1ec897b02ec82a2f144d46.tar.gz
src-ee8648bdac07986a0f1ec897b02ec82a2f144d46.zip
Vendor import of llvm trunk r242221:vendor/llvm/llvm-trunk-r242221
Notes
Notes: svn path=/vendor/llvm/dist/; revision=286425 svn path=/vendor/llvm/llvm-trunk-r242221/; revision=286426; tag=vendor/llvm/llvm-trunk-r242221
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/ARM.td4
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp3
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp20
-rw-r--r--lib/Target/ARM/ARMCallingConv.td3
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp45
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp43
-rw-r--r--lib/Target/ARM/ARMFrameLowering.h4
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp47
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp328
-rw-r--r--lib/Target/ARM/ARMISelLowering.h20
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td4
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp1453
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp22
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.h2
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp38
-rw-r--r--lib/Target/ARM/ARMSubtarget.h4
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp7
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp24
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.h15
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp166
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp23
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp2
23 files changed, 1209 insertions, 1070 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 96b4742da2bb..ef609a66d032 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -150,6 +150,10 @@ def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
"NaCl trap">;
+def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
+ "Generate calls via indirect call "
+ "instructions">;
+
// ARM ISAs.
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index b1a11d626bda..9f43e732bd73 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1230,8 +1230,7 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
Reloc::Model RM = MF.getTarget().getRelocationModel();
if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
- assert(getSubtarget().getTargetTriple().getObjectFormat() ==
- Triple::MachO &&
+ assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
"LOAD_STACK_GUARD currently supported only for MachO.");
expandLoadStackGuard(MI, RM);
MI->getParent()->erase(MI);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 3f79a9b53d70..e7d5be7753e4 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -127,7 +127,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
- const TargetFrameLowering *TFI = STI.getFrameLowering();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
@@ -194,7 +194,7 @@ unsigned
ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
- const TargetFrameLowering *TFI = STI.getFrameLowering();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
switch (RC->getID()) {
default:
@@ -302,7 +302,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
// When outgoing call frames are so large that we adjust the stack pointer
// around the call, we can no longer use the stack pointer to reach the
@@ -333,6 +333,7 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
// We can't realign the stack if:
// 1. Dynamic stack realignment is explicitly disabled,
// 2. This is a Thumb1 function (it's not useful, so we don't bother), or
@@ -347,7 +348,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
return false;
// We may also need a base pointer if there are dynamic allocas or stack
// pointer adjustments around calls.
- if (MF.getSubtarget().getFrameLowering()->hasReservedCallFrame(MF))
+ if (TFI->hasReservedCallFrame(MF))
return true;
// A base pointer is required and allowed. Check that it isn't too late to
// reserve it.
@@ -357,9 +358,9 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
const Function *F = MF.getFunction();
- unsigned StackAlign =
- MF.getSubtarget().getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
F->hasFnAttribute(Attribute::StackAlignment));
@@ -378,7 +379,7 @@ cannotEliminateFrame(const MachineFunction &MF) const {
unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
- const TargetFrameLowering *TFI = STI.getFrameLowering();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
if (TFI->hasFP(MF))
return getFramePointerReg(STI);
@@ -517,7 +518,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// Note that the incoming offset is based on the SP value at function entry,
// so it'll be negative.
MachineFunction &MF = *MI->getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -694,8 +695,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineFunction &MF = *MBB.getParent();
const ARMBaseInstrInfo &TII =
*static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const ARMFrameLowering *TFI = static_cast<const ARMFrameLowering *>(
- MF.getSubtarget().getFrameLowering());
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
"This eliminateFrameIndex does not support Thumb1!");
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 7dd21ecbe91b..27cf06b995a0 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -142,6 +142,9 @@ def CC_ARM_AAPCS : CallingConv<[
// Handles byval parameters.
CCIfByVal<CCPassByVal<4, 4>>,
+ // The 'nest' parameter, if any, is passed in R12.
+ CCIfNest<CCAssignToReg<[R12]>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 4175b4af86e6..fdd0763ea608 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -49,8 +49,6 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-extern cl::opt<bool> EnableARMLongCalls;
-
namespace {
// All possible address modes, plus some.
@@ -685,7 +683,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
}
unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
- EVT CEVT = TLI.getValueType(C->getType(), true);
+ EVT CEVT = TLI.getValueType(DL, C->getType(), true);
// Only handle simple types.
if (!CEVT.isSimple()) return 0;
@@ -732,7 +730,7 @@ unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
}
bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
- EVT evt = TLI.getValueType(Ty, true);
+ EVT evt = TLI.getValueType(DL, Ty, true);
// Only handle simple types.
if (evt == MVT::Other || !evt.isSimple()) return false;
@@ -786,12 +784,13 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
return ARMComputeAddress(U->getOperand(0), Addr);
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
return ARMComputeAddress(U->getOperand(0), Addr);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return ARMComputeAddress(U->getOperand(0), Addr);
break;
case Instruction::GetElementPtr: {
@@ -1365,7 +1364,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt) {
Type *Ty = Src1Value->getType();
- EVT SrcEVT = TLI.getValueType(Ty, true);
+ EVT SrcEVT = TLI.getValueType(DL, Ty, true);
if (!SrcEVT.isSimple()) return false;
MVT SrcVT = SrcEVT.getSimpleVT();
@@ -1557,7 +1556,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
return false;
Value *Src = I->getOperand(0);
- EVT SrcEVT = TLI.getValueType(Src->getType(), true);
+ EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
if (!SrcEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
@@ -1750,7 +1749,7 @@ bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
}
bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
- EVT DestVT = TLI.getValueType(I->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
// We can get here in the case when we have a binary operation on a non-legal
// type and the target independent selector doesn't know how to handle it.
@@ -1790,7 +1789,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
}
bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
- EVT FPVT = TLI.getValueType(I->getType(), true);
+ EVT FPVT = TLI.getValueType(DL, I->getType(), true);
if (!FPVT.isSimple()) return false;
MVT VT = FPVT.getSimpleVT();
@@ -2095,7 +2094,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -2122,7 +2121,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
return false;
unsigned SrcReg = Reg + VA.getValNo();
- EVT RVEVT = TLI.getValueType(RV->getType());
+ EVT RVEVT = TLI.getValueType(DL, RV->getType());
if (!RVEVT.isSimple()) return false;
MVT RVVT = RVEVT.getSimpleVT();
MVT DestVT = VA.getValVT();
@@ -2173,7 +2172,7 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
// Manually compute the global's type to avoid building it when unnecessary.
Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
- EVT LCREVT = TLI.getValueType(GVTy);
+ EVT LCREVT = TLI.getValueType(DL, GVTy);
if (!LCREVT.isSimple()) return 0;
GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
@@ -2246,19 +2245,19 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
return false;
unsigned CalleeReg = 0;
- if (EnableARMLongCalls) {
+ if (Subtarget->genLongCalls()) {
CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
if (CalleeReg == 0) return false;
}
// Issue the call.
- unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);
+ unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls());
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DbgLoc, TII.get(CallOpc));
// BL / BLX don't take a predicate, but tBL / tBLX do.
if (isThumb2)
AddDefaultPred(MIB);
- if (EnableARMLongCalls)
+ if (Subtarget->genLongCalls())
MIB.addReg(CalleeReg);
else
MIB.addExternalSymbol(TLI.getLibcallName(Call));
@@ -2380,7 +2379,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
bool UseReg = false;
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
- if (!GV || EnableARMLongCalls) UseReg = true;
+ if (!GV || Subtarget->genLongCalls()) UseReg = true;
unsigned CalleeReg = 0;
if (UseReg) {
@@ -2576,8 +2575,8 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) {
Value *Op = I->getOperand(0);
EVT SrcVT, DestVT;
- SrcVT = TLI.getValueType(Op->getType(), true);
- DestVT = TLI.getValueType(I->getType(), true);
+ SrcVT = TLI.getValueType(DL, Op->getType(), true);
+ DestVT = TLI.getValueType(DL, I->getType(), true);
if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
return false;
@@ -2742,8 +2741,8 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {
if (!SrcReg) return false;
EVT SrcEVT, DestEVT;
- SrcEVT = TLI.getValueType(SrcTy, true);
- DestEVT = TLI.getValueType(DestTy, true);
+ SrcEVT = TLI.getValueType(DL, SrcTy, true);
+ DestEVT = TLI.getValueType(DL, DestTy, true);
if (!SrcEVT.isSimple()) return false;
if (!DestEVT.isSimple()) return false;
@@ -2763,7 +2762,7 @@ bool ARMFastISel::SelectShift(const Instruction *I,
return false;
// Only handle i32 now.
- EVT DestVT = TLI.getValueType(I->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
if (DestVT != MVT::i32)
return false;
@@ -3026,7 +3025,7 @@ bool ARMFastISel::fastLowerArguments() {
if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
return false;
- EVT ArgVT = TLI.getValueType(ArgTy);
+ EVT ArgVT = TLI.getValueType(DL, ArgTy);
if (!ArgVT.isSimple()) return false;
switch (ArgVT.getSimpleVT().SimpleTy) {
case MVT::i8:
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index a52e49780e27..6744000afe2b 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -800,7 +800,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
// This is bad, if an interrupt is taken after the mov, sp is in an
// inconsistent state.
// Use the first callee-saved register as a scratch register.
- assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&
"No scratch register to restore SP from FP!");
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
ARMCC::AL, 0, TII);
@@ -1470,7 +1470,8 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
// callee-saved vector registers after realigning the stack. The vst1 and vld1
// instructions take alignment hints that can improve performance.
//
-static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
+static void
+checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
if (!SpillAlignedNEONRegs)
return;
@@ -1497,10 +1498,9 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
// callee-saved registers in order, but it can happen that there are holes in
// the range. Registers above the hole will be spilled to the standard DPRCS
// area.
- MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned NumSpills = 0;
for (; NumSpills < 8; ++NumSpills)
- if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills))
+ if (!SavedRegs.test(ARM::D8 + NumSpills))
break;
// Don't do this for just one d-register. It's not worth it.
@@ -1511,12 +1511,13 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
// A scratch register is required for the vst1 / vld1 instructions.
- MF.getRegInfo().setPhysRegUsed(ARM::R4);
+ SavedRegs.set(ARM::R4);
}
-void
-ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
// This tells PEI to spill the FP as if it is any other callee-save register
// to take advantage the eliminateFrameIndex machinery. This also ensures it
// is spilled in the order specified by getCalleeSavedRegs() to make it easier
@@ -1543,12 +1544,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// FIXME: It will be better just to find spare register here.
if (AFI->isThumb2Function() &&
(MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
- MRI.setPhysRegUsed(ARM::R4);
+ SavedRegs.set(ARM::R4);
if (AFI->isThumb1OnlyFunction()) {
// Spill LR if Thumb1 function uses variable length argument lists.
if (AFI->getArgRegsSaveSize() > 0)
- MRI.setPhysRegUsed(ARM::LR);
+ SavedRegs.set(ARM::LR);
// Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
// for sure what the stack size will be, but for this, an estimate is good
@@ -1558,23 +1559,23 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// FIXME: It will be better just to find spare register here.
unsigned StackSize = MFI->estimateStackSize(MF);
if (MFI->hasVarSizedObjects() || StackSize > 508)
- MRI.setPhysRegUsed(ARM::R4);
+ SavedRegs.set(ARM::R4);
}
// See if we can spill vector registers to aligned stack.
- checkNumAlignedDPRCS2Regs(MF);
+ checkNumAlignedDPRCS2Regs(MF, SavedRegs);
// Spill the BasePtr if it's used.
if (RegInfo->hasBasePointer(MF))
- MRI.setPhysRegUsed(RegInfo->getBaseRegister());
+ SavedRegs.set(RegInfo->getBaseRegister());
// Don't spill FP if the frame can be eliminated. This is determined
- // by scanning the callee-save registers to see if any is used.
+ // by scanning the callee-save registers to see if any is modified.
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
- if (MRI.isPhysRegUsed(Reg)) {
+ if (SavedRegs.test(Reg)) {
Spilled = true;
CanEliminateFrame = false;
}
@@ -1668,7 +1669,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
// Spill LR as well so we can fold BX_RET to the registers restore (LDM).
if (!LRSpilled && CS1Spilled) {
- MRI.setPhysRegUsed(ARM::LR);
+ SavedRegs.set(ARM::LR);
NumGPRSpills++;
SmallVectorImpl<unsigned>::iterator LRPos;
LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
@@ -1681,7 +1682,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (hasFP(MF)) {
- MRI.setPhysRegUsed(FramePtr);
+ SavedRegs.set(FramePtr);
auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
FramePtr);
if (FPPos != UnspilledCS1GPRs.end())
@@ -1700,7 +1701,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Don't spill high register if the function is thumb
if (!AFI->isThumbFunction() ||
isARMLowRegister(Reg) || Reg == ARM::LR) {
- MRI.setPhysRegUsed(Reg);
+ SavedRegs.set(Reg);
if (!MRI.isReserved(Reg))
ExtraCSSpill = true;
break;
@@ -1708,7 +1709,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
} else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
unsigned Reg = UnspilledCS2GPRs.front();
- MRI.setPhysRegUsed(Reg);
+ SavedRegs.set(Reg);
if (!MRI.isReserved(Reg))
ExtraCSSpill = true;
}
@@ -1747,7 +1748,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (Extras.size() && NumExtras == 0) {
for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
- MRI.setPhysRegUsed(Extras[i]);
+ SavedRegs.set(Extras[i]);
}
} else if (!AFI->isThumb1OnlyFunction()) {
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
@@ -1761,7 +1762,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (ForceLRSpill) {
- MRI.setPhysRegUsed(ARM::LR);
+ SavedRegs.set(ARM::LR);
AFI->setLRIsSpilledForFarJump(true);
}
}
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index d763d17a506f..6fdc5eff5e47 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -54,8 +54,8 @@ public:
unsigned &FrameReg, int SPAdj) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
void adjustForSegmentedStacks(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 50afb192b331..b110628a0a86 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -533,7 +533,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -556,7 +557,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
return true;
@@ -702,7 +704,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
@@ -722,7 +725,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -900,7 +904,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
@@ -915,7 +920,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -964,7 +970,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
@@ -981,7 +988,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
ARM_AM::AddrOpc AddSub = ARM_AM::add;
@@ -1215,7 +1223,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
MachineFrameInfo *MFI = MF->getFrameInfo();
if (MFI->getObjectAlignment(FI) < 4)
MFI->setObjectAlignment(FI, 4);
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -1237,7 +1246,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
MachineFrameInfo *MFI = MF->getFrameInfo();
if (MFI->getObjectAlignment(FI) < 4)
MFI->setObjectAlignment(FI, 4);
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
return true;
@@ -1285,7 +1295,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -1314,7 +1325,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
return true;
@@ -1343,7 +1355,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
return true;
@@ -1438,7 +1451,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
@@ -2510,7 +2524,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (UseCP) {
SDValue CPIdx = CurDAG->getTargetConstantPool(
ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
- TLI->getPointerTy());
+ TLI->getPointerTy(CurDAG->getDataLayout()));
SDNode *ResNode;
if (Subtarget->isThumb()) {
@@ -2540,7 +2554,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::FrameIndex: {
// Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ SDValue TFI = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
if (Subtarget->isThumb1Only()) {
// Set the alignment of the frame object to 4, to avoid having to generate
// more than one ADD
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 4b2105b7442f..e335784f6d87 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -60,11 +60,6 @@ STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
-cl::opt<bool>
-EnableARMLongCalls("arm-long-calls", cl::Hidden,
- cl::desc("Generate calls via indirect call instructions"),
- cl::init(false));
-
static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
@@ -548,6 +543,27 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
+ // NEON does not have single instruction CTTZ for vectors.
+ setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
+
+ setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
+
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
+
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
+
// NEON only has FMA instructions as of VFP4.
if (!Subtarget->hasVFP4()) {
setOperationAction(ISD::FMA, MVT::v2f32, Expand);
@@ -1149,8 +1165,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
-EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
- if (!VT.isVector()) return getPointerTy();
+EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
+ EVT VT) const {
+ if (!VT.isVector())
+ return getPointerTy(DL);
return VT.changeVectorElementTypeToInteger();
}
@@ -1429,7 +1447,8 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
- PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
+ StackPtr, PtrOff);
return DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo::getStack(LocMemOffset),
false, false, 0);
@@ -1453,7 +1472,8 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
else {
assert(NextVA.isMemLoc());
if (!StackPtr.getNode())
- StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
+ getPointerTy(DAG.getDataLayout()));
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
dl, DAG, NextVA,
@@ -1526,7 +1546,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getCALLSEQ_START(Chain,
DAG.getIntPtrConstant(NumBytes, dl, true), dl);
- SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+ SDValue StackPtr =
+ DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
RegsToPassVector RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
@@ -1607,7 +1628,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned RegBegin, RegEnd;
CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT =
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
unsigned int i, j;
for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
@@ -1628,12 +1650,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
if (Flags.getByValSize() > 4*offset) {
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
- SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
- StkPtrOff);
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
- SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
+ SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
MVT::i32);
SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
@@ -1693,8 +1715,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isARMFunc = false;
bool isLocalARMFunc = false;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ auto PtrVt = getPointerTy(DAG.getDataLayout());
- if (EnableARMLongCalls) {
+ if (Subtarget->genLongCalls()) {
assert((Subtarget->isTargetWindows() ||
getTargetMachine().getRelocationModel() == Reloc::Static) &&
"long-calls with non-static relocation model!");
@@ -1709,12 +1732,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
// Get the address of the callee into a register
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(getPointerTy(), dl,
- DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(), false, false,
+ false, 0);
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
@@ -1724,29 +1746,28 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
ARMPCLabelIndex, 0);
// Get the address of the callee into a register
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(getPointerTy(), dl,
- DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(), false, false,
+ false, 0);
}
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
isDirect = true;
- bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
- bool isStub = (isExt && Subtarget->isTargetMachO()) &&
+ bool isDef = GV->isStrongDefinitionForLinker();
+ bool isStub = (!isDef && Subtarget->isTargetMachO()) &&
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
// ARM call to a local ARM function is predicable.
- isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
+ isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
// tBX takes a register source operand.
if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
- Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
- DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
- 0, ARMII::MO_NONLAZY));
- Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
+ Callee = DAG.getNode(
+ ARMISD::WrapperPIC, dl, PtrVt,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
+ Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(), false, false, true, 0);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
@@ -1754,20 +1775,20 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned TargetFlags = GV->hasDLLImportStorageClass()
? ARMII::MO_DLLIMPORT
: ARMII::MO_NO_FLAG;
- Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0,
- TargetFlags);
+ Callee =
+ DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags);
if (GV->hasDLLImportStorageClass())
- Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(),
- Callee), MachinePointerInfo::getGOT(),
- false, false, false, 0);
+ Callee =
+ DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
+ DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
+ MachinePointerInfo::getGOT(), false, false, false, 0);
} else {
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
if (Subtarget->isTargetELF() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_)
OpFlags = ARMII::MO_PLT;
- Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, OpFlags);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
@@ -1781,22 +1802,20 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
ARMPCLabelIndex, 4);
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(getPointerTy(), dl,
- DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(), false, false,
+ false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
- Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
- getPointerTy(), Callee, PICLabel);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
} else {
unsigned OpFlags = 0;
// On ELF targets for PIC code, direct calls should go through the PLT
if (Subtarget->isTargetELF() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_)
OpFlags = ARMII::MO_PLT;
- Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
+ Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, OpFlags);
}
}
@@ -2433,7 +2452,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = 0;
SDLoc DL(Op);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
@@ -2462,7 +2481,7 @@ SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {
SDLoc dl(GA);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -2508,7 +2527,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SDLoc dl(GA);
SDValue Offset;
SDValue Chain = DAG.getEntryNode();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Get the Thread Pointer
SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
@@ -2574,7 +2593,7 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
@@ -2617,7 +2636,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
@@ -2648,7 +2667,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
const ARMII::TOF TargetFlags =
(GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
SDLoc DL(Op);
@@ -2672,7 +2691,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
@@ -2716,14 +2735,14 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1));
}
case Intrinsic::arm_thread_pointer: {
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
}
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
unsigned PCAdj = (RelocM != Reloc::PIC_)
@@ -2820,7 +2839,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDLoc dl(Op);
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
@@ -2850,7 +2869,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
// Create load node to retrieve arguments from the stack.
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0);
@@ -2904,8 +2923,9 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
if (REnd != RBegin)
ArgOffset = -4 * (ARM::R4 - RBegin);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
- SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
SmallVector<SDValue, 4> MemOps;
const TargetRegisterClass *RC =
@@ -2918,8 +2938,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(OrigArg, 4 * i), false, false, 0);
MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getConstant(4, dl, getPointerTy()));
+ FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
}
if (!MemOps.empty())
@@ -3013,6 +3032,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -3035,7 +3055,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue ArgValue2;
if (VA.isMemLoc()) {
int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0);
@@ -3122,7 +3142,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,
CurByValIndex, VA.getLocMemOffset(),
Flags.getByValSize());
- InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
+ InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
CCInfo.nextInRegsParam();
} else {
unsigned FIOffset = VA.getLocMemOffset();
@@ -3130,7 +3150,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
FIOffset, true);
// Create load nodes to retrieve arguments from the stack.
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0));
@@ -3855,7 +3875,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
SDValue Index = Op.getOperand(2);
SDLoc dl(Op);
- EVT PTy = getPointerTy();
+ EVT PTy = getPointerTy(DAG.getDataLayout());
JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
@@ -4102,8 +4122,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned ARMTargetLowering::getRegisterByName(const char* RegName,
- EVT VT) const {
+unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("sp", ARM::SP)
.Default(0);
@@ -4163,7 +4183,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
// Turn f64->i64 into VMOVRRD.
if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
SDValue Cvt;
- if (TLI.isBigEndian() && SrcVT.isVector() &&
+ if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
SrcVT.getVectorNumElements() > 1)
Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32),
@@ -4283,8 +4303,82 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
- EVT VT = N->getValueType(0);
SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ if (VT.isVector()) {
+ assert(ST->hasNEON());
+
+ // Compute the least significant set bit: LSB = X & -X
+ SDValue X = N->getOperand(0);
+ SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
+ SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
+
+ EVT ElemTy = VT.getVectorElementType();
+
+ if (ElemTy == MVT::i8) {
+ // Compute with: cttz(x) = ctpop(lsb - 1)
+ SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+ DAG.getTargetConstant(1, dl, ElemTy));
+ SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
+ return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
+ }
+
+ if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
+ (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
+ // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
+ unsigned NumBits = ElemTy.getSizeInBits();
+ SDValue WidthMinus1 =
+ DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+ DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
+ SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
+ return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
+ }
+
+ // Compute with: cttz(x) = ctpop(lsb - 1)
+
+ // Since we can only compute the number of bits in a byte with vcnt.8, we
+ // have to gather the result with pairwise addition (vpaddl) for i16, i32,
+ // and i64.
+
+ // Compute LSB - 1.
+ SDValue Bits;
+ if (ElemTy == MVT::i64) {
+ // Load constant 0xffff'ffff'ffff'ffff to register.
+ SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+ DAG.getTargetConstant(0x1eff, dl, MVT::i32));
+ Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
+ } else {
+ SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+ DAG.getTargetConstant(1, dl, ElemTy));
+ Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
+ }
+
+ // Count #bits with vcnt.8.
+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+ SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
+ SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
+
+ // Gather the #bits with vpaddl (pairwise add.)
+ EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
+ SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
+ DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
+ Cnt8);
+ if (ElemTy == MVT::i16)
+ return Cnt16;
+
+ EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
+ SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
+ DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
+ Cnt16);
+ if (ElemTy == MVT::i32)
+ return Cnt32;
+
+ assert(ElemTy == MVT::i64);
+ SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
+ Cnt32);
+ return Cnt64;
+ }
if (!ST->hasV6T2Ops())
return SDValue();
@@ -4730,7 +4824,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
ImmMask <<= 1;
}
- if (DAG.getTargetLoweringInfo().isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
// swap higher and lower 32 bit word
Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
@@ -5868,7 +5962,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
if (BVN->getValueType(0) != MVT::v4i32 ||
BVN->getOpcode() != ISD::BUILD_VECTOR)
return false;
- unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
unsigned HiElt = 1 - LoElt;
ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
@@ -6013,7 +6107,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
SDNode *BVN = N->getOperand(0).getNode();
assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
- unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32,
BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
}
@@ -6342,18 +6436,19 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Pair of floats / doubles used to pass the result.
StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
// Create stack object for sret.
- const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
- const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);
+ auto &DL = DAG.getDataLayout();
+ const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
+ const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
- SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
+ SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL));
ArgListTy Args;
ArgListEntry Entry;
@@ -6373,7 +6468,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
const char *LibcallName = (ArgVT == MVT::f64)
? "__sincos_stret" : "__sincosf_stret";
- SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
@@ -6387,7 +6482,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(), false, false, false, 0);
// Address of cos field.
- SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
+ SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
MachinePointerInfo(), false, false, false, 0);
@@ -6487,7 +6582,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
case ISD::SRL_PARTS:
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
- case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
case ISD::SETCC: return LowerVSETCC(Op, DAG);
case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
@@ -6845,9 +6941,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
+ unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
if (Align == 0)
- Align = getDataLayout()->getTypeAllocSize(C->getType());
+ Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
@@ -6935,9 +7031,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
+ unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
if (Align == 0)
- Align = getDataLayout()->getTypeAllocSize(C->getType());
+ Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
@@ -7313,9 +7409,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
+ unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
if (Align == 0)
- Align = getDataLayout()->getTypeAllocSize(C->getType());
+ Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
if (IsThumb1)
@@ -8001,7 +8097,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
// Build operand list.
SmallVector<SDValue, 8> Ops;
Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
- TLI.getPointerTy()));
+ TLI.getPointerTy(DAG.getDataLayout())));
// Input is the vector.
Ops.push_back(Vec);
@@ -8681,7 +8777,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
std::min(4U, LD->getAlignment() / 2));
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
- if (DCI.DAG.getTargetLoweringInfo().isBigEndian())
+ if (DCI.DAG.getDataLayout().isBigEndian())
std::swap (NewLD1, NewLD2);
SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
return Result;
@@ -9312,7 +9408,9 @@ static SDValue PerformSTORECombine(SDNode *N,
SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i < NumElems; ++i)
- ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
+ ShuffleVec[i] = DAG.getDataLayout().isBigEndian()
+ ? (i + 1) * SizeRatio - 1
+ : i * SizeRatio;
// Can't shuffle using an illegal type.
if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
@@ -9339,8 +9437,8 @@ static SDValue PerformSTORECombine(SDNode *N,
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
- SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, DL,
- TLI.getPointerTy());
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
+ TLI.getPointerTy(DAG.getDataLayout()));
SDValue BasePtr = St->getBasePtr();
// Perform one or more big stores into memory.
@@ -9367,7 +9465,7 @@ static SDValue PerformSTORECombine(SDNode *N,
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
StVal.getNode()->hasOneUse()) {
SelectionDAG &DAG = DCI.DAG;
- bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();
+ bool isBigEndian = DAG.getDataLayout().isBigEndian();
SDLoc DL(St);
SDValue BasePtr = St->getBasePtr();
SDValue NewST1 = DAG.getStore(St->getChain(), DL,
@@ -10078,7 +10176,7 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// For any little-endian targets with neon, we can support unaligned ld/st
// of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
// A big-endian target may also explicitly support unaligned accesses
- if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
+ if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
if (Fast)
*Fast = true;
return true;
@@ -10317,10 +10415,10 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
-bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
- EVT VT = getValueType(Ty, true);
+ EVT VT = getValueType(DL, Ty, true);
if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
return false;
@@ -10664,7 +10762,7 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
-ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
+ARMTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
@@ -10723,10 +10821,8 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
}
typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
-RCPair
-ARMTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const {
+RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
// GCC ARM Constraint Letters
switch (Constraint[0]) {
@@ -10974,7 +11070,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
}
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
- getPointerTy());
+ getPointerTy(DAG.getDataLayout()));
Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);
@@ -11083,7 +11179,8 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::arm_neon_vld4lane: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
- uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
+ uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
@@ -11103,12 +11200,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::arm_neon_vst4lane: {
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
unsigned NumElts = 0;
for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
- NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
+ NumElts += DL.getTypeAllocSize(ArgTy) / 8;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
@@ -11122,12 +11220,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
case Intrinsic::arm_ldaex:
case Intrinsic::arm_ldrex: {
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
Info.vol = true;
Info.readMem = true;
Info.writeMem = false;
@@ -11135,12 +11234,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
case Intrinsic::arm_stlex:
case Intrinsic::arm_strex: {
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
Info.vol = true;
Info.readMem = false;
Info.writeMem = true;
@@ -11427,9 +11527,9 @@ bool ARMTargetLowering::lowerInterleavedLoad(
VectorType *VecTy = Shuffles[0]->getType();
Type *EltTy = VecTy->getVectorElementType();
- const DataLayout *DL = getDataLayout();
- unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy);
- bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64;
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
+ bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
// Skip illegal vector types and vector types of i64/f64 element (vldN doesn't
// support i64/f64 element).
@@ -11439,8 +11539,8 @@ bool ARMTargetLowering::lowerInterleavedLoad(
// A pointer vector can not be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
if (EltTy->isPointerTy())
- VecTy = VectorType::get(DL->getIntPtrType(EltTy),
- VecTy->getVectorNumElements());
+ VecTy =
+ VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
Intrinsic::arm_neon_vld3,
@@ -11517,9 +11617,9 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
Type *EltTy = VecTy->getVectorElementType();
VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
- const DataLayout *DL = getDataLayout();
- unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy);
- bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64;
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
+ bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
// Skip illegal sub vector types and vector types of i64/f64 element (vstN
// doesn't support i64/f64 element).
@@ -11533,7 +11633,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
// StN intrinsics don't support pointer vectors as arguments. Convert pointer
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
- Type *IntTy = DL->getIntPtrType(EltTy);
+ Type *IntTy = DL.getIntPtrType(EltTy);
// Convert to the corresponding integer vector.
Type *IntVecTy =
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 74396392f8e3..efc9020c193a 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -249,7 +249,8 @@ namespace llvm {
}
/// getSetCCResultType - Return the value type to use for ISD::SETCC.
- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
@@ -286,8 +287,8 @@ namespace llvm {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS) const override;
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
@@ -324,8 +325,7 @@ namespace llvm {
bool ExpandInlineAsm(CallInst *CI) const override;
- ConstraintType
- getConstraintType(const std::string &Constraint) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
@@ -334,8 +334,7 @@ namespace llvm {
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
@@ -345,8 +344,8 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned getInlineAsmMemConstraint(
- const std::string &ConstraintCode) const override {
+ unsigned
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "Q")
return InlineAsm::Constraint_Q;
else if (ConstraintCode.size() == 2) {
@@ -533,7 +532,8 @@ namespace llvm {
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
- unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+ unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const override;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index b8cac135baf6..61c45af26fe1 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -306,8 +306,8 @@ def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;
def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;
-def IsLE : Predicate<"getTargetLowering()->isLittleEndian()">;
-def IsBE : Predicate<"getTargetLowering()->isBigEndian()">;
+def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;
+def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 245c9e869bf6..37352810c99f 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -31,11 +31,13 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -65,12 +67,18 @@ namespace {
static char ID;
ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
+ const MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
const ARMSubtarget *STI;
const TargetLowering *TL;
ARMFunctionInfo *AFI;
- RegScavenger *RS;
+ LivePhysRegs LiveRegs;
+ RegisterClassInfo RegClassInfo;
+ MachineBasicBlock::const_iterator LiveRegPos;
+ bool LiveRegsValid;
+ bool RegClassInfoValid;
bool isThumb1, isThumb2;
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -80,64 +88,60 @@ namespace {
}
private:
+ /// A set of load/store MachineInstrs with same base register sorted by
+ /// offset.
struct MemOpQueueEntry {
- int Offset;
- unsigned Reg;
- bool isKill;
- unsigned Position;
- MachineBasicBlock::iterator MBBI;
- bool Merged;
- MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
- MachineBasicBlock::iterator i)
- : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
+ MachineInstr *MI;
+ int Offset; ///< Load/Store offset.
+ unsigned Position; ///< Position as counted from end of basic block.
+ MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
+ : MI(MI), Offset(Offset), Position(Position) {}
};
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
- typedef MemOpQueue::iterator MemOpQueueIter;
- void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
- const MemOpQueue &MemOps, unsigned DefReg,
- unsigned RangeBegin, unsigned RangeEnd);
+ /// A set of MachineInstrs that fulfill (nearly all) conditions to get
+ /// merged into a LDM/STM.
+ struct MergeCandidate {
+ /// List of instructions ordered by load/store offset.
+ SmallVector<MachineInstr*, 4> Instrs;
+ /// Index in Instrs of the instruction being latest in the schedule.
+ unsigned LatestMIIdx;
+ /// Index in Instrs of the instruction being earliest in the schedule.
+ unsigned EarliestMIIdx;
+ /// Index into the basic block where the merged instruction will be
+ /// inserted. (See MemOpQueueEntry.Position)
+ unsigned InsertPos;
+ /// Whether the instructions can be merged into a ldm/stm instruction.
+ bool CanMergeToLSMulti;
+ /// Whether the instructions can be merged into a ldrd/strd instruction.
+ bool CanMergeToLSDouble;
+ };
+ SpecificBumpPtrAllocator<MergeCandidate> Allocator;
+ SmallVector<const MergeCandidate*,4> Candidates;
+ SmallVector<MachineInstr*,4> MergeBaseCandidates;
+
+ void moveLiveRegsBefore(const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator Before);
+ unsigned findFreeReg(const TargetRegisterClass &RegClass);
void UpdateBaseRegUses(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- DebugLoc dl, unsigned Base, unsigned WordOffset,
+ DebugLoc DL, unsigned Base, unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg);
- bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
- ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
- DebugLoc dl,
- ArrayRef<std::pair<unsigned, bool> > Regs,
- ArrayRef<unsigned> ImpDefs);
- void MergeOpsUpdate(MachineBasicBlock &MBB,
- MemOpQueue &MemOps,
- unsigned memOpsBegin,
- unsigned memOpsEnd,
- unsigned insertAfter,
- int Offset,
- unsigned Base,
- bool BaseKill,
- unsigned Opcode,
- ARMCC::CondCodes Pred,
- unsigned PredReg,
- unsigned Scratch,
- DebugLoc dl,
- SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
- void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
- unsigned Opcode, unsigned Size,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned Scratch, MemOpQueue &MemOps,
- SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
- void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+ MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
+ MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
+ void FormCandidates(const MemOpQueue &MemOps);
+ MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
- bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const TargetInstrInfo *TII,
- bool &Advance,
- MachineBasicBlock::iterator &I);
- bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- bool &Advance,
- MachineBasicBlock::iterator &I);
+ bool MergeBaseUpdateLoadStore(MachineInstr *MI);
+ bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
+ bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
@@ -185,6 +189,14 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
return Offset;
}
+static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
+ return MI.getOperand(1);
+}
+
+static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
+ return MI.getOperand(0);
+}
+
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
switch (Opcode) {
default: llvm_unreachable("Unhandled opcode!");
@@ -348,6 +360,10 @@ static bool isi32Store(unsigned Opc) {
return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}
+static bool isLoadSingle(unsigned Opc) {
+ return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
+}
+
static unsigned getImmScale(unsigned Opc) {
switch (Opc) {
default: llvm_unreachable("Unhandled opcode!");
@@ -365,12 +381,55 @@ static unsigned getImmScale(unsigned Opc) {
}
}
+static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::tLDRi:
+ case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ return 4;
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ return 8;
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::VLDMSIA:
+ case ARM::VSTMSIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
+ case ARM::VLDMDIA:
+ case ARM::VSTMDIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
+ }
+}
+
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- DebugLoc dl, unsigned Base,
+ DebugLoc DL, unsigned Base,
unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg) {
assert(isThumb1 && "Can only update base register uses for Thumb1!");
@@ -398,7 +457,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
Offset = MO.getImm() - WordOffset * getImmScale(Opc);
// If storing the base register, it needs to be reset first.
- unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
+ unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
MO.setImm(Offset);
@@ -439,7 +498,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
if (InsertSub) {
// An instruction above couldn't be updated, so insert a sub.
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
+ AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
.addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
return;
}
@@ -457,31 +516,65 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
// See PR21029.
if (MBBI != MBB.end()) --MBBI;
AddDefaultT1CC(
- BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
.addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
}
}
+/// Return the first register of class \p RegClass that is not in \p Regs.
+unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
+ if (!RegClassInfoValid) {
+ RegClassInfo.runOnMachineFunction(*MF);
+ RegClassInfoValid = true;
+ }
+
+ for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
+ if (!LiveRegs.contains(Reg))
+ return Reg;
+ return 0;
+}
+
+/// Compute live registers just before instruction \p Before (in normal schedule
+/// direction). Computes backwards so multiple queries in the same block must
+/// come in reverse order.
+void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator Before) {
+ // Initialize if we never queried in this block.
+ if (!LiveRegsValid) {
+ LiveRegs.init(TRI);
+ LiveRegs.addLiveOuts(&MBB, true);
+ LiveRegPos = MBB.end();
+ LiveRegsValid = true;
+ }
+ // Move backward just before the "Before" position.
+ while (LiveRegPos != Before) {
+ --LiveRegPos;
+ LiveRegs.stepBackward(*LiveRegPos);
+ }
+}
+
+static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
+ unsigned Reg) {
+ for (const std::pair<unsigned, bool> &R : Regs)
+ if (R.first == Reg)
+ return true;
+ return false;
+}
+
/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. It returns
/// true if the transformation is done.
-bool
-ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- int Offset, unsigned Base, bool BaseKill,
- unsigned Opcode, ARMCC::CondCodes Pred,
- unsigned PredReg, unsigned Scratch, DebugLoc dl,
- ArrayRef<std::pair<unsigned, bool> > Regs,
- ArrayRef<unsigned> ImpDefs) {
- // Only a single register to load / store. Don't bother.
+MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
unsigned NumRegs = Regs.size();
- if (NumRegs <= 1)
- return false;
+ assert(NumRegs > 1);
// For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
// Compute liveness information for that register to make the decision.
bool SafeToClobberCPSR = !isThumb1 ||
- (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
+ (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
MachineBasicBlock::LQR_Dead);
bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
@@ -489,17 +582,14 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// Exception: If the base register is in the input reglist, Thumb1 LDM is
// non-writeback.
// It's also not possible to merge an STR of the base register in Thumb1.
- if (isThumb1)
- for (const std::pair<unsigned, bool> &R : Regs)
- if (Base == R.first) {
- assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
- if (Opcode == ARM::tLDRi) {
- Writeback = false;
- break;
- } else if (Opcode == ARM::tSTRi) {
- return false;
- }
- }
+ if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
+ assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
+ if (Opcode == ARM::tLDRi) {
+ Writeback = false;
+ } else if (Opcode == ARM::tSTRi) {
+ return nullptr;
+ }
+ }
ARM_AM::AMSubMode Mode = ARM_AM::ia;
// VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
@@ -516,18 +606,18 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
} else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
// Check if this is a supported opcode before inserting instructions to
// calculate a new base register.
- if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
+ if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
// If starting offset isn't zero, insert a MI to materialize a new base.
// But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
if (NumRegs <= 2)
- return false;
+ return nullptr;
// On Thumb1, it's not worth materializing a new base register without
// clobbering the CPSR (i.e. not using ADDS/SUBS).
if (!SafeToClobberCPSR)
- return false;
+ return nullptr;
unsigned NewBase;
if (isi32Load(Opcode)) {
@@ -535,10 +625,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// use as the new base.
NewBase = Regs[NumRegs-1].first;
} else {
- // Use the scratch register to use as a new base.
- NewBase = Scratch;
+ // Find a free register that we can use as scratch register.
+ moveLiveRegsBefore(MBB, InsertBefore);
+ // The merged instruction does not exist yet but will use several Regs if
+ // it is a Store.
+ if (!isLoadSingle(Opcode))
+ for (const std::pair<unsigned, bool> &R : Regs)
+ LiveRegs.addReg(R.first);
+
+ NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
if (NewBase == 0)
- return false;
+ return nullptr;
}
int BaseOpc =
@@ -557,7 +654,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
if (!TL->isLegalAddImmediate(Offset))
// FIXME: Try add with register operand?
- return false; // Probably not worth it then.
+ return nullptr; // Probably not worth it then.
+
+ // We can only append a kill flag to the add/sub input if the value is not
+ // used in the register list of the stm as well.
+ bool KillOldBase = BaseKill &&
+ (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
if (isThumb1) {
// Thumb1: depending on immediate size, use either
@@ -572,43 +674,44 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
!STI->hasV6Ops()) {
// thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
if (Pred != ARMCC::AL)
- return false;
- BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
- .addReg(Base, getKillRegState(BaseKill));
+ return nullptr;
+ BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase));
} else
- BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
- .addReg(Base, getKillRegState(BaseKill))
+ BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase))
.addImm(Pred).addReg(PredReg);
- // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
+ // The following ADDS/SUBS becomes an update.
Base = NewBase;
- BaseKill = false;
+ KillOldBase = true;
}
if (BaseOpc == ARM::tADDrSPi) {
assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
- BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
.addImm(Pred).addReg(PredReg);
} else
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ AddDefaultT1CC(
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
.addImm(Pred).addReg(PredReg);
} else {
- BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
.addImm(Pred).addReg(PredReg).addReg(0);
}
Base = NewBase;
BaseKill = true; // New base is always killed straight away.
}
- bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
- Opcode == ARM::VLDRD);
+ bool isDef = isLoadSingle(Opcode);
// Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
// base register writeback.
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
- if (!Opcode) return false;
+ if (!Opcode)
+ return nullptr;
// Check if a Thumb1 LDM/STM merge is safe. This is the case if:
// - There is no writeback (LDM of base register),
@@ -619,7 +722,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// It's safe to return here since the code to materialize a new base register
// above is also conditional on SafeToClobberCPSR.
if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
- return false;
+ return nullptr;
MachineInstrBuilder MIB;
@@ -628,7 +731,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// Update tLDMIA with writeback if necessary.
Opcode = ARM::tLDMIA_UPD;
- MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
+ MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
// Thumb1: we might need to set base writeback when building the MI.
MIB.addReg(Base, getDefRegState(true))
@@ -637,381 +740,257 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// The base isn't dead after a merged instruction with writeback.
// Insert a sub instruction after the newly formed instruction to reset.
if (!BaseKill)
- UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
+ UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
} else {
// No writeback, simply build the MachineInstr.
- MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
+ MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
MIB.addReg(Base, getKillRegState(BaseKill));
}
MIB.addImm(Pred).addReg(PredReg);
for (const std::pair<unsigned, bool> &R : Regs)
- MIB = MIB.addReg(R.first, getDefRegState(isDef)
- | getKillRegState(R.second));
+ MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
- // Add implicit defs for super-registers.
- for (unsigned ImpDef : ImpDefs)
- MIB.addReg(ImpDef, RegState::ImplicitDefine);
-
- return true;
+ return MIB.getInstr();
}
-/// Find all instructions using a given imp-def within a range.
-///
-/// We are trying to combine a range of instructions, one of which (located at
-/// position RangeBegin) implicitly defines a register. The final LDM/STM will
-/// be placed at RangeEnd, and so any uses of this definition between RangeStart
-/// and RangeEnd must be modified to use an undefined value.
-///
-/// The live range continues until we find a second definition or one of the
-/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
-/// we must consider all uses and decide which are relevant in a second pass.
-void ARMLoadStoreOpt::findUsesOfImpDef(
- SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
- unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
- std::map<unsigned, MachineOperand *> Uses;
- unsigned LastLivePos = RangeEnd;
-
- // First we find all uses of this register with Position between RangeBegin
- // and RangeEnd, any or all of these could be uses of a definition at
- // RangeBegin. We also record the latest position a definition at RangeBegin
- // would be considered live.
- for (unsigned i = 0; i < MemOps.size(); ++i) {
- MachineInstr &MI = *MemOps[i].MBBI;
- unsigned MIPosition = MemOps[i].Position;
- if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
- continue;
-
- // If this instruction defines the register, then any later use will be of
- // that definition rather than ours.
- if (MI.definesRegister(DefReg))
- LastLivePos = std::min(LastLivePos, MIPosition);
-
- MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
- if (!UseOp)
- continue;
-
- // If this instruction kills the register then (assuming liveness is
- // correct when we start) we don't need to think about anything after here.
- if (UseOp->isKill())
- LastLivePos = std::min(LastLivePos, MIPosition);
-
- Uses[MIPosition] = UseOp;
- }
-
- // Now we traverse the list of all uses, and append the ones that actually use
- // our definition to the requested list.
- for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
- E = Uses.end();
- I != E; ++I) {
- // List is sorted by position so once we've found one out of range there
- // will be no more to consider.
- if (I->first > LastLivePos)
- break;
- UsesOfImpDefs.push_back(I->second);
+MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
+ bool IsLoad = isi32Load(Opcode);
+ assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
+ unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
+
+ assert(Regs.size() == 2);
+ MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
+ TII->get(LoadStoreOpcode));
+ if (IsLoad) {
+ MIB.addReg(Regs[0].first, RegState::Define)
+ .addReg(Regs[1].first, RegState::Define);
+ } else {
+ MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
+ .addReg(Regs[1].first, getKillRegState(Regs[1].second));
}
+ MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ return MIB.getInstr();
}
/// Call MergeOps and update MemOps and merges accordingly on success.
-void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
- MemOpQueue &memOps,
- unsigned memOpsBegin, unsigned memOpsEnd,
- unsigned insertAfter, int Offset,
- unsigned Base, bool BaseKill,
- unsigned Opcode,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned Scratch,
- DebugLoc dl,
- SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
- // First calculate which of the registers should be killed by the merged
- // instruction.
- const unsigned insertPos = memOps[insertAfter].Position;
- SmallSet<unsigned, 4> KilledRegs;
- DenseMap<unsigned, unsigned> Killer;
- for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
- if (i == memOpsBegin) {
- i = memOpsEnd;
- if (i == e)
- break;
- }
- if (memOps[i].Position < insertPos && memOps[i].isKill) {
- unsigned Reg = memOps[i].Reg;
+MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
+ const MachineInstr *First = Cand.Instrs.front();
+ unsigned Opcode = First->getOpcode();
+ bool IsLoad = isLoadSingle(Opcode);
+ SmallVector<std::pair<unsigned, bool>, 8> Regs;
+ SmallVector<unsigned, 4> ImpDefs;
+ DenseSet<unsigned> KilledRegs;
+ // Determine list of registers and list of implicit super-register defs.
+ for (const MachineInstr *MI : Cand.Instrs) {
+ const MachineOperand &MO = getLoadStoreRegOp(*MI);
+ unsigned Reg = MO.getReg();
+ bool IsKill = MO.isKill();
+ if (IsKill)
KilledRegs.insert(Reg);
- Killer[Reg] = i;
+ Regs.push_back(std::make_pair(Reg, IsKill));
+
+ if (IsLoad) {
+ // Collect any implicit defs of super-registers, after merging we can't
+ // be sure anymore that we properly preserved these live ranges and must
+ // removed these implicit operands.
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.isDead())
+ continue;
+ assert(MO.isImplicit());
+ unsigned DefReg = MO.getReg();
+
+ if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
+ continue;
+ // We can ignore cases where the super-reg is read and written.
+ if (MI->readsRegister(DefReg))
+ continue;
+ ImpDefs.push_back(DefReg);
+ }
}
}
- for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
- MachineOperand &TransferOp = memOps[i].MBBI->getOperand(0);
- if (TransferOp.isUse() && TransferOp.getReg() == Base)
- BaseKill = false;
+ // Attempt the merge.
+ typedef MachineBasicBlock::iterator iterator;
+ MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
+ iterator InsertBefore = std::next(iterator(LatestMI));
+ MachineBasicBlock &MBB = *LatestMI->getParent();
+ unsigned Offset = getMemoryOpOffset(First);
+ unsigned Base = getLoadStoreBaseOp(*First).getReg();
+ bool BaseKill = LatestMI->killsRegister(Base);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
+ DebugLoc DL = First->getDebugLoc();
+ MachineInstr *Merged = nullptr;
+ if (Cand.CanMergeToLSDouble)
+ Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
+ Opcode, Pred, PredReg, DL, Regs);
+ if (!Merged && Cand.CanMergeToLSMulti)
+ Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
+ Opcode, Pred, PredReg, DL, Regs);
+ if (!Merged)
+ return nullptr;
+
+ // Determine earliest instruction that will get removed. We then keep an
+ // iterator just above it so the following erases don't invalidated it.
+ iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
+ bool EarliestAtBegin = false;
+ if (EarliestI == MBB.begin()) {
+ EarliestAtBegin = true;
+ } else {
+ EarliestI = std::prev(EarliestI);
}
- SmallVector<std::pair<unsigned, bool>, 8> Regs;
- SmallVector<unsigned, 8> ImpDefs;
- SmallVector<MachineOperand *, 8> UsesOfImpDefs;
- for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
- unsigned Reg = memOps[i].Reg;
- // If we are inserting the merged operation after an operation that
- // uses the same register, make sure to transfer any kill flag.
- bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
- Regs.push_back(std::make_pair(Reg, isKill));
-
- // Collect any implicit defs of super-registers. They must be preserved.
- for (const MachineOperand &MO : memOps[i].MBBI->operands()) {
- if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead())
- continue;
- unsigned DefReg = MO.getReg();
- if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
- ImpDefs.push_back(DefReg);
-
- // There may be other uses of the definition between this instruction and
- // the eventual LDM/STM position. These should be marked undef if the
- // merge takes place.
- findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
- insertPos);
+ // Remove instructions which have been merged.
+ for (MachineInstr *MI : Cand.Instrs)
+ MBB.erase(MI);
+
+ // Determine range between the earliest removed instruction and the new one.
+ if (EarliestAtBegin)
+ EarliestI = MBB.begin();
+ else
+ EarliestI = std::next(EarliestI);
+ auto FixupRange = make_range(EarliestI, iterator(Merged));
+
+ if (isLoadSingle(Opcode)) {
+ // If the previous loads defined a super-reg, then we have to mark earlier
+ // operands undef; Replicate the super-reg def on the merged instruction.
+ for (MachineInstr &MI : FixupRange) {
+ for (unsigned &ImpDefReg : ImpDefs) {
+ for (MachineOperand &MO : MI.implicit_operands()) {
+ if (!MO.isReg() || MO.getReg() != ImpDefReg)
+ continue;
+ if (MO.readsReg())
+ MO.setIsUndef();
+ else if (MO.isDef())
+ ImpDefReg = 0;
+ }
+ }
}
- }
- // Try to do the merge.
- MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
- ++Loc;
- if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
- Pred, PredReg, Scratch, dl, Regs, ImpDefs))
- return;
-
- // Merge succeeded, update records.
- Merges.push_back(std::prev(Loc));
-
- // In gathering loads together, we may have moved the imp-def of a register
- // past one of its uses. This is OK, since we know better than the rest of
- // LLVM what's OK with ARM loads and stores; but we still have to adjust the
- // affected uses.
- for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
- E = UsesOfImpDefs.end();
- I != E; ++I)
- (*I)->setIsUndef();
-
- for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
- // Remove kill flags from any memops that come before insertPos.
- if (Regs[i-memOpsBegin].second) {
- unsigned Reg = Regs[i-memOpsBegin].first;
- if (KilledRegs.count(Reg)) {
- unsigned j = Killer[Reg];
- int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
- assert(Idx >= 0 && "Cannot find killing operand");
- memOps[j].MBBI->getOperand(Idx).setIsKill(false);
- memOps[j].isKill = false;
+ MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
+ for (unsigned ImpDef : ImpDefs)
+ MIB.addReg(ImpDef, RegState::ImplicitDefine);
+ } else {
+ // Remove kill flags: We are possibly storing the values later now.
+ assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
+ for (MachineInstr &MI : FixupRange) {
+ for (MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.isKill())
+ continue;
+ if (KilledRegs.count(MO.getReg()))
+ MO.setIsKill(false);
}
- memOps[i].isKill = true;
}
- MBB.erase(memOps[i].MBBI);
- // Update this memop to refer to the merged instruction.
- // We may need to move kill flags again.
- memOps[i].Merged = true;
- memOps[i].MBBI = Merges.back();
- memOps[i].Position = insertPos;
+ assert(ImpDefs.empty());
}
- // Update memOps offsets, since they may have been modified by MergeOps.
- for (auto &MemOp : memOps) {
- MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
- }
+ return Merged;
}
-/// Merge a number of load / store instructions into one or more load / store
-/// multiple instructions.
-void
-ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
- unsigned Base, unsigned Opcode, unsigned Size,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned Scratch, MemOpQueue &MemOps,
- SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
- bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
- int Offset = MemOps[SIndex].Offset;
- int SOffset = Offset;
- unsigned insertAfter = SIndex;
- MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
- DebugLoc dl = Loc->getDebugLoc();
- const MachineOperand &PMO = Loc->getOperand(0);
- unsigned PReg = PMO.getReg();
- unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
- unsigned Count = 1;
- unsigned Limit = ~0U;
- bool BaseKill = false;
- // vldm / vstm limit are 32 for S variants, 16 for D variants.
+static bool isValidLSDoubleOffset(int Offset) {
+ unsigned Value = abs(Offset);
+ // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
+ // multiplied by 4.
+ return (Value % 4) == 0 && Value < 1024;
+}
- switch (Opcode) {
- default: break;
- case ARM::VSTRS:
- Limit = 32;
- break;
- case ARM::VSTRD:
- Limit = 16;
- break;
- case ARM::VLDRD:
- Limit = 16;
- break;
- case ARM::VLDRS:
- Limit = 32;
- break;
- }
+/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
+void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
+ const MachineInstr *FirstMI = MemOps[0].MI;
+ unsigned Opcode = FirstMI->getOpcode();
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ unsigned Size = getLSMultipleTransferSize(FirstMI);
+
+ unsigned SIndex = 0;
+ unsigned EIndex = MemOps.size();
+ do {
+ // Look at the first instruction.
+ const MachineInstr *MI = MemOps[SIndex].MI;
+ int Offset = MemOps[SIndex].Offset;
+ const MachineOperand &PMO = getLoadStoreRegOp(*MI);
+ unsigned PReg = PMO.getReg();
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
+ unsigned Latest = SIndex;
+ unsigned Earliest = SIndex;
+ unsigned Count = 1;
+ bool CanMergeToLSDouble =
+ STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
+ // ARM errata 602117: LDRD with base in list may result in incorrect base
+ // register when interrupted or faulted.
+ if (STI->isCortexM3() && isi32Load(Opcode) &&
+ PReg == getLoadStoreBaseOp(*MI).getReg())
+ CanMergeToLSDouble = false;
+
+ bool CanMergeToLSMulti = true;
+ // On swift vldm/vstm starting with an odd register number as that needs
+ // more uops than single vldrs.
+ if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
+ CanMergeToLSMulti = false;
+
+ // Merge following instructions where possible.
+ for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
+ int NewOffset = MemOps[I].Offset;
+ if (NewOffset != Offset + (int)Size)
+ break;
+ const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
+ unsigned Reg = MO.getReg();
+ unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
+
+ // See if the current load/store may be part of a multi load/store.
+ bool PartOfLSMulti = CanMergeToLSMulti;
+ if (PartOfLSMulti) {
+ // Cannot load from SP
+ if (Reg == ARM::SP)
+ PartOfLSMulti = false;
+ // Register numbers must be in ascending order.
+ else if (RegNum <= PRegNum)
+ PartOfLSMulti = false;
+ // For VFP / NEON load/store multiples, the registers must be
+ // consecutive and within the limit on the number of registers per
+ // instruction.
+ else if (!isNotVFP && RegNum != PRegNum+1)
+ PartOfLSMulti = false;
+ }
+ // See if the current load/store may be part of a double load/store.
+ bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
- for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
- int NewOffset = MemOps[i].Offset;
- const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
- unsigned Reg = MO.getReg();
- unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
- // Register numbers must be in ascending order. For VFP / NEON load and
- // store multiples, the registers must also be consecutive and within the
- // limit on the number of registers per instruction.
- if (Reg != ARM::SP &&
- NewOffset == Offset + (int)Size &&
- ((isNotVFP && RegNum > PRegNum) ||
- ((Count < Limit) && RegNum == PRegNum+1)) &&
- // On Swift we don't want vldm/vstm to start with a odd register num
- // because Q register unaligned vldm/vstm need more uops.
- (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
+ if (!PartOfLSMulti && !PartOfLSDouble)
+ break;
+ CanMergeToLSMulti &= PartOfLSMulti;
+ CanMergeToLSDouble &= PartOfLSDouble;
+ // Track MemOp with latest and earliest position (Positions are
+ // counted in reverse).
+ unsigned Position = MemOps[I].Position;
+ if (Position < MemOps[Latest].Position)
+ Latest = I;
+ else if (Position > MemOps[Earliest].Position)
+ Earliest = I;
+ // Prepare for next MemOp.
Offset += Size;
PRegNum = RegNum;
- ++Count;
- } else {
- // Can't merge this in. Try merge the earlier ones first.
- // We need to compute BaseKill here because the MemOps may have been
- // reordered.
- BaseKill = Loc->killsRegister(Base);
-
- MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
- BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
- MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
- MemOps, Merges);
- return;
}
- if (MemOps[i].Position > MemOps[insertAfter].Position) {
- insertAfter = i;
- Loc = MemOps[i].MBBI;
- }
- }
-
- BaseKill = Loc->killsRegister(Base);
- MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
- Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
-}
-
-static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg) {
- unsigned MyPredReg = 0;
- if (!MI)
- return false;
-
- bool CheckCPSRDef = false;
- switch (MI->getOpcode()) {
- default: return false;
- case ARM::tSUBi8:
- case ARM::t2SUBri:
- case ARM::SUBri:
- CheckCPSRDef = true;
- break;
- case ARM::tSUBspi:
- break;
- }
-
- // Make sure the offset fits in 8 bits.
- if (Bytes == 0 || (Limit && Bytes >= Limit))
- return false;
-
- unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
- MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
- if (!(MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm() * Scale) == Bytes &&
- getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg))
- return false;
-
- return CheckCPSRDef ? !definesCPSR(MI) : true;
-}
-
-static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg) {
- unsigned MyPredReg = 0;
- if (!MI)
- return false;
-
- bool CheckCPSRDef = false;
- switch (MI->getOpcode()) {
- default: return false;
- case ARM::tADDi8:
- case ARM::t2ADDri:
- case ARM::ADDri:
- CheckCPSRDef = true;
- break;
- case ARM::tADDspi:
- break;
- }
-
- if (Bytes == 0 || (Limit && Bytes >= Limit))
- // Make sure the offset fits in 8 bits.
- return false;
-
- unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
- MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
- if (!(MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm() * Scale) == Bytes &&
- getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg))
- return false;
-
- return CheckCPSRDef ? !definesCPSR(MI) : true;
-}
-
-static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
- switch (MI->getOpcode()) {
- default: return 0;
- case ARM::LDRi12:
- case ARM::STRi12:
- case ARM::tLDRi:
- case ARM::tSTRi:
- case ARM::tLDRspi:
- case ARM::tSTRspi:
- case ARM::t2LDRi8:
- case ARM::t2LDRi12:
- case ARM::t2STRi8:
- case ARM::t2STRi12:
- case ARM::VLDRS:
- case ARM::VSTRS:
- return 4;
- case ARM::VLDRD:
- case ARM::VSTRD:
- return 8;
- case ARM::LDMIA:
- case ARM::LDMDA:
- case ARM::LDMDB:
- case ARM::LDMIB:
- case ARM::STMIA:
- case ARM::STMDA:
- case ARM::STMDB:
- case ARM::STMIB:
- case ARM::tLDMIA:
- case ARM::tLDMIA_UPD:
- case ARM::tSTMIA_UPD:
- case ARM::t2LDMIA:
- case ARM::t2LDMDB:
- case ARM::t2STMIA:
- case ARM::t2STMDB:
- case ARM::VLDMSIA:
- case ARM::VSTMSIA:
- return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
- case ARM::VLDMDIA:
- case ARM::VSTMDIA:
- return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
- }
+ // Form a candidate from the Ops collected so far.
+ MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
+ for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
+ Candidate->Instrs.push_back(MemOps[C].MI);
+ Candidate->LatestMIIdx = Latest - SIndex;
+ Candidate->EarliestMIIdx = Earliest - SIndex;
+ Candidate->InsertPos = MemOps[Latest].Position;
+ if (Count == 1)
+ CanMergeToLSMulti = CanMergeToLSDouble = false;
+ Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
+ Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
+ Candidates.push_back(Candidate);
+ // Continue after the chain.
+ SIndex += Count;
+ } while (SIndex < EIndex);
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
@@ -1081,6 +1060,75 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
}
}
+/// Check if the given instruction increments or decrements a register and
+/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
+/// generated by the instruction are possibly read as well.
+static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
+ bool CheckCPSRDef;
+ int Scale;
+ switch (MI.getOpcode()) {
+ case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
+ case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
+ case ARM::t2SUBri:
+ case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
+ case ARM::t2ADDri:
+ case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
+ case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
+ case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
+ default: return 0;
+ }
+
+ unsigned MIPredReg;
+ if (MI.getOperand(0).getReg() != Reg ||
+ MI.getOperand(1).getReg() != Reg ||
+ getInstrPredicate(&MI, MIPredReg) != Pred ||
+ MIPredReg != PredReg)
+ return 0;
+
+ if (CheckCPSRDef && definesCPSR(&MI))
+ return 0;
+ return MI.getOperand(2).getImm() * Scale;
+}
+
+/// Searches for an increment or decrement of \p Reg before \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+ Offset = 0;
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (MBBI == BeginMBBI)
+ return EndMBBI;
+
+ // Skip debug values.
+ MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
+ while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
+ --PrevMBBI;
+
+ Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
+ return Offset == 0 ? EndMBBI : PrevMBBI;
+}
+
+/// Searches for a increment or decrement of \p Reg after \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+ Offset = 0;
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
+ // Skip debug values.
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if (NextMBBI == EndMBBI)
+ return EndMBBI;
+
+ Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
+ return Offset == 0 ? EndMBBI : NextMBBI;
+}
+
/// Fold proceeding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
@@ -1093,21 +1141,17 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
-bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- bool &Advance,
- MachineBasicBlock::iterator &I) {
+bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// Thumb1 is already using updating loads/stores.
if (isThumb1) return false;
- MachineInstr *MI = MBBI;
- unsigned Base = MI->getOperand(0).getReg();
- bool BaseKill = MI->getOperand(0).isKill();
- unsigned Bytes = getLSMultipleTransferSize(MI);
+ const MachineOperand &BaseOP = MI->getOperand(0);
+ unsigned Base = BaseOP.getReg();
+ bool BaseKill = BaseOP.isKill();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
unsigned Opcode = MI->getOpcode();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
// Can't use an updating ld/st if the base register is also a dest
// register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
@@ -1115,55 +1159,27 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
if (MI->getOperand(i).getReg() == Base)
return false;
- bool DoMerge = false;
+ int Bytes = getLSMultipleTransferSize(MI);
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineBasicBlock::iterator MBBI(MI);
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr
+ = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
-
- // Try merging with the previous instruction.
- MachineBasicBlock::iterator BeginMBBI = MBB.begin();
- if (MBBI != BeginMBBI) {
- MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
- while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
- --PrevMBBI;
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::db;
- DoMerge = true;
- } else if (Mode == ARM_AM::ib &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::da;
- DoMerge = true;
- }
- if (DoMerge)
- MBB.erase(PrevMBBI);
- }
-
- // Try merging with the next instruction.
- MachineBasicBlock::iterator EndMBBI = MBB.end();
- if (!DoMerge && MBBI != EndMBBI) {
- MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
- while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
- ++NextMBBI;
- if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
- isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
- if (DoMerge) {
- if (NextMBBI == I) {
- Advance = true;
- ++I;
- }
- MBB.erase(NextMBBI);
- }
+ if (Mode == ARM_AM::ia && Offset == -Bytes) {
+ Mode = ARM_AM::db;
+ } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
+ Mode = ARM_AM::da;
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
+ ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
+ return false;
}
-
- if (!DoMerge)
- return false;
+ MBB.erase(MergeInstr);
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
.addReg(Base, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg);
@@ -1231,21 +1247,15 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
/// Fold proceeding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
-bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const TargetInstrInfo *TII,
- bool &Advance,
- MachineBasicBlock::iterator &I) {
+bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
// Thumb1 doesn't have updating LDR/STR.
// FIXME: Use LDM/STM with single register instead.
if (isThumb1) return false;
- MachineInstr *MI = MBBI;
- unsigned Base = MI->getOperand(1).getReg();
- bool BaseKill = MI->getOperand(1).isKill();
- unsigned Bytes = getLSMultipleTransferSize(MI);
+ unsigned Base = getLoadStoreBaseOp(*MI).getReg();
+ bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
unsigned Opcode = MI->getOpcode();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
@@ -1255,7 +1265,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
return false;
- bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
if (MI->getOperand(0).getReg() == Base)
@@ -1263,64 +1272,38 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
- bool DoMerge = false;
- ARM_AM::AddrOpc AddSub = ARM_AM::add;
- unsigned NewOpc = 0;
- // AM2 - 12 bits, thumb2 - 8 bits.
- unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
-
- // Try merging with the previous instruction.
- MachineBasicBlock::iterator BeginMBBI = MBB.begin();
- if (MBBI != BeginMBBI) {
- MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
- while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
- --PrevMBBI;
- if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
- DoMerge = true;
- AddSub = ARM_AM::sub;
- } else if (!isAM5 &&
- isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
- DoMerge = true;
- }
- if (DoMerge) {
- NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
- MBB.erase(PrevMBBI);
- }
- }
-
- // Try merging with the next instruction.
- MachineBasicBlock::iterator EndMBBI = MBB.end();
- if (!DoMerge && MBBI != EndMBBI) {
- MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
- while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
- ++NextMBBI;
- if (!isAM5 &&
- isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
- DoMerge = true;
- AddSub = ARM_AM::sub;
- } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
- DoMerge = true;
- }
- if (DoMerge) {
- NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
- if (NextMBBI == I) {
- Advance = true;
- ++I;
- }
- MBB.erase(NextMBBI);
- }
+ int Bytes = getLSMultipleTransferSize(MI);
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineBasicBlock::iterator MBBI(MI);
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr
+ = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
+ unsigned NewOpc;
+ if (!isAM5 && Offset == Bytes) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+ } else if (Offset == -Bytes) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (Offset == Bytes) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+ } else if (!isAM5 && Offset == -Bytes) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+ } else
+ return false;
}
+ MBB.erase(MergeInstr);
- if (!DoMerge)
- return false;
+ ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
+ bool isLd = isLoadSingle(Opcode);
if (isAM5) {
// VLDM[SD]_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
// updating load/store-multiple instructions can be used with only one
// register.)
MachineOperand &MO = MI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
.addReg(Base, getKillRegState(isLd ? BaseKill : false))
.addImm(Pred).addReg(PredReg)
@@ -1330,20 +1313,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (isAM2) {
// LDR_PRE, LDR_POST
if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
} else {
- int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
}
} else {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2LDR_PRE, t2LDR_POST
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
}
@@ -1353,15 +1334,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
// the vestigal zero-reg offset register. When that's fixed, this clause
// can be removed entirely.
if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
- int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
} else {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2STR_PRE, t2STR_POST
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
}
@@ -1371,6 +1351,66 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
return true;
}
+bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
+ "Must have t2STRDi8 or t2LDRDi8");
+ if (MI.getOperand(3).getImm() != 0)
+ return false;
+
+ // Behaviour for writeback is undefined if base register is the same as one
+ // of the others.
+ const MachineOperand &BaseOp = MI.getOperand(2);
+ unsigned Base = BaseOp.getReg();
+ const MachineOperand &Reg0Op = MI.getOperand(0);
+ const MachineOperand &Reg1Op = MI.getOperand(1);
+ if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
+ return false;
+
+ unsigned PredReg;
+ ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
+ MachineBasicBlock::iterator MBBI(MI);
+ MachineBasicBlock &MBB = *MI.getParent();
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
+ PredReg, Offset);
+ unsigned NewOpc;
+ if (Offset == 8 || Offset == -8) {
+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (Offset == 8 || Offset == -8) {
+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
+ } else
+ return false;
+ }
+ MBB.erase(MergeInstr);
+
+ DebugLoc DL = MI.getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
+ if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
+ MIB.addOperand(Reg0Op).addOperand(Reg1Op)
+ .addReg(BaseOp.getReg(), RegState::Define);
+ } else {
+ assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
+ MIB.addReg(BaseOp.getReg(), RegState::Define)
+ .addOperand(Reg0Op).addOperand(Reg1Op);
+ }
+ MIB.addReg(BaseOp.getReg(), RegState::Kill)
+ .addImm(Offset).addImm(Pred).addReg(PredReg);
+ assert(TII->get(Opcode).getNumOperands() == 6 &&
+ TII->get(NewOpc).getNumOperands() == 7 &&
+ "Unexpected number of operands in Opcode specification.");
+
+ // Transfer implicit operands.
+ for (const MachineOperand &MO : MI.implicit_operands())
+ MIB.addOperand(MO);
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MBB.erase(MBBI);
+ return true;
+}
+
/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
@@ -1426,26 +1466,10 @@ static bool isMemoryOp(const MachineInstr *MI) {
return false;
}
-/// Advance register scavenger to just before the earliest memory op that is
-/// being merged.
-void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
- MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
- unsigned Position = MemOps[0].Position;
- for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
- if (MemOps[i].Position < Position) {
- Position = MemOps[i].Position;
- Loc = MemOps[i].MBBI;
- }
- }
-
- if (Loc != MBB.begin())
- RS->forward(std::prev(Loc));
-}
-
static void InsertLDR_STR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
int Offset, bool isDef,
- DebugLoc dl, unsigned NewOpc,
+ DebugLoc DL, unsigned NewOpc,
unsigned Reg, bool RegDeadKill, bool RegUndef,
unsigned BaseReg, bool BaseKill, bool BaseUndef,
bool OffKill, bool OffUndef,
@@ -1491,7 +1515,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
if (!Errata602117 && !NonConsecutiveRegs)
return false;
- MachineBasicBlock::iterator NewBBI = MBBI;
bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
bool EvenDeadKill = isLd ?
@@ -1531,7 +1554,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
++NumSTRD2STM;
}
- NewBBI = std::prev(MBBI);
} else {
// Split into two instructions.
unsigned NewOpc = (isLd)
@@ -1553,7 +1575,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
OddReg, OddDeadKill, false,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
- NewBBI = std::prev(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
@@ -1573,7 +1594,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
- NewBBI = std::prev(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
OddReg, OddDeadKill, OddUndef,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
@@ -1585,191 +1605,160 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
++NumSTRD2STR;
}
- MBB.erase(MI);
- MBBI = NewBBI;
+ MBBI = MBB.erase(MBBI);
return true;
}
/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
- unsigned NumMerges = 0;
- unsigned NumMemOps = 0;
MemOpQueue MemOps;
unsigned CurrBase = 0;
unsigned CurrOpc = ~0u;
- unsigned CurrSize = 0;
ARMCC::CondCodes CurrPred = ARMCC::AL;
- unsigned CurrPredReg = 0;
unsigned Position = 0;
- SmallVector<MachineBasicBlock::iterator,4> Merges;
-
- RS->enterBasicBlock(&MBB);
- MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- while (MBBI != E) {
+ assert(Candidates.size() == 0);
+ assert(MergeBaseCandidates.size() == 0);
+ LiveRegsValid = false;
+
+ for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
+ I = MBBI) {
+ // The instruction in front of the iterator is the one we look at.
+ MBBI = std::prev(I);
if (FixInvalidRegPairOp(MBB, MBBI))
continue;
+ ++Position;
- bool Advance = false;
- bool TryMerge = false;
-
- bool isMemOp = isMemoryOp(MBBI);
- if (isMemOp) {
+ if (isMemoryOp(MBBI)) {
unsigned Opcode = MBBI->getOpcode();
- unsigned Size = getLSMultipleTransferSize(MBBI);
const MachineOperand &MO = MBBI->getOperand(0);
unsigned Reg = MO.getReg();
- bool isKill = MO.isDef() ? false : MO.isKill();
- unsigned Base = MBBI->getOperand(1).getReg();
+ unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
int Offset = getMemoryOpOffset(MBBI);
- // Watch out for:
- // r4 := ldr [r5]
- // r5 := ldr [r5, #4]
- // r6 := ldr [r5, #8]
- //
- // The second ldr has effectively broken the chain even though it
- // looks like the later ldr(s) use the same base register. Try to
- // merge the ldr's so far, including this one. But don't try to
- // combine the following ldr(s).
- bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg();
-
- // Watch out for:
- // r4 := ldr [r0, #8]
- // r4 := ldr [r0, #4]
- //
- // The optimization may reorder the second ldr in front of the first
- // ldr, which violates write after write(WAW) dependence. The same as
- // str. Try to merge inst(s) already in MemOps.
- bool Overlap = false;
- for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
- if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
- Overlap = true;
- break;
- }
- }
-
- if (CurrBase == 0 && !Clobber) {
+ if (CurrBase == 0) {
// Start of a new chain.
CurrBase = Base;
CurrOpc = Opcode;
- CurrSize = Size;
CurrPred = Pred;
- CurrPredReg = PredReg;
- MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
- ++NumMemOps;
- Advance = true;
- } else if (!Overlap) {
- if (Clobber) {
- TryMerge = true;
- Advance = true;
+ MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
+ continue;
+ }
+ // Note: No need to match PredReg in the next if.
+ if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
+ // Watch out for:
+ // r4 := ldr [r0, #8]
+ // r4 := ldr [r0, #4]
+ // or
+ // r0 := ldr [r0]
+ // If a load overrides the base register or a register loaded by
+ // another load in our chain, we cannot take this instruction.
+ bool Overlap = false;
+ if (isLoadSingle(Opcode)) {
+ Overlap = (Base == Reg);
+ if (!Overlap) {
+ for (const MemOpQueueEntry &E : MemOps) {
+ if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
+ Overlap = true;
+ break;
+ }
+ }
+ }
}
- if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
- // No need to match PredReg.
- // Continue adding to the queue.
+ if (!Overlap) {
+ // Check offset and sort memory operation into the current chain.
if (Offset > MemOps.back().Offset) {
- MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
- Position, MBBI));
- ++NumMemOps;
- Advance = true;
+ MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
+ continue;
} else {
- for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
- I != E; ++I) {
- if (Offset < I->Offset) {
- MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
- Position, MBBI));
- ++NumMemOps;
- Advance = true;
+ MemOpQueue::iterator MI, ME;
+ for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
+ if (Offset < MI->Offset) {
+ // Found a place to insert.
break;
- } else if (Offset == I->Offset) {
- // Collision! This can't be merged!
+ }
+ if (Offset == MI->Offset) {
+ // Collision, abort.
+ MI = ME;
break;
}
}
+ if (MI != MemOps.end()) {
+ MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
+ continue;
+ }
}
}
}
- }
- if (MBBI->isDebugValue()) {
- ++MBBI;
- if (MBBI == E)
- // Reach the end of the block, try merging the memory instructions.
- TryMerge = true;
- } else if (Advance) {
- ++Position;
- ++MBBI;
- if (MBBI == E)
- // Reach the end of the block, try merging the memory instructions.
- TryMerge = true;
- } else {
- TryMerge = true;
+ // Don't advance the iterator; The op will start a new chain next.
+ MBBI = I;
+ --Position;
+ // Fallthrough to look into existing chain.
+ } else if (MBBI->isDebugValue()) {
+ continue;
+ } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
+ MBBI->getOpcode() == ARM::t2STRDi8) {
+ // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
+ // remember them because we may still be able to merge add/sub into them.
+ MergeBaseCandidates.push_back(MBBI);
}
- if (TryMerge) {
- if (NumMemOps > 1) {
- // Try to find a free register to use as a new base in case it's needed.
- // First advance to the instruction just before the start of the chain.
- AdvanceRS(MBB, MemOps);
-
- // Find a scratch register.
- unsigned Scratch =
- RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
-
- // Process the load / store instructions.
- RS->forward(std::prev(MBBI));
-
- // Merge ops.
- Merges.clear();
- MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
- CurrPred, CurrPredReg, Scratch, MemOps, Merges);
-
- // Try folding preceding/trailing base inc/dec into the generated
- // LDM/STM ops.
- for (unsigned i = 0, e = Merges.size(); i < e; ++i)
- if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
- ++NumMerges;
- NumMerges += Merges.size();
-
- // Try folding preceding/trailing base inc/dec into those load/store
- // that were not merged to form LDM/STM ops.
- for (unsigned i = 0; i != NumMemOps; ++i)
- if (!MemOps[i].Merged)
- if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
- ++NumMerges;
-
- // RS may be pointing to an instruction that's deleted.
- RS->skipTo(std::prev(MBBI));
- } else if (NumMemOps == 1) {
- // Try folding preceding/trailing base inc/dec into the single
- // load/store.
- if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
- ++NumMerges;
- RS->forward(std::prev(MBBI));
- }
- }
+ // If we are here then the chain is broken; Extract candidates for a merge.
+ if (MemOps.size() > 0) {
+ FormCandidates(MemOps);
+ // Reset for the next chain.
CurrBase = 0;
CurrOpc = ~0u;
- CurrSize = 0;
CurrPred = ARMCC::AL;
- CurrPredReg = 0;
- if (NumMemOps) {
- MemOps.clear();
- NumMemOps = 0;
- }
+ MemOps.clear();
+ }
+ }
+ if (MemOps.size() > 0)
+ FormCandidates(MemOps);
- // If iterator hasn't been advanced and this is not a memory op, skip it.
- // It can't start a new chain anyway.
- if (!Advance && !isMemOp && MBBI != E) {
- ++Position;
- ++MBBI;
+ // Sort candidates so they get processed from end to begin of the basic
+ // block later; This is necessary for liveness calculation.
+ auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
+ return M0->InsertPos < M1->InsertPos;
+ };
+ std::sort(Candidates.begin(), Candidates.end(), LessThan);
+
+ // Go through list of candidates and merge.
+ bool Changed = false;
+ for (const MergeCandidate *Candidate : Candidates) {
+ if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
+ MachineInstr *Merged = MergeOpsUpdate(*Candidate);
+ // Merge preceding/trailing base inc/dec into the merged op.
+ if (Merged) {
+ Changed = true;
+ unsigned Opcode = Merged->getOpcode();
+ if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
+ MergeBaseUpdateLSDouble(*Merged);
+ else
+ MergeBaseUpdateLSMultiple(Merged);
+ } else {
+ for (MachineInstr *MI : Candidate->Instrs) {
+ if (MergeBaseUpdateLoadStore(MI))
+ Changed = true;
+ }
}
+ } else {
+ assert(Candidate->Instrs.size() == 1);
+ if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
+ Changed = true;
}
}
- return NumMerges > 0;
+ Candidates.clear();
+ // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
+ for (MachineInstr *MI : MergeBaseCandidates)
+ MergeBaseUpdateLSDouble(*MI);
+ MergeBaseCandidates.clear();
+
+ return Changed;
}
/// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
@@ -1814,12 +1803,14 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ MF = &Fn;
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
TL = STI->getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
- RS = new RegScavenger();
+ MRI = &Fn.getRegInfo();
+ RegClassInfoValid = false;
isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2;
@@ -1832,7 +1823,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
Modified |= MergeReturnIntoLDM(MBB);
}
- delete RS;
+ Allocator.DestroyAll();
return Modified;
}
@@ -2219,7 +2210,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
continue;
int Opc = MI->getOpcode();
- bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
+ bool isLd = isLoadSingle(Opc);
unsigned Base = MI->getOperand(1).getReg();
int Offset = getMemoryOpOffset(MI);
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index a59cf9851108..6cafbbb9f8eb 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -18,12 +18,6 @@ using namespace llvm;
#define DEBUG_TYPE "arm-selectiondag-info"
-ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL)
- : TargetSelectionDAGInfo(&DL) {}
-
-ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
-}
-
// Emit, if possible, a specialized version of the given Libcall. Typically this
// means selecting the appropriately aligned version, but we also convert memset
// of 0 into memclr.
@@ -83,7 +77,7 @@ EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = TLI->getDataLayout()->getIntPtrType(*DAG.getContext());
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
Entry.Node = Dst;
Args.push_back(Entry);
if (AEABILibcall == AEABI_MEMCLR) {
@@ -121,12 +115,14 @@ EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
{ "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
};
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(TLI->getLibcallCallingConv(LC),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
- TLI->getPointerTy()), std::move(Args), 0)
- .setDiscardResult();
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(
+ TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
+ TLI->getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0)
+ .setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index 1db190f41e1a..289879ee1d7e 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -37,8 +37,6 @@ namespace ARM_AM {
class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit ARMSelectionDAGInfo(const DataLayout &DL);
- ~ARMSelectionDAGInfo();
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 55808dfb9efe..002c3e9b6291 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -112,7 +112,6 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
: ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
TargetTriple(TT), Options(TM.Options), TM(TM),
- TSInfo(*TM.getDataLayout()),
FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
@@ -172,6 +171,7 @@ void ARMSubtarget::initializeEnvironment() {
AllowsUnalignedMem = false;
Thumb2DSP = false;
UseNaClTrap = false;
+ GenLongCalls = false;
UnsafeFPMath = false;
}
@@ -286,7 +286,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
if (RelocM == Reloc::Static)
return false;
- bool isDecl = GV->isDeclarationForLinker();
+ bool isDef = GV->isStrongDefinitionForLinker();
if (!isTargetMachO()) {
// Extra load is needed for all externally visible.
@@ -294,34 +294,22 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
return false;
return true;
} else {
- if (RelocM == Reloc::PIC_) {
- // If this is a strong reference to a definition, it is definitely not
- // through a stub.
- if (!isDecl && !GV->isWeakForLinker())
- return false;
-
- // Unless we have a symbol with hidden visibility, we have to go through a
- // normal $non_lazy_ptr stub because this symbol might be resolved late.
- if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
- return true;
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (isDef)
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+ if (RelocM == Reloc::PIC_) {
// If symbol visibility is hidden, we have a stub for common symbol
// references and external declarations.
- if (isDecl || GV->hasCommonLinkage())
+ if (GV->isDeclarationForLinker() || GV->hasCommonLinkage())
// Hidden $non_lazy_ptr reference.
return true;
-
- return false;
- } else {
- // If this is a strong reference to a definition, it is definitely not
- // through a stub.
- if (!isDecl && !GV->isWeakForLinker())
- return false;
-
- // Unless we have a symbol with hidden visibility, we have to go through a
- // normal $non_lazy_ptr stub because this symbol might be resolved late.
- if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
- return true;
}
}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 9909a6a6d198..dd101df9b63d 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -206,6 +206,9 @@ protected:
/// NaCl TRAP instruction is generated instead of the regular TRAP.
bool UseNaClTrap;
+ /// Generate calls via indirect call instructions.
+ bool GenLongCalls;
+
/// Target machine allowed unsafe FP math (such as use of NEON fp)
bool UnsafeFPMath;
@@ -342,6 +345,7 @@ public:
bool hasMPExtension() const { return HasMPExtension; }
bool hasThumb2DSP() const { return Thumb2DSP; }
bool useNaClTrap() const { return UseNaClTrap; }
+ bool genLongCalls() const { return GenLongCalls; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 6e81bd2d349d..93495d66ae70 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -80,8 +80,7 @@ computeTargetABI(const Triple &TT, StringRef CPU,
// FIXME: This is duplicated code from the front end and should be unified.
if (TT.isOSBinFormatMachO()) {
if (TT.getEnvironment() == llvm::Triple::EABI ||
- (TT.getOS() == llvm::Triple::UnknownOS &&
- TT.getObjectFormat() == llvm::Triple::MachO) ||
+ (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
CPU.startswith("cortex-m")) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
} else {
@@ -104,8 +103,8 @@ computeTargetABI(const Triple &TT, StringRef CPU,
TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
break;
default:
- if (TT.getOS() == llvm::Triple::NetBSD)
- TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
+ if (TT.isOSNetBSD())
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
else
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
break;
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index f4901fc24e44..2f194cf7ae06 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -61,14 +61,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
ISD == ISD::FP_EXTEND)) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second);
if (Idx != -1)
return LT.first * NEONFltDblTbl[Idx].Cost;
}
- EVT SrcTy = TLI->getValueType(Src);
- EVT DstTy = TLI->getValueType(Dst);
+ EVT SrcTy = TLI->getValueType(DL, Src);
+ EVT DstTy = TLI->getValueType(DL, Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
return BaseT::getCastInstrCost(Opcode, Dst, Src);
@@ -282,8 +282,8 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
};
- EVT SelCondTy = TLI->getValueType(CondTy);
- EVT SelValTy = TLI->getValueType(ValTy);
+ EVT SelCondTy = TLI->getValueType(DL, CondTy);
+ EVT SelValTy = TLI->getValueType(DL, ValTy);
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
SelCondTy.getSimpleVT(),
@@ -292,7 +292,7 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return NEONVectorSelectTbl[Idx].Cost;
}
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
return LT.first;
}
@@ -353,7 +353,7 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx == -1)
@@ -379,7 +379,7 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
int Idx =
CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx == -1)
@@ -395,7 +395,7 @@ unsigned ARMTTIImpl::getArithmeticInstrCost(
TTI::OperandValueProperties Opd2PropInfo) {
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
const unsigned FunctionCallDivCost = 20;
const unsigned ReciprocalDivCost = 10;
@@ -468,7 +468,7 @@ unsigned ARMTTIImpl::getArithmeticInstrCost(
unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (Src->isVectorTy() && Alignment != 16 &&
Src->getVectorElementType()->isDoubleTy()) {
@@ -488,12 +488,12 @@ unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
assert(isa<VectorType>(VecTy) && "Expect a vector type");
// vldN/vstN doesn't support vector types of i64/f64 element.
- bool EltIs64Bits = DL->getTypeAllocSizeInBits(VecTy->getScalarType()) == 64;
+ bool EltIs64Bits = DL.getTypeAllocSizeInBits(VecTy->getScalarType()) == 64;
if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
unsigned NumElts = VecTy->getVectorNumElements();
Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
- unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy);
+ unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy);
// vldN/vstN only support legal vector types of size 64 or 128 in bits.
if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index f2e5db655ccf..84f256f73722 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -42,7 +42,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
public:
explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, Function &F)
- : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
// Provide value semantics. MSVC requires that we spell all of these out.
ARMTTIImpl(const ARMTTIImpl &Arg)
@@ -50,18 +51,6 @@ public:
ARMTTIImpl(ARMTTIImpl &&Arg)
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}
- ARMTTIImpl &operator=(const ARMTTIImpl &RHS) {
- BaseT::operator=(static_cast<const BaseT &>(RHS));
- ST = RHS.ST;
- TLI = RHS.TLI;
- return *this;
- }
- ARMTTIImpl &operator=(ARMTTIImpl &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- ST = std::move(RHS.ST);
- TLI = std::move(RHS.TLI);
- return *this;
- }
/// \name Scalar TTI Implementations
/// @{
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c2db74619871..f8f0eb2d4baa 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -189,9 +189,9 @@ class ARMAsmParser : public MCTargetAsmParser {
return getParser().Error(L, Msg, Ranges);
}
- bool validatetLDMRegList(MCInst Inst, const OperandVector &Operands,
+ bool validatetLDMRegList(const MCInst &Inst, const OperandVector &Operands,
unsigned ListNo, bool IsARPop = false);
- bool validatetSTMRegList(MCInst Inst, const OperandVector &Operands,
+ bool validatetSTMRegList(const MCInst &Inst, const OperandVector &Operands,
unsigned ListNo);
int tryParseRegister();
@@ -242,6 +242,8 @@ class ARMAsmParser : public MCTargetAsmParser {
bool &CanAcceptCarrySet,
bool &CanAcceptPredicationCode);
+ void tryConvertingToTwoOperandForm(StringRef Mnemonic, bool CarrySetting,
+ OperandVector &Operands);
bool isThumb() const {
// FIXME: Can tablegen auto-generate this?
return STI.getFeatureBits()[ARM::ModeThumb];
@@ -5465,6 +5467,92 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
CanAcceptPredicationCode = true;
}
+// \brief Some Thumb instructions have two operand forms that are not
+// available as three operand, convert to two operand form if possible.
+//
+// FIXME: We would really like to be able to tablegen'erate this.
+void ARMAsmParser::tryConvertingToTwoOperandForm(StringRef Mnemonic,
+ bool CarrySetting,
+ OperandVector &Operands) {
+ if (Operands.size() != 6)
+ return;
+
+ const auto &Op3 = static_cast<ARMOperand &>(*Operands[3]);
+ auto &Op4 = static_cast<ARMOperand &>(*Operands[4]);
+ if (!Op3.isReg() || !Op4.isReg())
+ return;
+
+ auto Op3Reg = Op3.getReg();
+ auto Op4Reg = Op4.getReg();
+
+ // For most Thumb2 cases we just generate the 3 operand form and reduce
+ // it in processInstruction(), but the 3 operand form of ADD (t2ADDrr)
+ // won't accept SP or PC so we do the transformation here taking care
+ // with immediate range in the 'add sp, sp #imm' case.
+ auto &Op5 = static_cast<ARMOperand &>(*Operands[5]);
+ if (isThumbTwo()) {
+ if (Mnemonic != "add")
+ return;
+ bool TryTransform = Op3Reg == ARM::PC || Op4Reg == ARM::PC ||
+ (Op5.isReg() && Op5.getReg() == ARM::PC);
+ if (!TryTransform) {
+ TryTransform = (Op3Reg == ARM::SP || Op4Reg == ARM::SP ||
+ (Op5.isReg() && Op5.getReg() == ARM::SP)) &&
+ !(Op3Reg == ARM::SP && Op4Reg == ARM::SP &&
+ Op5.isImm() && !Op5.isImm0_508s4());
+ }
+ if (!TryTransform)
+ return;
+ } else if (!isThumbOne())
+ return;
+
+ if (!(Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" ||
+ Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+ Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" ||
+ Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic"))
+ return;
+
+ // If first 2 operands of a 3 operand instruction are the same
+ // then transform to 2 operand version of the same instruction
+ // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1'
+ bool Transform = Op3Reg == Op4Reg;
+
+ // For communtative operations, we might be able to transform if we swap
+ // Op4 and Op5. The 'ADD Rdm, SP, Rdm' form is already handled specially
+ // as tADDrsp.
+ const ARMOperand *LastOp = &Op5;
+ bool Swap = false;
+ if (!Transform && Op5.isReg() && Op3Reg == Op5.getReg() &&
+ ((Mnemonic == "add" && Op4Reg != ARM::SP) ||
+ Mnemonic == "and" || Mnemonic == "eor" ||
+ Mnemonic == "adc" || Mnemonic == "orr")) {
+ Swap = true;
+ LastOp = &Op4;
+ Transform = true;
+ }
+
+ // If both registers are the same then remove one of them from
+ // the operand list, with certain exceptions.
+ if (Transform) {
+ // Don't transform 'adds Rd, Rd, Rm' or 'sub{s} Rd, Rd, Rm' because the
+ // 2 operand forms don't exist.
+ if (((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub") &&
+ LastOp->isReg())
+ Transform = false;
+
+ // Don't transform 'add/sub{s} Rd, Rd, #imm' if the immediate fits into
+ // 3-bits because the ARMARM says not to.
+ if ((Mnemonic == "add" || Mnemonic == "sub") && LastOp->isImm0_7())
+ Transform = false;
+ }
+
+ if (Transform) {
+ if (Swap)
+ std::swap(Op4, Op5);
+ Operands.erase(Operands.begin() + 3);
+ }
+}
+
bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
OperandVector &Operands) {
// FIXME: This is all horribly hacky. We really need a better way to deal
@@ -5838,6 +5926,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
"VFP/Neon double precision register expected");
}
+ tryConvertingToTwoOperandForm(Mnemonic, CarrySetting, Operands);
+
// Some instructions, mostly Thumb, have forms for the same mnemonic that
// do and don't have a cc_out optional-def operand. With some spot-checks
// of the operand list, we can figure out which variant we're trying to
@@ -5901,48 +5991,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
}
- // If first 2 operands of a 3 operand instruction are the same
- // then transform to 2 operand version of the same instruction
- // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1'
- // FIXME: We would really like to be able to tablegen'erate this.
- if (isThumbOne() && Operands.size() == 6 &&
- (Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" ||
- Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
- Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" ||
- Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic")) {
- ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]);
- ARMOperand &Op4 = static_cast<ARMOperand &>(*Operands[4]);
- ARMOperand &Op5 = static_cast<ARMOperand &>(*Operands[5]);
-
- // If both registers are the same then remove one of them from
- // the operand list.
- if (Op3.isReg() && Op4.isReg() && Op3.getReg() == Op4.getReg()) {
- // If 3rd operand (variable Op5) is a register and the instruction is adds/sub
- // then do not transform as the backend already handles this instruction
- // correctly.
- if (!Op5.isReg() || !((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub")) {
- Operands.erase(Operands.begin() + 3);
- if (Mnemonic == "add" && !CarrySetting) {
- // Special case for 'add' (not 'adds') instruction must
- // remove the CCOut operand as well.
- Operands.erase(Operands.begin() + 1);
- }
- }
- }
- }
-
- // If instruction is 'add' and first two register operands
- // use SP register, then remove one of the SP registers from
- // the instruction.
- // FIXME: We would really like to be able to tablegen'erate this.
- if (isThumbOne() && Operands.size() == 5 && Mnemonic == "add" && !CarrySetting) {
- ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]);
- ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]);
- if (Op2.isReg() && Op3.isReg() && Op2.getReg() == ARM::SP && Op3.getReg() == ARM::SP) {
- Operands.erase(Operands.begin() + 2);
- }
- }
-
// GNU Assembler extension (compatibility)
if ((Mnemonic == "ldrd" || Mnemonic == "strd")) {
ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]);
@@ -5985,8 +6033,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// return 'true' if register list contains non-low GPR registers,
// 'false' otherwise. If Reg is in the register list or is HiReg, set
// 'containsReg' to true.
-static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg,
- unsigned HiReg, bool &containsReg) {
+static bool checkLowRegisterList(const MCInst &Inst, unsigned OpNo,
+ unsigned Reg, unsigned HiReg,
+ bool &containsReg) {
containsReg = false;
for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
unsigned OpReg = Inst.getOperand(i).getReg();
@@ -6001,8 +6050,8 @@ static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg,
// Check if the specified regisgter is in the register list of the inst,
// starting at the indicated operand number.
-static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {
- for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+static bool listContainsReg(const MCInst &Inst, unsigned OpNo, unsigned Reg) {
+ for (unsigned i = OpNo, e = Inst.getNumOperands(); i < e; ++i) {
unsigned OpReg = Inst.getOperand(i).getReg();
if (OpReg == Reg)
return true;
@@ -6020,7 +6069,7 @@ static bool instIsBreakpoint(const MCInst &Inst) {
}
-bool ARMAsmParser::validatetLDMRegList(MCInst Inst,
+bool ARMAsmParser::validatetLDMRegList(const MCInst &Inst,
const OperandVector &Operands,
unsigned ListNo, bool IsARPop) {
const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]);
@@ -6043,7 +6092,7 @@ bool ARMAsmParser::validatetLDMRegList(MCInst Inst,
return false;
}
-bool ARMAsmParser::validatetSTMRegList(MCInst Inst,
+bool ARMAsmParser::validatetSTMRegList(const MCInst &Inst,
const OperandVector &Operands,
unsigned ListNo) {
const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]);
@@ -8167,8 +8216,16 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// If the destination and first source operand are the same, and
// there's no setting of the flags, use encoding T2 instead of T3.
// Note that this is only for ADD, not SUB. This mirrors the system
- // 'as' behaviour. Make sure the wide encoding wasn't explicit.
- if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ // 'as' behaviour. Also take advantage of ADD being commutative.
+ // Make sure the wide encoding wasn't explicit.
+ bool Swap = false;
+ auto DestReg = Inst.getOperand(0).getReg();
+ bool Transform = DestReg == Inst.getOperand(1).getReg();
+ if (!Transform && DestReg == Inst.getOperand(2).getReg()) {
+ Transform = true;
+ Swap = true;
+ }
+ if (!Transform ||
Inst.getOperand(5).getReg() != 0 ||
(static_cast<ARMOperand &>(*Operands[3]).isToken() &&
static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
@@ -8177,7 +8234,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.setOpcode(ARM::tADDhirr);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0));
- TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(Swap ? 1 : 2));
TmpInst.addOperand(Inst.getOperand(3));
TmpInst.addOperand(Inst.getOperand(4));
Inst = TmpInst;
@@ -9176,8 +9233,7 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
return false;
}
- STI.InitMCProcessorInfo(CPU, "");
- STI.InitCPUSchedModel(CPU);
+ STI.setDefaultFeatures(CPU);
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
return false;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 4d12bfb5d60f..d17fdb95dbdf 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -1362,7 +1362,7 @@ MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) {
MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
const MCSubtargetInfo &STI) {
const Triple &TT = STI.getTargetTriple();
- if (TT.getObjectFormat() == Triple::ELF)
+ if (TT.isOSBinFormatELF())
return new ARMTargetELFStreamer(S);
return new ARMTargetStreamer(S);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index fafe25ae5be5..21c9fc1e58b2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
#define GET_REGINFO_MC_DESC
#include "ARMGenRegisterInfo.inc"
-static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+static bool getMCRDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) {
if (STI.getFeatureBits()[llvm::ARM::HasV7Ops] &&
(MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) &&
@@ -63,7 +63,7 @@ static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
return false;
}
-static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+static bool getITDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) {
if (STI.getFeatureBits()[llvm::ARM::HasV8Ops] && MI.getOperand(1).isImm() &&
MI.getOperand(1).getImm() != 8) {
@@ -75,7 +75,7 @@ static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
return false;
}
-static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+static bool getARMStoreDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) {
assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
"cannot predicate thumb instructions");
@@ -92,7 +92,7 @@ static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
return false;
}
-static bool getARMLoadDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+static bool getARMLoadDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) {
assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
"cannot predicate thumb instructions");
@@ -257,9 +257,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT,
ArchFS = FS;
}
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitARMMCSubtargetInfo(X, TT, CPU, ArchFS);
- return X;
+ return createARMMCSubtargetInfoImpl(TT, CPU, ArchFS);
}
static MCInstrInfo *createARMMCInstrInfo() {
@@ -268,7 +266,7 @@ static MCInstrInfo *createARMMCInstrInfo() {
return X;
}
-static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {
+static MCRegisterInfo *createARMMCRegisterInfo(const Triple &Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC);
return X;
@@ -279,10 +277,10 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI;
if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO())
MAI = new ARMMCAsmInfoDarwin(TheTriple);
- else if (TheTriple.isWindowsItaniumEnvironment())
- MAI = new ARMCOFFMCAsmInfoGNU();
else if (TheTriple.isWindowsMSVCEnvironment())
MAI = new ARMCOFFMCAsmInfoMicrosoft();
+ else if (TheTriple.isOSWindows())
+ MAI = new ARMCOFFMCAsmInfoGNU();
else
MAI = new ARMELFMCAsmInfo(TheTriple);
@@ -292,14 +290,13 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createARMMCCodeGenInfo(const Triple &TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default) {
- Triple TheTriple(TT);
// Default relocation model on Darwin is PIC, not DynamicNoPIC.
- RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
+ RM = TT.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
}
X->initMCCodeGenInfo(RM, CM, OL);
return X;
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 77cd890e4cad..3b4358b5d9bf 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -365,7 +365,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
// frame pointer stack slot, the target is ELF and the function has FP, or
// the target uses var sized objects.
if (NumBytes) {
- assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&
"No scratch register to restore SP from FP!");
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
TII, *RegInfo);