aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM/ARMLoadStoreOptimizer.cpp')
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp265
1 files changed, 159 insertions, 106 deletions
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 6e7e47b8706a..62d57f3f4986 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -60,9 +60,14 @@ STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
-namespace llvm {
-void initializeARMLoadStoreOptPass(PassRegistry &);
-}
+/// This switch disables formation of double/multi instructions that could
+/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
+/// disabled. This can be used to create libraries that are robust even when
+/// users provoke undefined behaviour by supplying misaligned pointers.
+/// \see mayCombineMisaligned()
+static cl::opt<bool>
+AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
+ cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
@@ -71,9 +76,7 @@ namespace {
/// form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
static char ID;
- ARMLoadStoreOpt() : MachineFunctionPass(ID) {
- initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
- }
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
const MachineFunction *MF;
const TargetInstrInfo *TII;
@@ -90,6 +93,11 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
const char *getPassName() const override {
return ARM_LOAD_STORE_OPT_NAME;
}
@@ -101,8 +109,8 @@ namespace {
MachineInstr *MI;
int Offset; ///< Load/Store offset.
unsigned Position; ///< Position as counted from end of basic block.
- MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
- : MI(MI), Offset(Offset), Position(Position) {}
+ MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
+ : MI(&MI), Offset(Offset), Position(Position) {}
};
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
@@ -131,17 +139,19 @@ namespace {
MachineBasicBlock::const_iterator Before);
unsigned findFreeReg(const TargetRegisterClass &RegClass);
void UpdateBaseRegUses(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- DebugLoc DL, unsigned Base, unsigned WordOffset,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ unsigned Base, unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg);
- MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
- bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
- DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
- MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
- bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
- DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
+ MachineInstr *CreateLoadStoreMulti(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
+ ArrayRef<std::pair<unsigned, bool>> Regs);
+ MachineInstr *CreateLoadStoreDouble(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
+ ArrayRef<std::pair<unsigned, bool>> Regs) const;
void FormCandidates(const MemOpQueue &MemOps);
MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
@@ -156,10 +166,11 @@ namespace {
char ARMLoadStoreOpt::ID = 0;
}
-INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
+INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
+ false)
-static bool definesCPSR(const MachineInstr *MI) {
- for (const auto &MO : MI->operands()) {
+static bool definesCPSR(const MachineInstr &MI) {
+ for (const auto &MO : MI.operands()) {
if (!MO.isReg())
continue;
if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
@@ -171,11 +182,11 @@ static bool definesCPSR(const MachineInstr *MI) {
return false;
}
-static int getMemoryOpOffset(const MachineInstr *MI) {
- unsigned Opcode = MI->getOpcode();
+static int getMemoryOpOffset(const MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
- unsigned NumOperands = MI->getDesc().getNumOperands();
- unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+ unsigned NumOperands = MI.getDesc().getNumOperands();
+ unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
@@ -436,12 +447,12 @@ static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
-void
-ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- DebugLoc DL, unsigned Base,
- unsigned WordOffset,
- ARMCC::CondCodes Pred, unsigned PredReg) {
+void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned Base,
+ unsigned WordOffset,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg) {
assert(isThumb1 && "Can only update base register uses for Thumb1!");
// Start updating any instructions with immediate offsets. Insert a SUB before
// the first non-updateable instruction (if any).
@@ -475,7 +486,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
InsertSub = true;
} else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
- !definesCPSR(MBBI)) {
+ !definesCPSR(*MBBI)) {
// SUBS/ADDS using this register, with a dead def of the CPSR.
// Merge it with the update; if the merged offset is too large,
// insert a new sub instead.
@@ -499,7 +510,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
InsertSub = true;
}
- } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
+ } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
// Since SUBS sets the condition flags, we can't place the base reset
// after an instruction that has a live CPSR def.
// The base register might also contain an argument for a function call.
@@ -552,7 +563,7 @@ void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
// Initialize if we never queried in this block.
if (!LiveRegsValid) {
LiveRegs.init(TRI);
- LiveRegs.addLiveOuts(&MBB, true);
+ LiveRegs.addLiveOuts(MBB);
LiveRegPos = MBB.end();
LiveRegsValid = true;
}
@@ -574,10 +585,11 @@ static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. It returns
/// the newly created instruction if the transformation is done.
-MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
- bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
- DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
+MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
+ ArrayRef<std::pair<unsigned, bool>> Regs) {
unsigned NumRegs = Regs.size();
assert(NumRegs > 1);
@@ -770,10 +782,11 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
return MIB.getInstr();
}
-MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
- bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
- DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
+MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
+ ArrayRef<std::pair<unsigned, bool>> Regs) const {
bool IsLoad = isi32Load(Opcode);
assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
@@ -836,11 +849,11 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
iterator InsertBefore = std::next(iterator(LatestMI));
MachineBasicBlock &MBB = *LatestMI->getParent();
- unsigned Offset = getMemoryOpOffset(First);
+ unsigned Offset = getMemoryOpOffset(*First);
unsigned Base = getLoadStoreBaseOp(*First).getReg();
bool BaseKill = LatestMI->killsRegister(Base);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
DebugLoc DL = First->getDebugLoc();
MachineInstr *Merged = nullptr;
if (Cand.CanMergeToLSDouble)
@@ -916,6 +929,24 @@ static bool isValidLSDoubleOffset(int Offset) {
return (Value % 4) == 0 && Value < 1024;
}
+/// Return true for loads/stores that can be combined to a double/multi
+/// operation without increasing the requirements for alignment.
+static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
+ const MachineInstr &MI) {
+ // vldr/vstr trap on misaligned pointers anyway, so forming vldm makes no
+ // difference.
+ unsigned Opcode = MI.getOpcode();
+ if (!isi32Load(Opcode) && !isi32Store(Opcode))
+ return true;
+
+ // Stack pointer alignment is out of the programmer's control so we can trust
+ // SP-relative loads/stores.
+ if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
+ STI.getFrameLowering()->getTransientStackAlignment() >= 4)
+ return true;
+ return false;
+}
+
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
const MachineInstr *FirstMI = MemOps[0].MI;
@@ -946,7 +977,7 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
bool CanMergeToLSMulti = true;
// On Swift, avoid forming vldm/vstm starting with an odd register number, as
// that needs more uops than single vldrs.
- if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
+ if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
CanMergeToLSMulti = false;
// LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
@@ -954,6 +985,10 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
if (PReg == ARM::SP || PReg == ARM::PC)
CanMergeToLSMulti = CanMergeToLSDouble = false;
+ // Should we be conservative?
+ if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
+ CanMergeToLSMulti = CanMergeToLSDouble = false;
+
// Merge following instructions where possible.
for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
int NewOffset = MemOps[I].Offset;
@@ -1102,11 +1137,11 @@ static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
unsigned MIPredReg;
if (MI.getOperand(0).getReg() != Reg ||
MI.getOperand(1).getReg() != Reg ||
- getInstrPredicate(&MI, MIPredReg) != Pred ||
+ getInstrPredicate(MI, MIPredReg) != Pred ||
MIPredReg != PredReg)
return 0;
- if (CheckCPSRDef && definesCPSR(&MI))
+ if (CheckCPSRDef && definesCPSR(MI))
return 0;
return MI.getOperand(2).getImm() * Scale;
}
@@ -1169,7 +1204,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
unsigned Base = BaseOP.getReg();
bool BaseKill = BaseOP.isKill();
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
unsigned Opcode = MI->getOpcode();
DebugLoc DL = MI->getDebugLoc();
@@ -1193,10 +1228,30 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
} else {
MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
- ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
- return false;
+ ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
+
+ // We couldn't find an inc/dec to merge. But if the base is dead, we
+ // can still change to a writeback form as that will save us 2 bytes
+ // of code size. It can create WAW hazards though, so only do it if
+ // we're minimizing code size.
+ if (!MBB.getParent()->getFunction()->optForMinSize() || !BaseKill)
+ return false;
+
+ bool HighRegsUsed = false;
+ for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).getReg() >= ARM::R8) {
+ HighRegsUsed = true;
+ break;
+ }
+
+ if (!HighRegsUsed)
+ MergeInstr = MBB.end();
+ else
+ return false;
+ }
}
- MBB.erase(MergeInstr);
+ if (MergeInstr != MBB.end())
+ MBB.erase(MergeInstr);
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
@@ -1291,7 +1346,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
return false;
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
int Bytes = getLSMultipleTransferSize(MI);
MachineBasicBlock &MBB = *MI->getParent();
MachineBasicBlock::iterator MBBI(MI);
@@ -1388,7 +1443,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
return false;
unsigned PredReg;
- ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
MachineBasicBlock::iterator MBBI(MI);
MachineBasicBlock &MBB = *MI.getParent();
int Offset;
@@ -1487,14 +1542,13 @@ static bool isMemoryOp(const MachineInstr &MI) {
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- int Offset, bool isDef,
- DebugLoc DL, unsigned NewOpc,
+ MachineBasicBlock::iterator &MBBI, int Offset,
+ bool isDef, const DebugLoc &DL, unsigned NewOpc,
unsigned Reg, bool RegDeadKill, bool RegUndef,
unsigned BaseReg, bool BaseKill, bool BaseUndef,
- bool OffKill, bool OffUndef,
- ARMCC::CondCodes Pred, unsigned PredReg,
- const TargetInstrInfo *TII, bool isT2) {
+ bool OffKill, bool OffUndef, ARMCC::CondCodes Pred,
+ unsigned PredReg, const TargetInstrInfo *TII,
+ bool isT2) {
if (isDef) {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
TII->get(NewOpc))
@@ -1547,9 +1601,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
bool BaseUndef = BaseOp.isUndef();
bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
- int OffImm = getMemoryOpOffset(MI);
+ int OffImm = getMemoryOpOffset(*MI);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
if (OddRegNum > EvenRegNum && OffImm == 0) {
// Ascending register numbers and no offset. It's safe to change it to a
@@ -1655,14 +1709,14 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
unsigned Reg = MO.getReg();
unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
- int Offset = getMemoryOpOffset(MBBI);
+ ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
+ int Offset = getMemoryOpOffset(*MBBI);
if (CurrBase == 0) {
// Start of a new chain.
CurrBase = Base;
CurrOpc = Opcode;
CurrPred = Pred;
- MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
+ MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
continue;
}
// Note: No need to match PredReg in the next if.
@@ -1690,7 +1744,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
if (!Overlap) {
// Check offset and sort memory operation into the current chain.
if (Offset > MemOps.back().Offset) {
- MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
+ MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
continue;
} else {
MemOpQueue::iterator MI, ME;
@@ -1706,7 +1760,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
}
}
if (MI != MemOps.end()) {
- MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
+ MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
continue;
}
}
@@ -1723,7 +1777,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
MBBI->getOpcode() == ARM::t2STRDi8) {
// ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions;
// remember them because we may still be able to merge add/sub into them.
- MergeBaseCandidates.push_back(MBBI);
+ MergeBaseCandidates.push_back(&*MBBI);
}
@@ -1805,20 +1859,20 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
// Ignore any DBG_VALUE instructions.
while (PrevI->isDebugValue() && PrevI != MBB.begin())
--PrevI;
- MachineInstr *PrevMI = PrevI;
- unsigned Opcode = PrevMI->getOpcode();
+ MachineInstr &PrevMI = *PrevI;
+ unsigned Opcode = PrevMI.getOpcode();
if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
- MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
+ MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
if (MO.getReg() != ARM::LR)
return false;
unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
- PrevMI->setDesc(TII->get(NewOpc));
+ PrevMI.setDesc(TII->get(NewOpc));
MO.setReg(ARM::PC);
- PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
+ PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
MBB.erase(MBBI);
return true;
}
@@ -1840,8 +1894,8 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
for (auto Use : Prev->uses())
if (Use.isKill()) {
AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
- .addReg(Use.getReg(), RegState::Kill))
- .copyImplicitOps(&*MBBI);
+ .addReg(Use.getReg(), RegState::Kill))
+ .copyImplicitOps(*MBBI);
MBB.erase(MBBI);
MBB.erase(Prev);
return true;
@@ -1851,6 +1905,9 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(*Fn.getFunction()))
+ return false;
+
MF = &Fn;
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
TL = STI->getTargetLowering();
@@ -1877,10 +1934,6 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
return Modified;
}
-namespace llvm {
-void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
-}
-
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
"ARM pre- register allocation load / store optimization pass"
@@ -1889,9 +1942,7 @@ namespace {
/// locations close to make it more likely they will be combined later.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
static char ID;
- ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {
- initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry());
- }
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
const DataLayout *TD;
const TargetInstrInfo *TII;
@@ -1922,10 +1973,13 @@ namespace {
char ARMPreAllocLoadStoreOpt::ID = 0;
}
-INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
+INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ if (AssumeMisalignedLoadStores || skipFunction(*Fn.getFunction()))
+ return false;
+
TD = &Fn.getDataLayout();
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
TII = STI->getInstrInfo();
@@ -2034,7 +2088,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
return false;
// Then make sure the immediate offset fits.
- int OffImm = getMemoryOpOffset(Op0);
+ int OffImm = getMemoryOpOffset(*Op0);
if (isT2) {
int Limit = (1 << 8) * Scale;
if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
@@ -2056,7 +2110,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
if (FirstReg == SecondReg)
return false;
BaseReg = Op0->getOperand(1).getReg();
- Pred = getInstrPredicate(Op0, PredReg);
+ Pred = getInstrPredicate(*Op0, PredReg);
dl = Op0->getDebugLoc();
return true;
}
@@ -2070,11 +2124,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
// Sort by offset (in reverse order).
std::sort(Ops.begin(), Ops.end(),
[](const MachineInstr *LHS, const MachineInstr *RHS) {
- int LOffset = getMemoryOpOffset(LHS);
- int ROffset = getMemoryOpOffset(RHS);
- assert(LHS == RHS || LOffset != ROffset);
- return LOffset > ROffset;
- });
+ int LOffset = getMemoryOpOffset(*LHS);
+ int ROffset = getMemoryOpOffset(*RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ });
// The loads / stores of the same base are in order. Scan them from first to
// last and check for the following:
@@ -2106,7 +2160,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (LastOpcode && LSMOpcode != LastOpcode)
break;
- int Offset = getMemoryOpOffset(Op);
+ int Offset = getMemoryOpOffset(*Op);
unsigned Bytes = getLSMultipleTransferSize(Op);
if (LastBytes) {
if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
@@ -2141,8 +2195,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
} else {
// This is the new location for the loads / stores.
MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
- while (InsertPos != MBB->end()
- && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
+ while (InsertPos != MBB->end() &&
+ (MemOps.count(&*InsertPos) || InsertPos->isDebugValue()))
++InsertPos;
// If we are moving a pair of loads / stores, see if it makes sense
@@ -2237,25 +2291,25 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator E = MBB->end();
while (MBBI != E) {
for (; MBBI != E; ++MBBI) {
- MachineInstr *MI = MBBI;
- if (MI->isCall() || MI->isTerminator()) {
+ MachineInstr &MI = *MBBI;
+ if (MI.isCall() || MI.isTerminator()) {
// Stop at barriers.
++MBBI;
break;
}
- if (!MI->isDebugValue())
- MI2LocMap[MI] = ++Loc;
+ if (!MI.isDebugValue())
+ MI2LocMap[&MI] = ++Loc;
- if (!isMemoryOp(*MI))
+ if (!isMemoryOp(MI))
continue;
unsigned PredReg = 0;
if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
continue;
- int Opc = MI->getOpcode();
+ int Opc = MI.getOpcode();
bool isLd = isLoadSingle(Opc);
- unsigned Base = MI->getOperand(1).getReg();
+ unsigned Base = MI.getOperand(1).getReg();
int Offset = getMemoryOpOffset(MI);
bool StopHere = false;
@@ -2264,15 +2318,15 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
Base2LdsMap.find(Base);
if (BI != Base2LdsMap.end()) {
for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
- if (Offset == getMemoryOpOffset(BI->second[i])) {
+ if (Offset == getMemoryOpOffset(*BI->second[i])) {
StopHere = true;
break;
}
}
if (!StopHere)
- BI->second.push_back(MI);
+ BI->second.push_back(&MI);
} else {
- Base2LdsMap[Base].push_back(MI);
+ Base2LdsMap[Base].push_back(&MI);
LdBases.push_back(Base);
}
} else {
@@ -2280,15 +2334,15 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
Base2StsMap.find(Base);
if (BI != Base2StsMap.end()) {
for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
- if (Offset == getMemoryOpOffset(BI->second[i])) {
+ if (Offset == getMemoryOpOffset(*BI->second[i])) {
StopHere = true;
break;
}
}
if (!StopHere)
- BI->second.push_back(MI);
+ BI->second.push_back(&MI);
} else {
- Base2StsMap[Base].push_back(MI);
+ Base2StsMap[Base].push_back(&MI);
StBases.push_back(Base);
}
}
@@ -2335,4 +2389,3 @@ FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
return new ARMPreAllocLoadStoreOpt();
return new ARMLoadStoreOpt();
}
-