diff options
Diffstat (limited to 'lib/Target/ARM/ARMLoadStoreOptimizer.cpp')
-rw-r--r-- | lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 265 |
1 file changed, 159 insertions, 106 deletions
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 6e7e47b8706a..62d57f3f4986 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -60,9 +60,14 @@ STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); -namespace llvm { -void initializeARMLoadStoreOptPass(PassRegistry &); -} +/// This switch disables formation of double/multi instructions that could +/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP +/// disabled. This can be used to create libraries that are robust even when +/// users provoke undefined behaviour by supplying misaligned pointers. +/// \see mayCombineMisaligned() +static cl::opt<bool> +AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, + cl::init(false), cl::desc("Be more conservative in ARM load/store opt")); #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass" @@ -71,9 +76,7 @@ namespace { /// form ldm / stm instructions. struct ARMLoadStoreOpt : public MachineFunctionPass { static char ID; - ARMLoadStoreOpt() : MachineFunctionPass(ID) { - initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry()); - } + ARMLoadStoreOpt() : MachineFunctionPass(ID) {} const MachineFunction *MF; const TargetInstrInfo *TII; @@ -90,6 +93,11 @@ namespace { bool runOnMachineFunction(MachineFunction &Fn) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + const char *getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; } @@ -101,8 +109,8 @@ namespace { MachineInstr *MI; int Offset; ///< Load/Store offset. unsigned Position; ///< Position as counted from end of basic block. 
- MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position) - : MI(MI), Offset(Offset), Position(Position) {} + MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position) + : MI(&MI), Offset(Offset), Position(Position) {} }; typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; @@ -131,17 +139,19 @@ namespace { MachineBasicBlock::const_iterator Before); unsigned findFreeReg(const TargetRegisterClass &RegClass); void UpdateBaseRegUses(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - DebugLoc DL, unsigned Base, unsigned WordOffset, + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, + unsigned Base, unsigned WordOffset, ARMCC::CondCodes Pred, unsigned PredReg); - MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, - bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, - DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs); - MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, - bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, - DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const; + MachineInstr *CreateLoadStoreMulti( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + int Offset, unsigned Base, bool BaseKill, unsigned Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, + ArrayRef<std::pair<unsigned, bool>> Regs); + MachineInstr *CreateLoadStoreDouble( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + int Offset, unsigned Base, bool BaseKill, unsigned Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, + ArrayRef<std::pair<unsigned, bool>> Regs) const; void FormCandidates(const MemOpQueue &MemOps); MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, @@ -156,10 +166,11 @@ namespace { char 
ARMLoadStoreOpt::ID = 0; } -INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false) +INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false, + false) -static bool definesCPSR(const MachineInstr *MI) { - for (const auto &MO : MI->operands()) { +static bool definesCPSR(const MachineInstr &MI) { + for (const auto &MO : MI.operands()) { if (!MO.isReg()) continue; if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) @@ -171,11 +182,11 @@ static bool definesCPSR(const MachineInstr *MI) { return false; } -static int getMemoryOpOffset(const MachineInstr *MI) { - unsigned Opcode = MI->getOpcode(); +static int getMemoryOpOffset(const MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; - unsigned NumOperands = MI->getDesc().getNumOperands(); - unsigned OffField = MI->getOperand(NumOperands-3).getImm(); + unsigned NumOperands = MI.getDesc().getNumOperands(); + unsigned OffField = MI.getOperand(NumOperands - 3).getImm(); if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || @@ -436,12 +447,12 @@ static unsigned getLSMultipleTransferSize(const MachineInstr *MI) { /// Update future uses of the base register with the offset introduced /// due to writeback. This function only works on Thumb1. -void -ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - DebugLoc DL, unsigned Base, - unsigned WordOffset, - ARMCC::CondCodes Pred, unsigned PredReg) { +void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned Base, + unsigned WordOffset, + ARMCC::CondCodes Pred, + unsigned PredReg) { assert(isThumb1 && "Can only update base register uses for Thumb1!"); // Start updating any instructions with immediate offsets. Insert a SUB before // the first non-updateable instruction (if any). 
@@ -475,7 +486,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, InsertSub = true; } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && - !definesCPSR(MBBI)) { + !definesCPSR(*MBBI)) { // SUBS/ADDS using this register, with a dead def of the CPSR. // Merge it with the update; if the merged offset is too large, // insert a new sub instead. @@ -499,7 +510,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, InsertSub = true; } - } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { + } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) { // Since SUBS sets the condition flags, we can't place the base reset // after an instruction that has a live CPSR def. // The base register might also contain an argument for a function call. @@ -552,7 +563,7 @@ void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB, // Initialize if we never queried in this block. if (!LiveRegsValid) { LiveRegs.init(TRI); - LiveRegs.addLiveOuts(&MBB, true); + LiveRegs.addLiveOuts(MBB); LiveRegPos = MBB.end(); LiveRegsValid = true; } @@ -574,10 +585,11 @@ static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs, /// Create and insert a LDM or STM with Base as base register and registers in /// Regs as the register operands that would be loaded / stored. It returns /// true if the transformation is done. 
-MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, - bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, - DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) { +MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + int Offset, unsigned Base, bool BaseKill, unsigned Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, + ArrayRef<std::pair<unsigned, bool>> Regs) { unsigned NumRegs = Regs.size(); assert(NumRegs > 1); @@ -770,10 +782,11 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB, return MIB.getInstr(); } -MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, - bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, - DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const { +MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + int Offset, unsigned Base, bool BaseKill, unsigned Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, + ArrayRef<std::pair<unsigned, bool>> Regs) const { bool IsLoad = isi32Load(Opcode); assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store"); unsigned LoadStoreOpcode = IsLoad ? 
ARM::t2LDRDi8 : ARM::t2STRDi8; @@ -836,11 +849,11 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx]; iterator InsertBefore = std::next(iterator(LatestMI)); MachineBasicBlock &MBB = *LatestMI->getParent(); - unsigned Offset = getMemoryOpOffset(First); + unsigned Offset = getMemoryOpOffset(*First); unsigned Base = getLoadStoreBaseOp(*First).getReg(); bool BaseKill = LatestMI->killsRegister(Base); unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg); DebugLoc DL = First->getDebugLoc(); MachineInstr *Merged = nullptr; if (Cand.CanMergeToLSDouble) @@ -916,6 +929,24 @@ static bool isValidLSDoubleOffset(int Offset) { return (Value % 4) == 0 && Value < 1024; } +/// Return true for loads/stores that can be combined to a double/multi +/// operation without increasing the requirements for alignment. +static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, + const MachineInstr &MI) { + // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no + // difference. + unsigned Opcode = MI.getOpcode(); + if (!isi32Load(Opcode) && !isi32Store(Opcode)) + return true; + + // Stack pointer alignment is out of the programmers control so we can trust + // SP-relative loads/stores. + if (getLoadStoreBaseOp(MI).getReg() == ARM::SP && + STI.getFrameLowering()->getTransientStackAlignment() >= 4) + return true; + return false; +} + /// Find candidates for load/store multiple merge in list of MemOpQueueEntries. void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { const MachineInstr *FirstMI = MemOps[0].MI; @@ -946,7 +977,7 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { bool CanMergeToLSMulti = true; // On swift vldm/vstm starting with an odd register number as that needs // more uops than single vldrs. 
- if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1) + if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1) CanMergeToLSMulti = false; // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it @@ -954,6 +985,10 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { if (PReg == ARM::SP || PReg == ARM::PC) CanMergeToLSMulti = CanMergeToLSDouble = false; + // Should we be conservative? + if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI)) + CanMergeToLSMulti = CanMergeToLSDouble = false; + // Merge following instructions where possible. for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) { int NewOffset = MemOps[I].Offset; @@ -1102,11 +1137,11 @@ static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg, unsigned MIPredReg; if (MI.getOperand(0).getReg() != Reg || MI.getOperand(1).getReg() != Reg || - getInstrPredicate(&MI, MIPredReg) != Pred || + getInstrPredicate(MI, MIPredReg) != Pred || MIPredReg != PredReg) return 0; - if (CheckCPSRDef && definesCPSR(&MI)) + if (CheckCPSRDef && definesCPSR(MI)) return 0; return MI.getOperand(2).getImm() * Scale; } @@ -1169,7 +1204,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { unsigned Base = BaseOP.getReg(); bool BaseKill = BaseOP.isKill(); unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); unsigned Opcode = MI->getOpcode(); DebugLoc DL = MI->getDebugLoc(); @@ -1193,10 +1228,30 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { } else { MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) && - ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) - return false; + ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) { + + // We couldn't find an inc/dec to merge. 
But if the base is dead, we + // can still change to a writeback form as that will save us 2 bytes + // of code size. It can create WAW hazards though, so only do it if + // we're minimizing code size. + if (!MBB.getParent()->getFunction()->optForMinSize() || !BaseKill) + return false; + + bool HighRegsUsed = false; + for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) + if (MI->getOperand(i).getReg() >= ARM::R8) { + HighRegsUsed = true; + break; + } + + if (!HighRegsUsed) + MergeInstr = MBB.end(); + else + return false; + } } - MBB.erase(MergeInstr); + if (MergeInstr != MBB.end()) + MBB.erase(MergeInstr); unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) @@ -1291,7 +1346,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { return false; unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); int Bytes = getLSMultipleTransferSize(MI); MachineBasicBlock &MBB = *MI->getParent(); MachineBasicBlock::iterator MBBI(MI); @@ -1388,7 +1443,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { return false; unsigned PredReg; - ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); MachineBasicBlock::iterator MBBI(MI); MachineBasicBlock &MBB = *MI.getParent(); int Offset; @@ -1487,14 +1542,13 @@ static bool isMemoryOp(const MachineInstr &MI) { } static void InsertLDR_STR(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - int Offset, bool isDef, - DebugLoc DL, unsigned NewOpc, + MachineBasicBlock::iterator &MBBI, int Offset, + bool isDef, const DebugLoc &DL, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, - bool OffKill, bool OffUndef, - ARMCC::CondCodes Pred, unsigned PredReg, - const TargetInstrInfo *TII, bool isT2) { + bool OffKill, 
bool OffUndef, ARMCC::CondCodes Pred, + unsigned PredReg, const TargetInstrInfo *TII, + bool isT2) { if (isDef) { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) @@ -1547,9 +1601,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, bool BaseUndef = BaseOp.isUndef(); bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); - int OffImm = getMemoryOpOffset(MI); + int OffImm = getMemoryOpOffset(*MI); unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); if (OddRegNum > EvenRegNum && OffImm == 0) { // Ascending register numbers and no offset. It's safe to change it to a @@ -1655,14 +1709,14 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { unsigned Reg = MO.getReg(); unsigned Base = getLoadStoreBaseOp(*MBBI).getReg(); unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); - int Offset = getMemoryOpOffset(MBBI); + ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg); + int Offset = getMemoryOpOffset(*MBBI); if (CurrBase == 0) { // Start of a new chain. CurrBase = Base; CurrOpc = Opcode; CurrPred = Pred; - MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); + MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position)); continue; } // Note: No need to match PredReg in the next if. @@ -1690,7 +1744,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (!Overlap) { // Check offset and sort memory operation into the current chain. 
if (Offset > MemOps.back().Offset) { - MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); + MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position)); continue; } else { MemOpQueue::iterator MI, ME; @@ -1706,7 +1760,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { } } if (MI != MemOps.end()) { - MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position)); + MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position)); continue; } } @@ -1723,7 +1777,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { MBBI->getOpcode() == ARM::t2STRDi8) { // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions // remember them because we may still be able to merge add/sub into them. - MergeBaseCandidates.push_back(MBBI); + MergeBaseCandidates.push_back(&*MBBI); } @@ -1805,20 +1859,20 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { // Ignore any DBG_VALUE instructions. while (PrevI->isDebugValue() && PrevI != MBB.begin()) --PrevI; - MachineInstr *PrevMI = PrevI; - unsigned Opcode = PrevMI->getOpcode(); + MachineInstr &PrevMI = *PrevI; + unsigned Opcode = PrevMI.getOpcode(); if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { - MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); + MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1); if (MO.getReg() != ARM::LR) return false; unsigned NewOpc = (isThumb2 ? 
ARM::t2LDMIA_RET : ARM::LDMIA_RET); assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); - PrevMI->setDesc(TII->get(NewOpc)); + PrevMI.setDesc(TII->get(NewOpc)); MO.setReg(ARM::PC); - PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); + PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI); MBB.erase(MBBI); return true; } @@ -1840,8 +1894,8 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) { for (auto Use : Prev->uses()) if (Use.isKill()) { AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX)) - .addReg(Use.getReg(), RegState::Kill)) - .copyImplicitOps(&*MBBI); + .addReg(Use.getReg(), RegState::Kill)) + .copyImplicitOps(*MBBI); MBB.erase(MBBI); MBB.erase(Prev); return true; @@ -1851,6 +1905,9 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) { } bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + if (skipFunction(*Fn.getFunction())) + return false; + MF = &Fn; STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); TL = STI->getTargetLowering(); @@ -1877,10 +1934,6 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { return Modified; } -namespace llvm { -void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); -} - #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \ "ARM pre- register allocation load / store optimization pass" @@ -1889,9 +1942,7 @@ namespace { /// locations close to make it more likely they will be combined later. 
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ static char ID; - ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) { - initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry()); - } + ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} const DataLayout *TD; const TargetInstrInfo *TII; @@ -1922,10 +1973,13 @@ namespace { char ARMPreAllocLoadStoreOpt::ID = 0; } -INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt", +INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt", ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false) bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + if (AssumeMisalignedLoadStores || skipFunction(*Fn.getFunction())) + return false; + TD = &Fn.getDataLayout(); STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); TII = STI->getInstrInfo(); @@ -2034,7 +2088,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return false; // Then make sure the immediate offset fits. - int OffImm = getMemoryOpOffset(Op0); + int OffImm = getMemoryOpOffset(*Op0); if (isT2) { int Limit = (1 << 8) * Scale; if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) @@ -2056,7 +2110,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, if (FirstReg == SecondReg) return false; BaseReg = Op0->getOperand(1).getReg(); - Pred = getInstrPredicate(Op0, PredReg); + Pred = getInstrPredicate(*Op0, PredReg); dl = Op0->getDebugLoc(); return true; } @@ -2070,11 +2124,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, // Sort by offset (in reverse order). 
std::sort(Ops.begin(), Ops.end(), [](const MachineInstr *LHS, const MachineInstr *RHS) { - int LOffset = getMemoryOpOffset(LHS); - int ROffset = getMemoryOpOffset(RHS); - assert(LHS == RHS || LOffset != ROffset); - return LOffset > ROffset; - }); + int LOffset = getMemoryOpOffset(*LHS); + int ROffset = getMemoryOpOffset(*RHS); + assert(LHS == RHS || LOffset != ROffset); + return LOffset > ROffset; + }); // The loads / stores of the same base are in order. Scan them from first to // last and check for the following: @@ -2106,7 +2160,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, if (LastOpcode && LSMOpcode != LastOpcode) break; - int Offset = getMemoryOpOffset(Op); + int Offset = getMemoryOpOffset(*Op); unsigned Bytes = getLSMultipleTransferSize(Op); if (LastBytes) { if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) @@ -2141,8 +2195,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, } else { // This is the new location for the loads / stores. MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; - while (InsertPos != MBB->end() - && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) + while (InsertPos != MBB->end() && + (MemOps.count(&*InsertPos) || InsertPos->isDebugValue())) ++InsertPos; // If we are moving a pair of loads / stores, see if it makes sense @@ -2237,25 +2291,25 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { MachineBasicBlock::iterator E = MBB->end(); while (MBBI != E) { for (; MBBI != E; ++MBBI) { - MachineInstr *MI = MBBI; - if (MI->isCall() || MI->isTerminator()) { + MachineInstr &MI = *MBBI; + if (MI.isCall() || MI.isTerminator()) { // Stop at barriers. 
++MBBI; break; } - if (!MI->isDebugValue()) - MI2LocMap[MI] = ++Loc; + if (!MI.isDebugValue()) + MI2LocMap[&MI] = ++Loc; - if (!isMemoryOp(*MI)) + if (!isMemoryOp(MI)) continue; unsigned PredReg = 0; if (getInstrPredicate(MI, PredReg) != ARMCC::AL) continue; - int Opc = MI->getOpcode(); + int Opc = MI.getOpcode(); bool isLd = isLoadSingle(Opc); - unsigned Base = MI->getOperand(1).getReg(); + unsigned Base = MI.getOperand(1).getReg(); int Offset = getMemoryOpOffset(MI); bool StopHere = false; @@ -2264,15 +2318,15 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { Base2LdsMap.find(Base); if (BI != Base2LdsMap.end()) { for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { - if (Offset == getMemoryOpOffset(BI->second[i])) { + if (Offset == getMemoryOpOffset(*BI->second[i])) { StopHere = true; break; } } if (!StopHere) - BI->second.push_back(MI); + BI->second.push_back(&MI); } else { - Base2LdsMap[Base].push_back(MI); + Base2LdsMap[Base].push_back(&MI); LdBases.push_back(Base); } } else { @@ -2280,15 +2334,15 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { Base2StsMap.find(Base); if (BI != Base2StsMap.end()) { for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { - if (Offset == getMemoryOpOffset(BI->second[i])) { + if (Offset == getMemoryOpOffset(*BI->second[i])) { StopHere = true; break; } } if (!StopHere) - BI->second.push_back(MI); + BI->second.push_back(&MI); } else { - Base2StsMap[Base].push_back(MI); + Base2StsMap[Base].push_back(&MI); StBases.push_back(Base); } } @@ -2335,4 +2389,3 @@ FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { return new ARMPreAllocLoadStoreOpt(); return new ARMLoadStoreOpt(); } - |