diff options
Diffstat (limited to 'lib/Target/ARM/ARMConstantIslandPass.cpp')
| -rw-r--r-- | lib/Target/ARM/ARMConstantIslandPass.cpp | 289 | 
1 files changed, 203 insertions, 86 deletions
| diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 60e5d7bf6098..24ca25f73e96 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -26,8 +26,10 @@  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/LivePhysRegs.h"  #include "llvm/CodeGen/MachineBasicBlock.h"  #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineFunctionPass.h"  #include "llvm/CodeGen/MachineInstr.h" @@ -69,6 +71,7 @@ STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");  STATISTIC(NumCBZ,        "Number of CBZ / CBNZ formed");  STATISTIC(NumJTMoved,    "Number of jump table destination blocks moved");  STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted"); +STATISTIC(NumLEInserted, "Number of LE backwards branches inserted");  static cl::opt<bool>  AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), @@ -212,6 +215,7 @@ namespace {      const ARMBaseInstrInfo *TII;      const ARMSubtarget *STI;      ARMFunctionInfo *AFI; +    MachineDominatorTree *DT = nullptr;      bool isThumb;      bool isThumb1;      bool isThumb2; @@ -224,6 +228,11 @@ namespace {      bool runOnMachineFunction(MachineFunction &MF) override; +    void getAnalysisUsage(AnalysisUsage &AU) const override { +      AU.addRequired<MachineDominatorTree>(); +      MachineFunctionPass::getAnalysisUsage(AU); +    } +      MachineFunctionProperties getRequiredProperties() const override {        return MachineFunctionProperties().set(            MachineFunctionProperties::Property::NoVRegs); @@ -238,7 +247,7 @@ namespace {      void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs);      bool BBHasFallthrough(MachineBasicBlock *MBB);      CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); -    unsigned getCPELogAlign(const MachineInstr *CPEMI); +    Align getCPEAlign(const MachineInstr *CPEMI);      void scanFunctionJumpTables();      void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);      MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); @@ -327,8 +336,7 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {        const BasicBlockInfo &BBI = BBInfo[J];        dbgs() << format("%08x %bb.%u\t", BBI.Offset, J)               << " kb=" << unsigned(BBI.KnownBits) -             << " ua=" << unsigned(BBI.Unalign) -             << " pa=" << unsigned(BBI.PostAlign) +             << " ua=" << unsigned(BBI.Unalign) << " pa=" << Log2(BBI.PostAlign)               << format(" size=%#x\n", BBInfo[J].Size);      }    }); @@ -349,6 +357,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {    isPositionIndependentOrROPI =        STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();    AFI = MF->getInfo<ARMFunctionInfo>(); +  DT = &getAnalysis<MachineDominatorTree>();    isThumb = AFI->isThumbFunction();    isThumb1 = AFI->isThumb1OnlyFunction(); @@ -357,9 +366,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {    HasFarJump = false;    bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB); -  // This pass invalidates liveness information when it splits basic blocks. -  MF->getRegInfo().invalidateLiveness(); -    // Renumber all of the machine basic blocks in the function, guaranteeing that    // the numbers agree with the position of the block in the function.    MF->RenumberBlocks(); @@ -398,7 +404,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {    // Functions with jump tables need an alignment of 4 because they use the ADR    // instruction, which aligns the PC to 4 bytes before adding an offset.    if (!T2JumpTables.empty()) -    MF->ensureAlignment(2); +    MF->ensureAlignment(Align(4));    /// Remove dead constant pool entries.    MadeChange |= removeUnusedCPEntries(); @@ -487,8 +493,9 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)    MachineBasicBlock *BB = MF->CreateMachineBasicBlock();    MF->push_back(BB); -  // MachineConstantPool measures alignment in bytes. We measure in log2(bytes). -  unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); +  // MachineConstantPool measures alignment in bytes. +  const Align MaxAlign(MCP->getConstantPoolAlignment()); +  const unsigned MaxLogAlign = Log2(MaxAlign);    // Mark the basic block as required by the const-pool.    BB->setAlignment(MaxAlign); @@ -501,7 +508,8 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)    // alignment of all entries as long as BB is sufficiently aligned.  Keep    // track of the insertion point for each alignment.  We are going to bucket    // sort the entries as they are created. -  SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end()); +  SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxLogAlign + 1, +                                                       BB->end());    // Add all of the constants from the constant pool to the end block, use an    // identity mapping of CPI's to CPE's. @@ -526,7 +534,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)      // Ensure that future entries with higher alignment get inserted before      // CPEMI. This is bucket sort with iterators. -    for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a) +    for (unsigned a = LogAlign + 1; a <= MaxLogAlign; ++a)        if (InsPoint[a] == InsAt)          InsPoint[a] = CPEMI; @@ -640,29 +648,27 @@ ARMConstantIslands::findConstPoolEntry(unsigned CPI,    return nullptr;  } -/// getCPELogAlign - Returns the required alignment of the constant pool entry -/// represented by CPEMI.  Alignment is measured in log2(bytes) units. -unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { +/// getCPEAlign - Returns the required alignment of the constant pool entry +/// represented by CPEMI. +Align ARMConstantIslands::getCPEAlign(const MachineInstr *CPEMI) {    switch (CPEMI->getOpcode()) {    case ARM::CONSTPOOL_ENTRY:      break;    case ARM::JUMPTABLE_TBB: -    return isThumb1 ? 2 : 0; +    return isThumb1 ? Align(4) : Align(1);    case ARM::JUMPTABLE_TBH: -    return isThumb1 ? 2 : 1; +    return isThumb1 ? Align(4) : Align(2);    case ARM::JUMPTABLE_INSTS: -    return 1; +    return Align(2);    case ARM::JUMPTABLE_ADDRS: -    return 2; +    return Align(4);    default:      llvm_unreachable("unknown constpool entry kind");    }    unsigned CPI = getCombinedIndex(CPEMI);    assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); -  unsigned Align = MCP->getConstants()[CPI].getAlignment(); -  assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); -  return Log2_32(Align); +  return Align(MCP->getConstants()[CPI].getAlignment());  }  /// scanFunctionJumpTables - Do a scan of the function, building up @@ -687,7 +693,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {    BBInfoVector &BBInfo = BBUtils->getBBInfo();    // The known bits of the entry block offset are determined by the function    // alignment. -  BBInfo.front().KnownBits = MF->getAlignment(); +  BBInfo.front().KnownBits = Log2(MF->getAlignment());    // Compute block offsets and known bits.    BBUtils->adjustBBOffsetsAfter(&MF->front()); @@ -824,11 +830,6 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {              Scale = 2;  // +-(offset_8*2)              NegOk = true;              break; - -          case ARM::tLDRHi: -            Bits = 5; -            Scale = 2; // +(offset_5*2) -            break;            }            // Remember that this is a user of a CP entry. @@ -885,6 +886,13 @@ void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {  MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {    MachineBasicBlock *OrigBB = MI->getParent(); +  // Collect liveness information at MI. +  LivePhysRegs LRs(*MF->getSubtarget().getRegisterInfo()); +  LRs.addLiveOuts(*OrigBB); +  auto LivenessEnd = ++MachineBasicBlock::iterator(MI).getReverse(); +  for (MachineInstr &LiveMI : make_range(OrigBB->rbegin(), LivenessEnd)) +    LRs.stepBackward(LiveMI); +    // Create a new MBB for the code after the OrigBB.    MachineBasicBlock *NewBB =      MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); @@ -913,6 +921,12 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {    // OrigBB branches to NewBB.    OrigBB->addSuccessor(NewBB); +  // Update live-in information in the new block. +  MachineRegisterInfo &MRI = MF->getRegInfo(); +  for (MCPhysReg L : LRs) +    if (!MRI.isReserved(L)) +      NewBB->addLiveIn(L); +    // Update internal data structures to account for the newly inserted MBB.    // This is almost the same as updateForInsertedWaterBlock, except that    // the Water goes after OrigBB, not NewBB. @@ -1007,13 +1021,13 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,                                          MachineBasicBlock* Water, CPUser &U,                                          unsigned &Growth) {    BBInfoVector &BBInfo = BBUtils->getBBInfo(); -  unsigned CPELogAlign = getCPELogAlign(U.CPEMI); -  unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); -  unsigned NextBlockOffset, NextBlockAlignment; +  const Align CPEAlign = getCPEAlign(U.CPEMI); +  const unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPEAlign); +  unsigned NextBlockOffset; +  Align NextBlockAlignment;    MachineFunction::const_iterator NextBlock = Water->getIterator();    if (++NextBlock == MF->end()) {      NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); -    NextBlockAlignment = 0;    } else {      NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;      NextBlockAlignment = NextBlock->getAlignment(); @@ -1028,13 +1042,13 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,      Growth = CPEEnd - NextBlockOffset;      // Compute the padding that would go at the end of the CPE to align the next      // block. -    Growth += OffsetToAlignment(CPEEnd, 1ULL << NextBlockAlignment); +    Growth += offsetToAlignment(CPEEnd, NextBlockAlignment);      // If the CPE is to be inserted before the instruction, that will raise      // the offset of the instruction. Also account for unknown alignment padding      // in blocks between CPE and the user.      if (CPEOffset < UserOffset) -      UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign); +      UserOffset += Growth + UnknownPadding(MF->getAlignment(), Log2(CPEAlign));    } else      // CPE fits in existing padding.      Growth = 0; @@ -1200,8 +1214,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,    // inserting islands between BB0 and BB1 makes other accesses out of range.    MachineBasicBlock *UserBB = U.MI->getParent();    BBInfoVector &BBInfo = BBUtils->getBBInfo(); -  unsigned MinNoSplitDisp = -      BBInfo[UserBB->getNumber()].postOffset(getCPELogAlign(U.CPEMI)); +  const Align CPEAlign = getCPEAlign(U.CPEMI); +  unsigned MinNoSplitDisp = BBInfo[UserBB->getNumber()].postOffset(CPEAlign);    if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2)      return false;    for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();; @@ -1254,7 +1268,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,    CPUser &U = CPUsers[CPUserIndex];    MachineInstr *UserMI = U.MI;    MachineInstr *CPEMI  = U.CPEMI; -  unsigned CPELogAlign = getCPELogAlign(CPEMI); +  const Align CPEAlign = getCPEAlign(CPEMI);    MachineBasicBlock *UserMBB = UserMI->getParent();    BBInfoVector &BBInfo = BBUtils->getBBInfo();    const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; @@ -1267,7 +1281,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,      // Size of branch to insert.      unsigned Delta = isThumb1 ? 2 : 4;      // Compute the offset where the CPE will begin. -    unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta; +    unsigned CPEOffset = UserBBI.postOffset(CPEAlign) + Delta;      if (isOffsetInRange(UserOffset, CPEOffset, U)) {        LLVM_DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB) @@ -1308,11 +1322,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,    // Try to split the block so it's fully aligned.  Compute the latest split    // point where we can add a 4-byte branch instruction, and then align to -  // LogAlign which is the largest possible alignment in the function. -  unsigned LogAlign = MF->getAlignment(); -  assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); +  // Align which is the largest possible alignment in the function. +  const Align Align = MF->getAlignment(); +  assert(Align >= CPEAlign && "Over-aligned constant pool entry");    unsigned KnownBits = UserBBI.internalKnownBits(); -  unsigned UPad = UnknownPadding(LogAlign, KnownBits); +  unsigned UPad = UnknownPadding(Align, KnownBits);    unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad;    LLVM_DEBUG(dbgs() << format("Split in middle of big block before %#x",                                BaseInsertOffset)); @@ -1323,7 +1337,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,    BaseInsertOffset -= 4;    LLVM_DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) -                    << " la=" << LogAlign << " kb=" << KnownBits +                    << " la=" << Log2(Align) << " kb=" << KnownBits                      << " up=" << UPad << '\n');    // This could point off the end of the block if we've already got constant @@ -1337,6 +1351,28 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,      BaseInsertOffset =          std::max(UserBBI.postOffset() - UPad - 8,                   UserOffset + TII->getInstSizeInBytes(*UserMI) + 1); +    // If the CP is referenced(ie, UserOffset) is in first four instructions +    // after IT, this recalculated BaseInsertOffset could be in the middle of +    // an IT block. If it is, change the BaseInsertOffset to just after the +    // IT block. This still make the CP Entry is in range becuase of the +    // following reasons. +    //   1. The initial BaseseInsertOffset calculated is (UserOffset + +    //   U.getMaxDisp() - UPad). +    //   2. An IT block is only at most 4 instructions plus the "it" itself (18 +    //   bytes). +    //   3. All the relevant instructions support much larger Maximum +    //   displacement. +    MachineBasicBlock::iterator I = UserMI; +    ++I; +    for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI), +                  PredReg = 0; +         I->getOpcode() != ARM::t2IT && +         getITInstrPredicate(*I, PredReg) != ARMCC::AL; +         Offset += TII->getInstSizeInBytes(*I), I = std::next(I)) { +      BaseInsertOffset = +          std::max(BaseInsertOffset, Offset + TII->getInstSizeInBytes(*I) + 1); +      assert(I != UserMBB->end() && "Fell off end of block"); +    }      LLVM_DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));    }    unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad + @@ -1354,8 +1390,8 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,        CPUser &U = CPUsers[CPUIndex];        if (!isOffsetInRange(Offset, EndInsertOffset, U)) {          // Shift intertion point by one unit of alignment so it is within reach. -        BaseInsertOffset -= 1u << LogAlign; -        EndInsertOffset  -= 1u << LogAlign; +        BaseInsertOffset -= Align.value(); +        EndInsertOffset -= Align.value();        }        // This is overly conservative, as we don't account for CPEMIs being        // reused within the block, but it doesn't matter much.  Also assume CPEs @@ -1397,9 +1433,10 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,    }    // We really must not split an IT block. -  LLVM_DEBUG(unsigned PredReg; assert( -                 !isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL)); - +#ifndef NDEBUG +  unsigned PredReg; +  assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL); +#endif    NewMBB = splitBlockBeforeInstr(&*MI);  } @@ -1464,9 +1501,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,    // Always align the new block because CP entries can be smaller than 4    // bytes. Be careful not to decrease the existing alignment, e.g. NewMBB may    // be an already aligned constant pool block. -  const unsigned Align = isThumb ? 1 : 2; -  if (NewMBB->getAlignment() < Align) -    NewMBB->setAlignment(Align); +  const Align Alignment = isThumb ? Align(2) : Align(4); +  if (NewMBB->getAlignment() < Alignment) +    NewMBB->setAlignment(Alignment);    // Remove the original WaterList entry; we want subsequent insertions in    // this vicinity to go after the one we're about to insert.  This @@ -1495,7 +1532,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,    decrementCPEReferenceCount(CPI, CPEMI);    // Mark the basic block as aligned as required by the const-pool entry. -  NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); +  NewIsland->setAlignment(getCPEAlign(U.CPEMI));    // Increase the size of the island block to account for the new entry.    BBUtils->adjustBBSize(NewIsland, Size); @@ -1529,10 +1566,11 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {      BBInfo[CPEBB->getNumber()].Size = 0;      // This block no longer needs to be aligned. -    CPEBB->setAlignment(0); -  } else +    CPEBB->setAlignment(Align::None()); +  } else {      // Entries are sorted by descending alignment, so realign from the front. -    CPEBB->setAlignment(getCPELogAlign(&*CPEBB->begin())); +    CPEBB->setAlignment(getCPEAlign(&*CPEBB->begin())); +  }    BBUtils->adjustBBOffsetsAfter(CPEBB);    // An island has only one predecessor BB and one successor BB. Check if @@ -1620,7 +1658,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {    // L2:    ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();    CC = ARMCC::getOppositeCondition(CC); -  unsigned CCReg = MI->getOperand(2).getReg(); +  Register CCReg = MI->getOperand(2).getReg();    // If the branch is at the end of its MBB and that has a fall-through block,    // direct the updated conditional branch to the fall-through block. Otherwise, @@ -1778,16 +1816,10 @@ bool ARMConstantIslands::optimizeThumb2Instructions() {    return MadeChange;  } +  bool ARMConstantIslands::optimizeThumb2Branches() { -  bool MadeChange = false; -  // The order in which branches appear in ImmBranches is approximately their -  // order within the function body. By visiting later branches first, we reduce -  // the distance between earlier forward branches and their targets, making it -  // more likely that the cbn?z optimization, which can only apply to forward -  // branches, will succeed. -  for (unsigned i = ImmBranches.size(); i != 0; --i) { -    ImmBranch &Br = ImmBranches[i-1]; +  auto TryShrinkBranch = [this](ImmBranch &Br) {      unsigned Opcode = Br.MI->getOpcode();      unsigned NewOpc = 0;      unsigned Scale = 1; @@ -1815,47 +1847,115 @@ bool ARMConstantIslands::optimizeThumb2Branches() {          BBUtils->adjustBBSize(MBB, -2);          BBUtils->adjustBBOffsetsAfter(MBB);          ++NumT2BrShrunk; -        MadeChange = true; +        return true;        }      } +    return false; +  }; -    Opcode = Br.MI->getOpcode(); -    if (Opcode != ARM::tBcc) -      continue; +  struct ImmCompare { +    MachineInstr* MI = nullptr; +    unsigned NewOpc = 0; +  }; + +  auto FindCmpForCBZ = [this](ImmBranch &Br, ImmCompare &ImmCmp, +                              MachineBasicBlock *DestBB) { +    ImmCmp.MI = nullptr; +    ImmCmp.NewOpc = 0;      // If the conditional branch doesn't kill CPSR, then CPSR can be liveout      // so this transformation is not safe.      if (!Br.MI->killsRegister(ARM::CPSR)) -      continue; +      return false; -    NewOpc = 0;      unsigned PredReg = 0; +    unsigned NewOpc = 0;      ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg);      if (Pred == ARMCC::EQ)        NewOpc = ARM::tCBZ;      else if (Pred == ARMCC::NE)        NewOpc = ARM::tCBNZ; -    if (!NewOpc) -      continue; -    MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); +    else +      return false; +      // Check if the distance is within 126. Subtract starting offset by 2      // because the cmp will be eliminated.      unsigned BrOffset = BBUtils->getOffsetOf(Br.MI) + 4 - 2;      BBInfoVector &BBInfo = BBUtils->getBBInfo();      unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;      if (BrOffset >= DestOffset || (DestOffset - BrOffset) > 126) -      continue; +      return false;      // Search backwards to find a tCMPi8      auto *TRI = STI->getRegisterInfo();      MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br.MI, TRI);      if (!CmpMI || CmpMI->getOpcode() != ARM::tCMPi8) +      return false; + +    ImmCmp.MI = CmpMI; +    ImmCmp.NewOpc = NewOpc; +    return true; +  }; + +  auto TryConvertToLE = [this](ImmBranch &Br, ImmCompare &Cmp) { +    if (Br.MI->getOpcode() != ARM::t2Bcc || !STI->hasLOB() || +        STI->hasMinSize()) +      return false; + +    MachineBasicBlock *MBB = Br.MI->getParent(); +    MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); +    if (BBUtils->getOffsetOf(MBB) < BBUtils->getOffsetOf(DestBB) || +        !BBUtils->isBBInRange(Br.MI, DestBB, 4094)) +      return false; + +    if (!DT->dominates(DestBB, MBB)) +      return false; + +    // We queried for the CBN?Z opcode based upon the 'ExitBB', the opposite +    // target of Br. So now we need to reverse the condition. +    Cmp.NewOpc = Cmp.NewOpc == ARM::tCBZ ? ARM::tCBNZ : ARM::tCBZ; + +    MachineInstrBuilder MIB = BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), +                                      TII->get(ARM::t2LE)); +    MIB.add(Br.MI->getOperand(0)); +    Br.MI->eraseFromParent(); +    Br.MI = MIB; +    ++NumLEInserted; +    return true; +  }; + +  bool MadeChange = false; + +  // The order in which branches appear in ImmBranches is approximately their +  // order within the function body. By visiting later branches first, we reduce +  // the distance between earlier forward branches and their targets, making it +  // more likely that the cbn?z optimization, which can only apply to forward +  // branches, will succeed. +  for (ImmBranch &Br : reverse(ImmBranches)) { +    MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); +    MachineBasicBlock *MBB = Br.MI->getParent(); +    MachineBasicBlock *ExitBB = &MBB->back() == Br.MI ? +      MBB->getFallThrough() : +      MBB->back().getOperand(0).getMBB(); + +    ImmCompare Cmp; +    if (FindCmpForCBZ(Br, Cmp, ExitBB) && TryConvertToLE(Br, Cmp)) { +      DestBB = ExitBB; +      MadeChange = true; +    } else { +      FindCmpForCBZ(Br, Cmp, DestBB); +      MadeChange |= TryShrinkBranch(Br); +    } + +    unsigned Opcode = Br.MI->getOpcode(); +    if ((Opcode != ARM::tBcc && Opcode != ARM::t2LE) || !Cmp.NewOpc)        continue; -    unsigned Reg = CmpMI->getOperand(0).getReg(); +    Register Reg = Cmp.MI->getOperand(0).getReg();      // Check for Kill flags on Reg. If they are present remove them and set kill      // on the new CBZ. +    auto *TRI = STI->getRegisterInfo();      MachineBasicBlock::iterator KillMI = Br.MI;      bool RegKilled = false;      do { @@ -1865,19 +1965,32 @@ bool ARMConstantIslands::optimizeThumb2Branches() {          RegKilled = true;          break;        } -    } while (KillMI != CmpMI); +    } while (KillMI != Cmp.MI);      // Create the new CBZ/CBNZ -    MachineBasicBlock *MBB = Br.MI->getParent(); -    LLVM_DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI); +    LLVM_DEBUG(dbgs() << "Fold: " << *Cmp.MI << " and: " << *Br.MI);      MachineInstr *NewBR = -        BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(NewOpc)) +        BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(Cmp.NewOpc))              .addReg(Reg, getKillRegState(RegKilled))              .addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags()); -    CmpMI->eraseFromParent(); -    Br.MI->eraseFromParent(); -    Br.MI = NewBR; + +    Cmp.MI->eraseFromParent(); +    BBInfoVector &BBInfo = BBUtils->getBBInfo();      BBInfo[MBB->getNumber()].Size -= 2; + +    if (Br.MI->getOpcode() == ARM::tBcc) { +      Br.MI->eraseFromParent(); +      Br.MI = NewBR; +    } else if (&MBB->back() != Br.MI) { +      // We've generated an LE and already erased the original conditional +      // branch. The CBN?Z is now used to branch to the other successor, so an +      // unconditional branch terminator is now redundant. +      MachineInstr *LastMI = &MBB->back(); +      if (LastMI != Br.MI) { +        BBInfo[MBB->getNumber()].Size -= LastMI->getDesc().getSize(); +        LastMI->eraseFromParent(); +      } +    }      BBUtils->adjustBBOffsetsAfter(MBB);      ++NumCBZ;      MadeChange = true; @@ -1931,8 +2044,8 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,    //      of BaseReg, but only if the t2ADDrs can be removed.    //    + Some instruction other than t2ADDrs computing the entry. Not seen in    //      the wild, but we should be careful. -  unsigned EntryReg = JumpMI->getOperand(0).getReg(); -  unsigned BaseReg = LEAMI->getOperand(0).getReg(); +  Register EntryReg = JumpMI->getOperand(0).getReg(); +  Register BaseReg = LEAMI->getOperand(0).getReg();    CanDeleteLEA = true;    BaseRegKill = false; @@ -2009,7 +2122,7 @@ static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI,    // but the JT now uses PC. Finds the last ADD (if any) that def's EntryReg    // and is not clobbered / used.    MachineInstr *RemovableAdd = nullptr; -  unsigned EntryReg = JumpMI->getOperand(0).getReg(); +  Register EntryReg = JumpMI->getOperand(0).getReg();    // Find the last ADD to set EntryReg    MachineBasicBlock::iterator I(LEAMI); @@ -2106,7 +2219,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {        //   %idx = tLSLri %idx, 2        //   %base = tLEApcrelJT        //   %t = tLDRr %base, %idx -      unsigned BaseReg = User.MI->getOperand(0).getReg(); +      Register BaseReg = User.MI->getOperand(0).getReg();        if (User.MI->getIterator() == User.MI->getParent()->begin())          continue; @@ -2116,7 +2229,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {            !Shift->getOperand(2).isKill())          continue;        IdxReg = Shift->getOperand(2).getReg(); -      unsigned ShiftedIdxReg = Shift->getOperand(0).getReg(); +      Register ShiftedIdxReg = Shift->getOperand(0).getReg();        // It's important that IdxReg is live until the actual TBB/TBH. Most of        // the range is checked later, but the LEA might still clobber it and not @@ -2313,6 +2426,10 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {    MachineFunction::iterator MBBI = ++JTBB->getIterator();    MF->insert(MBBI, NewBB); +  // Copy live-in information to new block. +  for (const MachineBasicBlock::RegisterMaskPair &RegMaskPair : BB->liveins()) +    NewBB->addLiveIn(RegMaskPair); +    // Add an unconditional branch from NewBB to BB.    // There doesn't seem to be meaningful DebugInfo available; this doesn't    // correspond directly to anything in the source. | 
