diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp | 286 |
1 files changed, 202 insertions, 84 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp index a5df46c94f42..dc769ae526bc 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -22,9 +22,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Debug.h" #include <cassert> #include <new> @@ -34,83 +34,220 @@ using namespace llvm; #define DEBUG_TYPE "arm-mve-vpt" namespace { - class MVEVPTBlock : public MachineFunctionPass { - public: - static char ID; +class MVEVPTBlock : public MachineFunctionPass { +public: + static char ID; + const Thumb2InstrInfo *TII; + const TargetRegisterInfo *TRI; - MVEVPTBlock() : MachineFunctionPass(ID) {} + MVEVPTBlock() : MachineFunctionPass(ID) {} - bool runOnMachineFunction(MachineFunction &Fn) override; + bool runOnMachineFunction(MachineFunction &Fn) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<ReachingDefAnalysis>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs).set( - MachineFunctionProperties::Property::TracksLiveness); - } - - StringRef getPassName() const override { - return "MVE VPT block insertion pass"; - } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } - private: - bool InsertVPTBlocks(MachineBasicBlock &MBB); + StringRef getPassName() const override { + return "MVE VPT block insertion pass"; + } - const Thumb2InstrInfo *TII = nullptr; - ReachingDefAnalysis *RDA = nullptr; - }; +private: + bool InsertVPTBlocks(MachineBasicBlock &MBB); +}; - char MVEVPTBlock::ID = 0; +char MVEVPTBlock::ID = 0; } // end anonymous namespace INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false) -static MachineInstr *findVCMPToFoldIntoVPST(MachineInstr *MI, - ReachingDefAnalysis *RDA, +static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, + const TargetRegisterInfo *TRI, unsigned &NewOpcode) { - // First, search backwards to the instruction that defines VPR - auto *Def = RDA->getReachingMIDef(MI, ARM::VPR); - if (!Def) - return nullptr; + // Search backwards to the instruction that defines VPR. This may or not + // be a VCMP, we check that after this loop. If we find another instruction + // that reads cpsr, we return nullptr. + MachineBasicBlock::iterator CmpMI = MI; + while (CmpMI != MI->getParent()->begin()) { + --CmpMI; + if (CmpMI->modifiesRegister(ARM::VPR, TRI)) + break; + if (CmpMI->readsRegister(ARM::VPR, TRI)) + break; + } - // Now check that Def is a VCMP - if (!(NewOpcode = VCMPOpcodeToVPT(Def->getOpcode()))) + if (CmpMI == MI) + return nullptr; + NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode()); + if (NewOpcode == 0) return nullptr; - // Check that Def's operands are not defined between the VCMP and MI, i.e. - // check that they have the same reaching def. - if (!RDA->hasSameReachingDef(Def, MI, Def->getOperand(1).getReg()) || - !RDA->hasSameReachingDef(Def, MI, Def->getOperand(2).getReg())) + // Search forward from CmpMI to MI, checking if either register was def'd + if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI), + MI, TRI)) + return nullptr; + if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI), + MI, TRI)) return nullptr; + return &*CmpMI; +} + +// Advances Iter past a block of predicated instructions. +// Returns true if it successfully skipped the whole block of predicated +// instructions. Returns false when it stopped early (due to MaxSteps), or if +// Iter didn't point to a predicated instruction. +static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter, + MachineBasicBlock::instr_iterator EndIter, + unsigned MaxSteps, + unsigned &NumInstrsSteppedOver) { + ARMVCC::VPTCodes NextPred = ARMVCC::None; + Register PredReg; + NumInstrsSteppedOver = 0; + + while (Iter != EndIter) { + NextPred = getVPTInstrPredicate(*Iter, PredReg); + assert(NextPred != ARMVCC::Else && + "VPT block pass does not expect Else preds"); + if (NextPred == ARMVCC::None || MaxSteps == 0) + break; + --MaxSteps; + ++Iter; + ++NumInstrsSteppedOver; + }; + + return NumInstrsSteppedOver != 0 && + (NextPred == ARMVCC::None || Iter == EndIter); +} + +// Returns true if at least one instruction in the range [Iter, End) defines +// or kills VPR. +static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter, + MachineBasicBlock::iterator End) { + for (; Iter != End; ++Iter) + if (Iter->definesRegister(ARM::VPR) || Iter->killsRegister(ARM::VPR)) + return true; + return false; +} + +// Creates a T, TT, TTT or TTTT BlockMask depending on BlockSize. +static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize) { + switch (BlockSize) { + case 1: + return ARM::PredBlockMask::T; + case 2: + return ARM::PredBlockMask::TT; + case 3: + return ARM::PredBlockMask::TTT; + case 4: + return ARM::PredBlockMask::TTTT; + default: + llvm_unreachable("Invalid BlockSize!"); + } +} + +// Given an iterator (Iter) that points at an instruction with a "Then" +// predicate, tries to create the largest block of continuous predicated +// instructions possible, and returns the VPT Block Mask of that block. +// +// This will try to perform some minor optimization in order to maximize the +// size of the block. +static ARM::PredBlockMask +CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, + MachineBasicBlock::instr_iterator EndIter, + SmallVectorImpl<MachineInstr *> &DeadInstructions) { + MachineBasicBlock::instr_iterator BlockBeg = Iter; + (void)BlockBeg; + assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then && + "Expected a Predicated Instruction"); + + LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump()); + + unsigned BlockSize; + StepOverPredicatedInstrs(Iter, EndIter, 4, BlockSize); + + LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter = + std::next(BlockBeg); + AddedInstIter != Iter; ++AddedInstIter) { + dbgs() << " adding: "; + AddedInstIter->dump(); + }); + + // Generate the initial BlockMask + ARM::PredBlockMask BlockMask = GetInitialBlockMask(BlockSize); + + // Remove VPNOTs while there's still room in the block, so we can make the + // largest block possible. + ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else; + while (BlockSize < 4 && Iter != EndIter && + Iter->getOpcode() == ARM::MVE_VPNOT) { + + // Try to skip all of the predicated instructions after the VPNOT, stopping + // after (4 - BlockSize). If we can't skip them all, stop. + unsigned ElseInstCnt = 0; + MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter); + if (!StepOverPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize), + ElseInstCnt)) + break; + + // Check if this VPNOT can be removed or not: It can only be removed if at + // least one of the predicated instruction that follows it kills or sets + // VPR. + if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter)) + break; + + LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump();); + + // Record the new size of the block + BlockSize += ElseInstCnt; + assert(BlockSize <= 4 && "Block is too large!"); + + // Record the VPNot to remove it later. + DeadInstructions.push_back(&*Iter); + ++Iter; + + // Replace the predicates of the instructions we're adding. + // Note that we are using "Iter" to iterate over the block so we can update + // it at the same time. + for (; Iter != VPNOTBlockEndIter; ++Iter) { + // Find the register in which the predicate is + int OpIdx = findFirstVPTPredOperandIdx(*Iter); + assert(OpIdx != -1); + + // Change the predicate and update the mask + Iter->getOperand(OpIdx).setImm(CurrentPredicate); + BlockMask = expandPredBlockMask(BlockMask, CurrentPredicate); + + LLVM_DEBUG(dbgs() << " adding : "; Iter->dump()); + } - return Def; + CurrentPredicate = + (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then); + } + return BlockMask; } bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { bool Modified = false; MachineBasicBlock::instr_iterator MBIter = Block.instr_begin(); MachineBasicBlock::instr_iterator EndIter = Block.instr_end(); - SmallSet<MachineInstr *, 4> RemovedVCMPs; + + SmallVector<MachineInstr *, 4> DeadInstructions; while (MBIter != EndIter) { MachineInstr *MI = &*MBIter; - unsigned PredReg = 0; - DebugLoc dl = MI->getDebugLoc(); + Register PredReg; + DebugLoc DL = MI->getDebugLoc(); ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg); // The idea of the predicate is that None, Then and Else are for use when // handling assembly language: they correspond to the three possible // suffixes "", "t" and "e" on the mnemonic. So when instructions are read - // from assembly source or disassembled from object code, you expect to see - // a mixture whenever there's a long VPT block. But in code generation, we - // hope we'll never generate an Else as input to this pass. + // from assembly source or disassembled from object code, you expect to + // see a mixture whenever there's a long VPT block. But in code + // generation, we hope we'll never generate an Else as input to this pass. assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds"); if (Pred == ARMVCC::None) { @@ -118,46 +255,25 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { continue; } - LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump()); - int VPTInstCnt = 1; - ARMVCC::VPTCodes NextPred; - - // Look at subsequent instructions, checking if they can be in the same VPT - // block. - ++MBIter; - while (MBIter != EndIter && VPTInstCnt < 4) { - NextPred = getVPTInstrPredicate(*MBIter, PredReg); - assert(NextPred != ARMVCC::Else && - "VPT block pass does not expect Else preds"); - if (NextPred != Pred) - break; - LLVM_DEBUG(dbgs() << " adding : "; MBIter->dump()); - ++VPTInstCnt; - ++MBIter; - }; - - unsigned BlockMask = getARMVPTBlockMask(VPTInstCnt); + ARM::PredBlockMask BlockMask = + CreateVPTBlock(MBIter, EndIter, DeadInstructions); - // Search back for a VCMP that can be folded to create a VPT, or else create - // a VPST directly + // Search back for a VCMP that can be folded to create a VPT, or else + // create a VPST directly MachineInstrBuilder MIBuilder; unsigned NewOpcode; - MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, RDA, NewOpcode); - if (VCMP) { + LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n"); + if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) { LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump()); - MIBuilder = BuildMI(Block, MI, dl, TII->get(NewOpcode)); - MIBuilder.addImm(BlockMask); + MIBuilder = BuildMI(Block, MI, DL, TII->get(NewOpcode)); + MIBuilder.addImm((uint64_t)BlockMask); MIBuilder.add(VCMP->getOperand(1)); MIBuilder.add(VCMP->getOperand(2)); MIBuilder.add(VCMP->getOperand(3)); - // We delay removing the actual VCMP instruction by saving it to a list - // and deleting all instructions in this list in one go after we have - // created the VPT blocks. We do this in order not to invalidate the - // ReachingDefAnalysis that is queried by 'findVCMPToFoldIntoVPST'. - RemovedVCMPs.insert(VCMP); + VCMP->eraseFromParent(); } else { - MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST)); - MIBuilder.addImm(BlockMask); + MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST)); + MIBuilder.addImm((uint64_t)BlockMask); } finalizeBundle( @@ -166,16 +282,18 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { Modified = true; } - for (auto *I : RemovedVCMPs) - I->eraseFromParent(); + // Erase all dead instructions + for (MachineInstr *DeadMI : DeadInstructions) { + if (DeadMI->isInsideBundle()) + DeadMI->eraseFromBundle(); + else + DeadMI->eraseFromParent(); + } return Modified; } bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { - if (skipFunction(Fn.getFunction())) - return false; - const ARMSubtarget &STI = static_cast<const ARMSubtarget &>(Fn.getSubtarget()); @@ -183,7 +301,7 @@ bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { return false; TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); - RDA = &getAnalysis<ReachingDefAnalysis>(); + TRI = STI.getRegisterInfo(); LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" << "********** Function: " << Fn.getName() << '\n'); |