aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp')
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp 286
1 files changed, 202 insertions, 84 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
index a5df46c94f42..dc769ae526bc 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
@@ -22,9 +22,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include <cassert>
#include <new>
@@ -34,83 +34,220 @@ using namespace llvm;
#define DEBUG_TYPE "arm-mve-vpt"
namespace {
- class MVEVPTBlock : public MachineFunctionPass {
- public:
- static char ID;
+class MVEVPTBlock : public MachineFunctionPass {
+public:
+ static char ID;
+ const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
- MVEVPTBlock() : MachineFunctionPass(ID) {}
+ MVEVPTBlock() : MachineFunctionPass(ID) {}
- bool runOnMachineFunction(MachineFunction &Fn) override;
+ bool runOnMachineFunction(MachineFunction &Fn) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<ReachingDefAnalysis>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs).set(
- MachineFunctionProperties::Property::TracksLiveness);
- }
-
- StringRef getPassName() const override {
- return "MVE VPT block insertion pass";
- }
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
- private:
- bool InsertVPTBlocks(MachineBasicBlock &MBB);
+ StringRef getPassName() const override {
+ return "MVE VPT block insertion pass";
+ }
- const Thumb2InstrInfo *TII = nullptr;
- ReachingDefAnalysis *RDA = nullptr;
- };
+private:
+ bool InsertVPTBlocks(MachineBasicBlock &MBB);
+};
- char MVEVPTBlock::ID = 0;
+char MVEVPTBlock::ID = 0;
} // end anonymous namespace
INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
-static MachineInstr *findVCMPToFoldIntoVPST(MachineInstr *MI,
- ReachingDefAnalysis *RDA,
+static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI,
+ const TargetRegisterInfo *TRI,
unsigned &NewOpcode) {
- // First, search backwards to the instruction that defines VPR
- auto *Def = RDA->getReachingMIDef(MI, ARM::VPR);
- if (!Def)
- return nullptr;
+ // Search backwards to the instruction that defines VPR. This may or may not
+ // be a VCMP; we check that after this loop. If we find another instruction
+ // that reads VPR, we return nullptr.
+ MachineBasicBlock::iterator CmpMI = MI;
+ while (CmpMI != MI->getParent()->begin()) {
+ --CmpMI;
+ if (CmpMI->modifiesRegister(ARM::VPR, TRI))
+ break;
+ if (CmpMI->readsRegister(ARM::VPR, TRI))
+ break;
+ }
- // Now check that Def is a VCMP
- if (!(NewOpcode = VCMPOpcodeToVPT(Def->getOpcode())))
+ if (CmpMI == MI)
+ return nullptr;
+ NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode());
+ if (NewOpcode == 0)
return nullptr;
- // Check that Def's operands are not defined between the VCMP and MI, i.e.
- // check that they have the same reaching def.
- if (!RDA->hasSameReachingDef(Def, MI, Def->getOperand(1).getReg()) ||
- !RDA->hasSameReachingDef(Def, MI, Def->getOperand(2).getReg()))
+ // Search forward from CmpMI to MI, checking if either register was def'd
+ if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI),
+ MI, TRI))
+ return nullptr;
+ if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI),
+ MI, TRI))
return nullptr;
+ return &*CmpMI;
+}
+
+// Advances Iter past a block of predicated instructions.
+// Returns true if it successfully skipped the whole block of predicated
+// instructions. Returns false when it stopped early (due to MaxSteps), or if
+// Iter didn't point to a predicated instruction.
+static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter,
+ MachineBasicBlock::instr_iterator EndIter,
+ unsigned MaxSteps,
+ unsigned &NumInstrsSteppedOver) {
+ ARMVCC::VPTCodes NextPred = ARMVCC::None;
+ Register PredReg;
+ NumInstrsSteppedOver = 0;
+
+ while (Iter != EndIter) {
+ NextPred = getVPTInstrPredicate(*Iter, PredReg);
+ assert(NextPred != ARMVCC::Else &&
+ "VPT block pass does not expect Else preds");
+ if (NextPred == ARMVCC::None || MaxSteps == 0)
+ break;
+ --MaxSteps;
+ ++Iter;
+ ++NumInstrsSteppedOver;
+ };
+
+ return NumInstrsSteppedOver != 0 &&
+ (NextPred == ARMVCC::None || Iter == EndIter);
+}
+
+// Returns true if at least one instruction in the range [Iter, End) defines
+// or kills VPR.
+static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter,
+ MachineBasicBlock::iterator End) {
+ for (; Iter != End; ++Iter)
+ if (Iter->definesRegister(ARM::VPR) || Iter->killsRegister(ARM::VPR))
+ return true;
+ return false;
+}
+
+// Creates a T, TT, TTT or TTTT BlockMask depending on BlockSize.
+static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize) {
+ switch (BlockSize) {
+ case 1:
+ return ARM::PredBlockMask::T;
+ case 2:
+ return ARM::PredBlockMask::TT;
+ case 3:
+ return ARM::PredBlockMask::TTT;
+ case 4:
+ return ARM::PredBlockMask::TTTT;
+ default:
+ llvm_unreachable("Invalid BlockSize!");
+ }
+}
+
+// Given an iterator (Iter) that points at an instruction with a "Then"
+// predicate, tries to create the largest block of contiguous predicated
+// instructions possible, and returns the VPT Block Mask of that block.
+//
+// This will try to perform some minor optimization in order to maximize the
+// size of the block.
+static ARM::PredBlockMask
+CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
+ MachineBasicBlock::instr_iterator EndIter,
+ SmallVectorImpl<MachineInstr *> &DeadInstructions) {
+ MachineBasicBlock::instr_iterator BlockBeg = Iter;
+ (void)BlockBeg;
+ assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then &&
+ "Expected a Predicated Instruction");
+
+ LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump());
+
+ unsigned BlockSize;
+ StepOverPredicatedInstrs(Iter, EndIter, 4, BlockSize);
+
+ LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter =
+ std::next(BlockBeg);
+ AddedInstIter != Iter; ++AddedInstIter) {
+ dbgs() << " adding: ";
+ AddedInstIter->dump();
+ });
+
+ // Generate the initial BlockMask
+ ARM::PredBlockMask BlockMask = GetInitialBlockMask(BlockSize);
+
+ // Remove VPNOTs while there's still room in the block, so we can make the
+ // largest block possible.
+ ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else;
+ while (BlockSize < 4 && Iter != EndIter &&
+ Iter->getOpcode() == ARM::MVE_VPNOT) {
+
+ // Try to skip all of the predicated instructions after the VPNOT, stopping
+ // after (4 - BlockSize). If we can't skip them all, stop.
+ unsigned ElseInstCnt = 0;
+ MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter);
+ if (!StepOverPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize),
+ ElseInstCnt))
+ break;
+
+ // Check if this VPNOT can be removed or not: It can only be removed if at
+ // least one of the predicated instructions that follow it kills or sets
+ // VPR.
+ if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter))
+ break;
+
+ LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump(););
+
+ // Record the new size of the block
+ BlockSize += ElseInstCnt;
+ assert(BlockSize <= 4 && "Block is too large!");
+
+ // Record the VPNOT to remove it later.
+ DeadInstructions.push_back(&*Iter);
+ ++Iter;
+
+ // Replace the predicates of the instructions we're adding.
+ // Note that we are using "Iter" to iterate over the block so we can update
+ // it at the same time.
+ for (; Iter != VPNOTBlockEndIter; ++Iter) {
+ // Find the register in which the predicate is
+ int OpIdx = findFirstVPTPredOperandIdx(*Iter);
+ assert(OpIdx != -1);
+
+ // Change the predicate and update the mask
+ Iter->getOperand(OpIdx).setImm(CurrentPredicate);
+ BlockMask = expandPredBlockMask(BlockMask, CurrentPredicate);
+
+ LLVM_DEBUG(dbgs() << " adding : "; Iter->dump());
+ }
- return Def;
+ CurrentPredicate =
+ (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then);
+ }
+ return BlockMask;
}
bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
bool Modified = false;
MachineBasicBlock::instr_iterator MBIter = Block.instr_begin();
MachineBasicBlock::instr_iterator EndIter = Block.instr_end();
- SmallSet<MachineInstr *, 4> RemovedVCMPs;
+
+ SmallVector<MachineInstr *, 4> DeadInstructions;
while (MBIter != EndIter) {
MachineInstr *MI = &*MBIter;
- unsigned PredReg = 0;
- DebugLoc dl = MI->getDebugLoc();
+ Register PredReg;
+ DebugLoc DL = MI->getDebugLoc();
ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
// The idea of the predicate is that None, Then and Else are for use when
// handling assembly language: they correspond to the three possible
// suffixes "", "t" and "e" on the mnemonic. So when instructions are read
- // from assembly source or disassembled from object code, you expect to see
- // a mixture whenever there's a long VPT block. But in code generation, we
- // hope we'll never generate an Else as input to this pass.
+ // from assembly source or disassembled from object code, you expect to
+ // see a mixture whenever there's a long VPT block. But in code
+ // generation, we hope we'll never generate an Else as input to this pass.
assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");
if (Pred == ARMVCC::None) {
@@ -118,46 +255,25 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
continue;
}
- LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump());
- int VPTInstCnt = 1;
- ARMVCC::VPTCodes NextPred;
-
- // Look at subsequent instructions, checking if they can be in the same VPT
- // block.
- ++MBIter;
- while (MBIter != EndIter && VPTInstCnt < 4) {
- NextPred = getVPTInstrPredicate(*MBIter, PredReg);
- assert(NextPred != ARMVCC::Else &&
- "VPT block pass does not expect Else preds");
- if (NextPred != Pred)
- break;
- LLVM_DEBUG(dbgs() << " adding : "; MBIter->dump());
- ++VPTInstCnt;
- ++MBIter;
- };
-
- unsigned BlockMask = getARMVPTBlockMask(VPTInstCnt);
+ ARM::PredBlockMask BlockMask =
+ CreateVPTBlock(MBIter, EndIter, DeadInstructions);
- // Search back for a VCMP that can be folded to create a VPT, or else create
- // a VPST directly
+ // Search back for a VCMP that can be folded to create a VPT, or else
+ // create a VPST directly
MachineInstrBuilder MIBuilder;
unsigned NewOpcode;
- MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, RDA, NewOpcode);
- if (VCMP) {
+ LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n");
+ if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) {
LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump());
- MIBuilder = BuildMI(Block, MI, dl, TII->get(NewOpcode));
- MIBuilder.addImm(BlockMask);
+ MIBuilder = BuildMI(Block, MI, DL, TII->get(NewOpcode));
+ MIBuilder.addImm((uint64_t)BlockMask);
MIBuilder.add(VCMP->getOperand(1));
MIBuilder.add(VCMP->getOperand(2));
MIBuilder.add(VCMP->getOperand(3));
- // We delay removing the actual VCMP instruction by saving it to a list
- // and deleting all instructions in this list in one go after we have
- // created the VPT blocks. We do this in order not to invalidate the
- // ReachingDefAnalysis that is queried by 'findVCMPToFoldIntoVPST'.
- RemovedVCMPs.insert(VCMP);
+ VCMP->eraseFromParent();
} else {
- MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST));
- MIBuilder.addImm(BlockMask);
+ MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST));
+ MIBuilder.addImm((uint64_t)BlockMask);
}
finalizeBundle(
@@ -166,16 +282,18 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
Modified = true;
}
- for (auto *I : RemovedVCMPs)
- I->eraseFromParent();
+ // Erase all dead instructions
+ for (MachineInstr *DeadMI : DeadInstructions) {
+ if (DeadMI->isInsideBundle())
+ DeadMI->eraseFromBundle();
+ else
+ DeadMI->eraseFromParent();
+ }
return Modified;
}
bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
- if (skipFunction(Fn.getFunction()))
- return false;
-
const ARMSubtarget &STI =
static_cast<const ARMSubtarget &>(Fn.getSubtarget());
@@ -183,7 +301,7 @@ bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
return false;
TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
- RDA = &getAnalysis<ReachingDefAnalysis>();
+ TRI = STI.getRegisterInfo();
LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
<< "********** Function: " << Fn.getName() << '\n');