aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp240
1 files changed, 238 insertions, 2 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 4ea714ff15f7..a8853609a7c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -21,8 +21,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -39,10 +39,54 @@ STATISTIC(NumFrameOffFoldInPreEmit,
"Number of folding frame offset by using r+r in pre-emit peephole");
static cl::opt<bool>
+EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
+ cl::desc("enable PC Relative linker optimization"));
+
+static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
cl::desc("Run pre-emit peephole optimizations."));
namespace {
+
+static bool hasPCRelativeForm(MachineInstr &Use) {
+ switch (Use.getOpcode()) {
+ default:
+ return false;
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::LWA:
+ case PPC::LXSD:
+ case PPC::LXSSP:
+ case PPC::LXV:
+ case PPC::STXSD:
+ case PPC::STXSSP:
+ case PPC::STXV:
+ case PPC::LFD:
+ case PPC::LFS:
+ case PPC::STFD:
+ case PPC::STFS:
+ case PPC::DFLOADf32:
+ case PPC::DFLOADf64:
+ case PPC::DFSTOREf32:
+ case PPC::DFSTOREf64:
+ return true;
+ }
+}
+
class PPCPreEmitPeephole : public MachineFunctionPass {
public:
static char ID;
@@ -77,7 +121,7 @@ namespace {
for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
// Skip load immediate that is marked to be erased later because it
// cannot be used to replace any other instructions.
- if (InstrsToErase.find(&*BBI) != InstrsToErase.end())
+ if (InstrsToErase.contains(&*BBI))
continue;
// Skip non-load immediate.
unsigned Opc = BBI->getOpcode();
@@ -172,6 +216,196 @@ namespace {
return !InstrsToErase.empty();
}
+ // Check if this instruction is a PLDpc that is part of a GOT indirect
+ // access.
+ bool isGOTPLDpc(MachineInstr &Instr) {
+ if (Instr.getOpcode() != PPC::PLDpc)
+ return false;
+
+ // The result must be a register.
+ const MachineOperand &LoadedAddressReg = Instr.getOperand(0);
+ if (!LoadedAddressReg.isReg())
+ return false;
+
+ // Make sure that this is a global symbol.
+ const MachineOperand &SymbolOp = Instr.getOperand(1);
+ if (!SymbolOp.isGlobal())
+ return false;
+
+ // Finally return true only if the GOT flag is present.
+ return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG);
+ }
+
+ bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
+ MachineFunction *MF = MBB.getParent();
+ // If the linker opt is disabled then just return.
+ if (!EnablePCRelLinkerOpt)
+ return false;
+
+ // Add this linker opt only if we are using PC Relative memops.
+ if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
+ return false;
+
+ // Struct to keep track of one def/use pair for a GOT indirect access.
+ struct GOTDefUsePair {
+ MachineBasicBlock::iterator DefInst;
+ MachineBasicBlock::iterator UseInst;
+ Register DefReg;
+ Register UseReg;
+ bool StillValid;
+ };
+ // Vector of def/ues pairs in this basic block.
+ SmallVector<GOTDefUsePair, 4> CandPairs;
+ SmallVector<GOTDefUsePair, 4> ValidPairs;
+ bool MadeChange = false;
+
+ // Run through all of the instructions in the basic block and try to
+ // collect potential pairs of GOT indirect access instructions.
+ for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
+ // Look for the initial GOT indirect load.
+ if (isGOTPLDpc(*BBI)) {
+ GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(),
+ BBI->getOperand(0).getReg(),
+ PPC::NoRegister, true};
+ CandPairs.push_back(CurrentPair);
+ continue;
+ }
+
+ // We haven't encountered any new PLD instructions, nothing to check.
+ if (CandPairs.empty())
+ continue;
+
+ // Run through the candidate pairs and see if any of the registers
+ // defined in the PLD instructions are used by this instruction.
+ // Note: the size of CandPairs can change in the loop.
+ for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) {
+ GOTDefUsePair &Pair = CandPairs[Idx];
+ // The instruction does not use or modify this PLD's def reg,
+ // ignore it.
+ if (!BBI->readsRegister(Pair.DefReg, TRI) &&
+ !BBI->modifiesRegister(Pair.DefReg, TRI))
+ continue;
+
+ // The use needs to be used in the address compuation and not
+ // as the register being stored for a store.
+ const MachineOperand *UseOp =
+ hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr;
+
+ // Check for a valid use.
+ if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&
+ UseOp->isUse() && UseOp->isKill()) {
+ Pair.UseInst = BBI;
+ Pair.UseReg = BBI->getOperand(0).getReg();
+ ValidPairs.push_back(Pair);
+ }
+ CandPairs.erase(CandPairs.begin() + Idx);
+ }
+ }
+
+ // Go through all of the pairs and check for any more valid uses.
+ for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) {
+ // We shouldn't be here if we don't have a valid pair.
+ assert(Pair->UseInst.isValid() && Pair->StillValid &&
+ "Kept an invalid def/use pair for GOT PCRel opt");
+ // We have found a potential pair. Search through the instructions
+ // between the def and the use to see if it is valid to mark this as a
+ // linker opt.
+ MachineBasicBlock::iterator BBI = Pair->DefInst;
+ ++BBI;
+ for (; BBI != Pair->UseInst; ++BBI) {
+ if (BBI->readsRegister(Pair->UseReg, TRI) ||
+ BBI->modifiesRegister(Pair->UseReg, TRI)) {
+ Pair->StillValid = false;
+ break;
+ }
+ }
+
+ if (!Pair->StillValid)
+ continue;
+
+ // The load/store instruction that uses the address from the PLD will
+ // either use a register (for a store) or define a register (for the
+ // load). That register will be added as an implicit def to the PLD
+ // and as an implicit use on the second memory op. This is a precaution
+ // to prevent future passes from using that register between the two
+ // instructions.
+ MachineOperand ImplDef =
+ MachineOperand::CreateReg(Pair->UseReg, true, true);
+ MachineOperand ImplUse =
+ MachineOperand::CreateReg(Pair->UseReg, false, true);
+ Pair->DefInst->addOperand(ImplDef);
+ Pair->UseInst->addOperand(ImplUse);
+
+ // Create the symbol.
+ MCContext &Context = MF->getContext();
+ MCSymbol *Symbol = Context.createNamedTempSymbol("pcrel");
+ MachineOperand PCRelLabel =
+ MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
+ Pair->DefInst->addOperand(*MF, PCRelLabel);
+ Pair->UseInst->addOperand(*MF, PCRelLabel);
+ MadeChange |= true;
+ }
+ return MadeChange;
+ }
+
+ // This function removes redundant pairs of accumulator prime/unprime
+ // instructions. In some situations, it's possible the compiler inserts an
+ // accumulator prime instruction followed by an unprime instruction (e.g.
+ // when we store an accumulator after restoring it from a spill). If the
+ // accumulator is not used between the two, they can be removed. This
+ // function removes these redundant pairs from basic blocks.
+ // The algorithm is quite straightforward - every time we encounter a prime
+ // instruction, the primed register is added to a candidate set. Any use
+ // other than a prime removes the candidate from the set and any de-prime
+ // of a current candidate marks both the prime and de-prime for removal.
+ // This way we ensure we only remove prime/de-prime *pairs* with no
+ // intervening uses.
+ bool removeAccPrimeUnprime(MachineBasicBlock &MBB) {
+ DenseSet<MachineInstr *> InstrsToErase;
+ // Initially, none of the acc registers are candidates.
+ SmallVector<MachineInstr *, 8> Candidates(
+ PPC::UACCRCRegClass.getNumRegs(), nullptr);
+
+ for (MachineInstr &BBI : MBB.instrs()) {
+ unsigned Opc = BBI.getOpcode();
+ // If we are visiting a xxmtacc instruction, we add it and its operand
+ // register to the candidate set.
+ if (Opc == PPC::XXMTACC) {
+ Register Acc = BBI.getOperand(0).getReg();
+ assert(PPC::ACCRCRegClass.contains(Acc) &&
+ "Unexpected register for XXMTACC");
+ Candidates[Acc - PPC::ACC0] = &BBI;
+ }
+ // If we are visiting a xxmfacc instruction and its operand register is
+ // in the candidate set, we mark the two instructions for removal.
+ else if (Opc == PPC::XXMFACC) {
+ Register Acc = BBI.getOperand(0).getReg();
+ assert(PPC::ACCRCRegClass.contains(Acc) &&
+ "Unexpected register for XXMFACC");
+ if (!Candidates[Acc - PPC::ACC0])
+ continue;
+ InstrsToErase.insert(&BBI);
+ InstrsToErase.insert(Candidates[Acc - PPC::ACC0]);
+ }
+ // If we are visiting an instruction using an accumulator register
+ // as operand, we remove it from the candidate set.
+ else {
+ for (MachineOperand &Operand : BBI.operands()) {
+ if (!Operand.isReg())
+ continue;
+ Register Reg = Operand.getReg();
+ if (PPC::ACCRCRegClass.contains(Reg))
+ Candidates[Reg - PPC::ACC0] = nullptr;
+ }
+ }
+ }
+
+ for (MachineInstr *MI : InstrsToErase)
+ MI->eraseFromParent();
+ NumRemovedInPreEmit += InstrsToErase.size();
+ return !InstrsToErase.empty();
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
// Remove UNENCODED_NOP even when this pass is disabled.
@@ -192,6 +426,8 @@ namespace {
SmallVector<MachineInstr *, 4> InstrsToErase;
for (MachineBasicBlock &MBB : MF) {
Changed |= removeRedundantLIs(MBB, TRI);
+ Changed |= addLinkerOpt(MBB, TRI);
+ Changed |= removeAccPrimeUnprime(MBB);
for (MachineInstr &MI : MBB) {
unsigned Opc = MI.getOpcode();
if (Opc == PPC::UNENCODED_NOP) {