diff options
Diffstat (limited to 'lib/Target/R600/R600ClauseMergePass.cpp')
-rw-r--r-- | lib/Target/R600/R600ClauseMergePass.cpp | 204 |
1 files changed, 204 insertions, 0 deletions
diff --git a/lib/Target/R600/R600ClauseMergePass.cpp b/lib/Target/R600/R600ClauseMergePass.cpp new file mode 100644 index 000000000000..33d2ca32577d --- /dev/null +++ b/lib/Target/R600/R600ClauseMergePass.cpp @@ -0,0 +1,204 @@ +//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// R600EmitClauseMarker pass emits CFAlu instruction in a conservative maneer. +/// This pass is merging consecutive CFAlus where applicable. +/// It needs to be called after IfCvt for best results. +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "r600mergeclause" +#include "AMDGPU.h" +#include "R600Defines.h" +#include "R600InstrInfo.h" +#include "R600MachineFunctionInfo.h" +#include "R600RegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +static bool isCFAlu(const MachineInstr *MI) { + switch (MI->getOpcode()) { + case AMDGPU::CF_ALU: + case AMDGPU::CF_ALU_PUSH_BEFORE: + return true; + default: + return false; + } +} + +class R600ClauseMergePass : public MachineFunctionPass { + +private: + static char ID; + const R600InstrInfo *TII; + + unsigned getCFAluSize(const MachineInstr *MI) const; + bool isCFAluEnabled(const MachineInstr *MI) const; + + /// IfCvt pass can generate "disabled" ALU clause marker that need to be + /// removed and their content affected to the previous alu clause. + /// This function parse instructions after CFAlu untill it find a disabled + /// CFAlu and merge the content, or an enabled CFAlu. + void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const; + + /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if + /// it is the case. + bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu) + const; + +public: + R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const; +}; + +char R600ClauseMergePass::ID = 0; + +unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const { + assert(isCFAlu(MI)); + return MI->getOperand( + TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm(); +} + +bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const { + assert(isCFAlu(MI)); + return MI->getOperand( + TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm(); +} + +void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) + const { + int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); + MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end(); + I++; + do { + while (I!= E && !isCFAlu(I)) + I++; + if (I == E) + return; + MachineInstr *MI = I++; + if (isCFAluEnabled(MI)) + break; + CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI)); + MI->eraseFromParent(); + } while (I != E); +} + +bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu, + const MachineInstr *LatrCFAlu) const { + assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu)); + int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); + unsigned RootInstCount = getCFAluSize(RootCFAlu), + LaterInstCount = getCFAluSize(LatrCFAlu); + unsigned CumuledInsts = RootInstCount + LaterInstCount; + if (CumuledInsts >= TII->getMaxAlusPerClause()) { + DEBUG(dbgs() << "Excess inst counts\n"); + return false; + } + if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) + return false; + // Is KCache Bank 0 compatible ? + int Mode0Idx = + TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0); + int KBank0Idx = + TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0); + int KBank0LineIdx = + TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0); + if (LatrCFAlu->getOperand(Mode0Idx).getImm() && + RootCFAlu->getOperand(Mode0Idx).getImm() && + (LatrCFAlu->getOperand(KBank0Idx).getImm() != + RootCFAlu->getOperand(KBank0Idx).getImm() || + LatrCFAlu->getOperand(KBank0LineIdx).getImm() != + RootCFAlu->getOperand(KBank0LineIdx).getImm())) { + DEBUG(dbgs() << "Wrong KC0\n"); + return false; + } + // Is KCache Bank 1 compatible ? + int Mode1Idx = + TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1); + int KBank1Idx = + TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1); + int KBank1LineIdx = + TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1); + if (LatrCFAlu->getOperand(Mode1Idx).getImm() && + RootCFAlu->getOperand(Mode1Idx).getImm() && + (LatrCFAlu->getOperand(KBank1Idx).getImm() != + RootCFAlu->getOperand(KBank1Idx).getImm() || + LatrCFAlu->getOperand(KBank1LineIdx).getImm() != + RootCFAlu->getOperand(KBank1LineIdx).getImm())) { + DEBUG(dbgs() << "Wrong KC0\n"); + return false; + } + if (LatrCFAlu->getOperand(Mode0Idx).getImm()) { + RootCFAlu->getOperand(Mode0Idx).setImm( + LatrCFAlu->getOperand(Mode0Idx).getImm()); + RootCFAlu->getOperand(KBank0Idx).setImm( + LatrCFAlu->getOperand(KBank0Idx).getImm()); + RootCFAlu->getOperand(KBank0LineIdx).setImm( + LatrCFAlu->getOperand(KBank0LineIdx).getImm()); + } + if (LatrCFAlu->getOperand(Mode1Idx).getImm()) { + RootCFAlu->getOperand(Mode1Idx).setImm( + LatrCFAlu->getOperand(Mode1Idx).getImm()); + RootCFAlu->getOperand(KBank1Idx).setImm( + LatrCFAlu->getOperand(KBank1Idx).getImm()); + RootCFAlu->getOperand(KBank1LineIdx).setImm( + LatrCFAlu->getOperand(KBank1LineIdx).getImm()); + } + RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts); + RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode())); + return true; +} + +bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo()); + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + MachineBasicBlock::iterator LatestCFAlu = E; + while (I != E) { + MachineInstr *MI = I++; + if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) || + TII->mustBeLastInClause(MI->getOpcode())) + LatestCFAlu = E; + if (!isCFAlu(MI)) + continue; + cleanPotentialDisabledCFAlu(MI); + + if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) { + MI->eraseFromParent(); + } else { + assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled"); + LatestCFAlu = MI; + } + } + } + return false; +} + +const char *R600ClauseMergePass::getPassName() const { + return "R600 Merge Clause Markers Pass"; +} + +} // end anonymous namespace + + +llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) { + return new R600ClauseMergePass(TM); +} |