Diffstat (limited to 'lib/Target/AMDGPU/R600InstrInfo.cpp')
-rw-r--r-- | lib/Target/AMDGPU/R600InstrInfo.cpp | 509
1 file changed, 285 insertions, 224 deletions
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index 8b6eea17130b..1c5f7ec1b6ef 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -28,26 +28,17 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "AMDGPUGenDFAPacketizer.inc" -R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st) - : AMDGPUInstrInfo(st), RI() {} - -const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const { - return RI; -} - -bool R600InstrInfo::isTrig(const MachineInstr &MI) const { - return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG; -} +R600InstrInfo::R600InstrInfo(const R600Subtarget &ST) + : AMDGPUInstrInfo(ST), RI(), ST(ST) {} bool R600InstrInfo::isVector(const MachineInstr &MI) const { return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; } -void -R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { +void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const DebugLoc &DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc) const { unsigned VectorComponents = 0; if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) || AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) && @@ -91,10 +82,9 @@ bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, } bool R600InstrInfo::isMov(unsigned Opcode) const { - - switch(Opcode) { - default: return false; + default: + return false; case AMDGPU::MOV: case AMDGPU::MOV_IMM_F32: case AMDGPU::MOV_IMM_I32: @@ -102,17 +92,6 @@ bool R600InstrInfo::isMov(unsigned Opcode) const { } } -// Some instructions act as place holders to emulate operations that the GPU -// hardware does automatically. This function can be used to check if -// an opcode falls into this category. 
-bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const { - switch (Opcode) { - default: return false; - case AMDGPU::RETURN: - return true; - } -} - bool R600InstrInfo::isReductionOp(unsigned Opcode) const { return false; } @@ -150,20 +129,16 @@ bool R600InstrInfo::isLDSInstr(unsigned Opcode) const { (TargetFlags & R600_InstFlag::LDS_1A2D)); } -bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const { - return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1; -} - bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const { return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1; } -bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const { - if (isALUInstr(MI->getOpcode())) +bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const { + if (isALUInstr(MI.getOpcode())) return true; - if (isVector(*MI) || isCubeOp(MI->getOpcode())) + if (isVector(MI) || isCubeOp(MI.getOpcode())) return true; - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { case AMDGPU::PRED_X: case AMDGPU::INTERP_PAIR_XY: case AMDGPU::INTERP_PAIR_ZW: @@ -182,16 +157,16 @@ bool R600InstrInfo::isTransOnly(unsigned Opcode) const { return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU); } -bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const { - return isTransOnly(MI->getOpcode()); +bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const { + return isTransOnly(MI.getOpcode()); } bool R600InstrInfo::isVectorOnly(unsigned Opcode) const { return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU); } -bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const { - return isVectorOnly(MI->getOpcode()); +bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const { + return isVectorOnly(MI.getOpcode()); } bool R600InstrInfo::isExport(unsigned Opcode) const { @@ -202,23 +177,21 @@ bool R600InstrInfo::usesVertexCache(unsigned Opcode) const { return ST.hasVertexCache() && IS_VTX(get(Opcode)); } -bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const { - const MachineFunction *MF = MI->getParent()->getParent(); - const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>(); - return MFI->getShaderType() != ShaderType::COMPUTE && - usesVertexCache(MI->getOpcode()); +bool R600InstrInfo::usesVertexCache(const MachineInstr &MI) const { + const MachineFunction *MF = MI.getParent()->getParent(); + return !AMDGPU::isCompute(MF->getFunction()->getCallingConv()) && + usesVertexCache(MI.getOpcode()); } bool R600InstrInfo::usesTextureCache(unsigned Opcode) const { return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode)); } -bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const { - const MachineFunction *MF = MI->getParent()->getParent(); - const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>(); - return (MFI->getShaderType() == ShaderType::COMPUTE && - usesVertexCache(MI->getOpcode())) || - usesTextureCache(MI->getOpcode()); +bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const { + const MachineFunction *MF = MI.getParent()->getParent(); + return (AMDGPU::isCompute(MF->getFunction()->getCallingConv()) && + usesVertexCache(MI.getOpcode())) || + usesTextureCache(MI.getOpcode()); } bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { @@ -231,20 +204,21 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { } } -bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const { - return 
MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1; +bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const { + return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1; } -bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const { - return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1; +bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const { + return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1; } -bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const { - if (!isALUInstr(MI->getOpcode())) { +bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const { + if (!isALUInstr(MI.getOpcode())) { return false; } - for (MachineInstr::const_mop_iterator I = MI->operands_begin(), - E = MI->operands_end(); I != E; ++I) { + for (MachineInstr::const_mop_iterator I = MI.operands_begin(), + E = MI.operands_end(); + I != E; ++I) { if (!I->isReg() || !I->isUse() || TargetRegisterInfo::isVirtualRegister(I->getReg())) continue; @@ -255,17 +229,6 @@ bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const { return false; } -int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const { - static const unsigned OpTable[] = { - AMDGPU::OpName::src0, - AMDGPU::OpName::src1, - AMDGPU::OpName::src2 - }; - - assert (SrcNum < 3); - return getOperandIdx(Opcode, OpTable[SrcNum]); -} - int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const { static const unsigned SrcSelTable[][2] = { {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, @@ -290,10 +253,10 @@ int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const { } SmallVector<std::pair<MachineOperand *, int64_t>, 3> -R600InstrInfo::getSrcs(MachineInstr *MI) const { +R600InstrInfo::getSrcs(MachineInstr &MI) const { SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result; - if (MI->getOpcode() == AMDGPU::DOT_4) { + if (MI.getOpcode() == AMDGPU::DOT_4) { static const unsigned OpTable[8][2] = { {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, @@ -306,13 +269,13 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const { }; for (unsigned j = 0; j < 8; j++) { - MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(), - OpTable[j][0])); + MachineOperand &MO = + MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0])); unsigned Reg = MO.getReg(); if (Reg == AMDGPU::ALU_CONST) { - unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(), - OpTable[j][1])).getImm(); - Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel)); + MachineOperand &Sel = + MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); + Result.push_back(std::make_pair(&MO, Sel.getImm())); continue; } @@ -327,30 +290,33 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const { }; for (unsigned j = 0; j < 3; j++) { - int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); + int SrcIdx = getOperandIdx(MI.getOpcode(), OpTable[j][0]); if (SrcIdx < 0) break; - MachineOperand &MO = MI->getOperand(SrcIdx); - unsigned Reg = MI->getOperand(SrcIdx).getReg(); + MachineOperand &MO = MI.getOperand(SrcIdx); + unsigned Reg = MO.getReg(); if (Reg == AMDGPU::ALU_CONST) { - unsigned Sel = MI->getOperand( - getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); - Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel)); + MachineOperand &Sel = + MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); + Result.push_back(std::make_pair(&MO, Sel.getImm())); continue; } if (Reg == AMDGPU::ALU_LITERAL_X) { - unsigned Imm = MI->getOperand( - 
getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm(); - Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm)); - continue; + MachineOperand &Operand = + MI.getOperand(getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal)); + if (Operand.isImm()) { + Result.push_back(std::make_pair(&MO, Operand.getImm())); + continue; + } + assert(Operand.isGlobal()); } - Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0)); + Result.push_back(std::make_pair(&MO, 0)); } return Result; } -std::vector<std::pair<int, unsigned> > -R600InstrInfo::ExtractSrcs(MachineInstr *MI, +std::vector<std::pair<int, unsigned>> +R600InstrInfo::ExtractSrcs(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PV, unsigned &ConstCount) const { ConstCount = 0; @@ -360,13 +326,13 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI, unsigned i = 0; for (unsigned n = Srcs.size(); i < n; ++i) { unsigned Reg = Srcs[i].first->getReg(); - unsigned Index = RI.getEncodingValue(Reg) & 0xff; + int Index = RI.getEncodingValue(Reg) & 0xff; if (Reg == AMDGPU::OQAP) { - Result.push_back(std::pair<int, unsigned>(Index, 0)); + Result.push_back(std::make_pair(Index, 0U)); } if (PV.find(Reg) != PV.end()) { // 255 is used to tells its a PS/PV reg - Result.push_back(std::pair<int, unsigned>(255, 0)); + Result.push_back(std::make_pair(255, 0U)); continue; } if (Index > 127) { @@ -375,7 +341,7 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI, continue; } unsigned Chan = RI.getHWRegChan(Reg); - Result.push_back(std::pair<int, unsigned>(Index, Chan)); + Result.push_back(std::make_pair(Index, Chan)); } for (; i < 3; ++i) Result.push_back(DummyPair); @@ -411,8 +377,7 @@ Swizzle(std::vector<std::pair<int, unsigned> > Src, return Src; } -static unsigned -getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) { +static unsigned getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) { switch (Swz) { case R600InstrInfo::ALU_VEC_012_SCL_210: { unsigned Cycles[3] = { 2, 1, 0}; @@ -432,7 +397,6 @@ getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) { } default: llvm_unreachable("Wrong Swizzle for Trans Slot"); - return 0; } } @@ -557,7 +521,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG, unsigned ConstCount; BankSwizzle TransBS = ALU_VEC_012_SCL_210; for (unsigned i = 0, e = IG.size(); i < e; ++i) { - IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount)); + IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount)); unsigned Op = getOperandIdx(IG[i]->getOpcode(), AMDGPU::OpName::bank_swizzle); ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle) @@ -624,14 +588,13 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs) std::vector<unsigned> Consts; SmallSet<int64_t, 4> Literals; for (unsigned i = 0, n = MIs.size(); i < n; i++) { - MachineInstr *MI = MIs[i]; - if (!isALUInstr(MI->getOpcode())) + MachineInstr &MI = *MIs[i]; + if (!isALUInstr(MI.getOpcode())) continue; ArrayRef<std::pair<MachineOperand *, int64_t>> Srcs = getSrcs(MI); - for (unsigned j = 0, e = Srcs.size(); j < e; j++) { - std::pair<MachineOperand *, unsigned> Src = Srcs[j]; + for (const auto &Src:Srcs) { if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X) Literals.insert(Src.second); if (Literals.size() > 4) @@ -652,7 +615,7 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs) DFAPacketizer * R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const { const InstrItineraryData *II = STI.getInstrItineraryData(); - return static_cast<const 
AMDGPUSubtarget &>(STI).createDFAPacketizer(II); + return static_cast<const R600Subtarget &>(STI).createDFAPacketizer(II); } static bool @@ -670,9 +633,9 @@ findFirstPredicateSetterFrom(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { while (I != MBB.begin()) { --I; - MachineInstr *MI = I; - if (isPredicateSetter(MI->getOpcode())) - return MI; + MachineInstr &MI = *I; + if (isPredicateSetter(MI.getOpcode())) + return &MI; } return nullptr; @@ -688,12 +651,11 @@ static bool isBranch(unsigned Opcode) { Opcode == AMDGPU::BRANCH_COND_f32; } -bool -R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const { +bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { // Most of the following comes from the ARM implementation of AnalyzeBranch // If the block has no terminators, it just falls into the block after it. @@ -716,21 +678,21 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, I->removeFromParent(); I = PriorI; } - MachineInstr *LastInst = I; + MachineInstr &LastInst = *I; // If there is only one terminator instruction, process it. - unsigned LastOpc = LastInst->getOpcode(); + unsigned LastOpc = LastInst.getOpcode(); if (I == MBB.begin() || !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) { if (LastOpc == AMDGPU::JUMP) { - TBB = LastInst->getOperand(0).getMBB(); + TBB = LastInst.getOperand(0).getMBB(); return false; } else if (LastOpc == AMDGPU::JUMP_COND) { - MachineInstr *predSet = I; + auto predSet = I; while (!isPredicateSetter(predSet->getOpcode())) { predSet = --I; } - TBB = LastInst->getOperand(0).getMBB(); + TBB = LastInst.getOperand(0).getMBB(); Cond.push_back(predSet->getOperand(1)); Cond.push_back(predSet->getOperand(2)); Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); @@ -740,17 +702,17 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, } // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - unsigned SecondLastOpc = SecondLastInst->getOpcode(); + MachineInstr &SecondLastInst = *I; + unsigned SecondLastOpc = SecondLastInst.getOpcode(); // If the block ends with a B and a Bcc, handle it. 
if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) { - MachineInstr *predSet = --I; + auto predSet = --I; while (!isPredicateSetter(predSet->getOpcode())) { predSet = --I; } - TBB = SecondLastInst->getOperand(0).getMBB(); - FBB = LastInst->getOperand(0).getMBB(); + TBB = SecondLastInst.getOperand(0).getMBB(); + FBB = LastInst.getOperand(0).getMBB(); Cond.push_back(predSet->getOperand(1)); Cond.push_back(predSet->getOperand(2)); Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); @@ -772,12 +734,11 @@ MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) { return MBB.end(); } -unsigned -R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - ArrayRef<MachineOperand> Cond, - DebugLoc DL) const { +unsigned R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef<MachineOperand> Cond, + const DebugLoc &DL) const { assert(TBB && "InsertBranch must not be told to insert a fallthrough"); if (!FBB) { @@ -787,7 +748,7 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, } else { MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); assert(PredSet && "No previous predicate !"); - addFlag(PredSet, 0, MO_FLAG_PUSH); + addFlag(*PredSet, 0, MO_FLAG_PUSH); PredSet->getOperand(2).setImm(Cond[1].getImm()); BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) @@ -803,7 +764,7 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, } else { MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); assert(PredSet && "No previous predicate !"); - addFlag(PredSet, 0, MO_FLAG_PUSH); + addFlag(*PredSet, 0, MO_FLAG_PUSH); PredSet->getOperand(2).setImm(Cond[1].getImm()); BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) .addMBB(TBB) @@ -835,7 +796,7 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 0; case AMDGPU::JUMP_COND: { MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); - clearFlag(predSet, 0, MO_FLAG_PUSH); + clearFlag(*predSet, 0, MO_FLAG_PUSH); I->eraseFromParent(); MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); if (CfAlu == MBB.end()) @@ -860,7 +821,7 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 1; case AMDGPU::JUMP_COND: { MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); - clearFlag(predSet, 0, MO_FLAG_PUSH); + clearFlag(*predSet, 0, MO_FLAG_PUSH); I->eraseFromParent(); MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); if (CfAlu == MBB.end()) @@ -876,13 +837,12 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } -bool -R600InstrInfo::isPredicated(const MachineInstr *MI) const { - int idx = MI->findFirstPredOperandIdx(); +bool R600InstrInfo::isPredicated(const MachineInstr &MI) const { + int idx = MI.findFirstPredOperandIdx(); if (idx < 0) return false; - unsigned Reg = MI->getOperand(idx).getReg(); + unsigned Reg = MI.getOperand(idx).getReg(); switch (Reg) { default: return false; case AMDGPU::PRED_SEL_ONE: @@ -892,25 +852,22 @@ R600InstrInfo::isPredicated(const MachineInstr *MI) const { } } -bool -R600InstrInfo::isPredicable(MachineInstr *MI) const { +bool R600InstrInfo::isPredicable(MachineInstr &MI) const { // XXX: KILL* instructions can be predicated, but they must be the last // instruction in a clause, so this means any instructions after them cannot // be predicated. Until we have proper support for instruction clauses in the // backend, we will mark KILL* instructions as unpredicable. 
- if (MI->getOpcode() == AMDGPU::KILLGT) { + if (MI.getOpcode() == AMDGPU::KILLGT) { return false; - } else if (MI->getOpcode() == AMDGPU::CF_ALU) { + } else if (MI.getOpcode() == AMDGPU::CF_ALU) { // If the clause start in the middle of MBB then the MBB has more // than a single clause, unable to predicate several clauses. - if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI)) + if (MI.getParent()->begin() != MachineBasicBlock::iterator(MI)) return false; // TODO: We don't support KC merging atm - if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0) - return false; - return true; - } else if (isVector(*MI)) { + return MI.getOperand(3).getImm() == 0 && MI.getOperand(4).getImm() == 0; + } else if (isVector(MI)) { return false; } else { return AMDGPUInstrInfo::isPredicable(MI); @@ -986,48 +943,39 @@ R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) con return false; } -bool -R600InstrInfo::DefinesPredicate(MachineInstr *MI, - std::vector<MachineOperand> &Pred) const { - return isPredicateSetter(MI->getOpcode()); +bool R600InstrInfo::DefinesPredicate(MachineInstr &MI, + std::vector<MachineOperand> &Pred) const { + return isPredicateSetter(MI.getOpcode()); } -bool -R600InstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, - ArrayRef<MachineOperand> Pred2) const { - return false; -} - - -bool -R600InstrInfo::PredicateInstruction(MachineInstr *MI, - ArrayRef<MachineOperand> Pred) const { - int PIdx = MI->findFirstPredOperandIdx(); +bool R600InstrInfo::PredicateInstruction(MachineInstr &MI, + ArrayRef<MachineOperand> Pred) const { + int PIdx = MI.findFirstPredOperandIdx(); - if (MI->getOpcode() == AMDGPU::CF_ALU) { - MI->getOperand(8).setImm(0); + if (MI.getOpcode() == AMDGPU::CF_ALU) { + MI.getOperand(8).setImm(0); return true; } - if (MI->getOpcode() == AMDGPU::DOT_4) { - MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X)) + if (MI.getOpcode() == AMDGPU::DOT_4) { + MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X)) .setReg(Pred[2].getReg()); - MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y)) + MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y)) .setReg(Pred[2].getReg()); - MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z)) + MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z)) .setReg(Pred[2].getReg()); - MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W)) + MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W)) .setReg(Pred[2].getReg()); - MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); + MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); return true; } if (PIdx != -1) { - MachineOperand &PMO = MI->getOperand(PIdx); + MachineOperand &PMO = MI.getOperand(PIdx); PMO.setReg(Pred[2].getReg()); - MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); + MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); return true; } @@ -1035,45 +983,94 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI, return false; } -unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const { +unsigned int R600InstrInfo::getPredicationCost(const MachineInstr &) const { return 2; } unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, + const MachineInstr &, unsigned *PredCost) const { if (PredCost) *PredCost = 2; return 2; } -bool 
R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { +unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex, + unsigned Channel) const { + assert(Channel == 0); + return RegIndex; +} - switch(MI->getOpcode()) { - default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); +bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + switch (MI.getOpcode()) { + default: { + MachineBasicBlock *MBB = MI.getParent(); + int OffsetOpIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr); + // addr is a custom operand with multiple MI operands, and only the + // first MI operand is given a name. + int RegOpIdx = OffsetOpIdx + 1; + int ChanOpIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan); + if (isRegisterLoad(MI)) { + int DstOpIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); + unsigned RegIndex = MI.getOperand(RegOpIdx).getImm(); + unsigned Channel = MI.getOperand(ChanOpIdx).getImm(); + unsigned Address = calculateIndirectAddress(RegIndex, Channel); + unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); + if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { + buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(), + getIndirectAddrRegClass()->getRegister(Address)); + } else { + buildIndirectRead(MBB, MI, MI.getOperand(DstOpIdx).getReg(), Address, + OffsetReg); + } + } else if (isRegisterStore(MI)) { + int ValOpIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val); + unsigned RegIndex = MI.getOperand(RegOpIdx).getImm(); + unsigned Channel = MI.getOperand(ChanOpIdx).getImm(); + unsigned Address = calculateIndirectAddress(RegIndex, Channel); + unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); + if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { + buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address), + MI.getOperand(ValOpIdx).getReg()); + } else { + buildIndirectWrite(MBB, MI, MI.getOperand(ValOpIdx).getReg(), + calculateIndirectAddress(RegIndex, Channel), + OffsetReg); + } + } else { + return false; + } + + MBB->erase(MI); + return true; + } case AMDGPU::R600_EXTRACT_ELT_V2: case AMDGPU::R600_EXTRACT_ELT_V4: - buildIndirectRead(MI->getParent(), MI, MI->getOperand(0).getReg(), - RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address - MI->getOperand(2).getReg(), - RI.getHWRegChan(MI->getOperand(1).getReg())); + buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(), + RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address + MI.getOperand(2).getReg(), + RI.getHWRegChan(MI.getOperand(1).getReg())); break; case AMDGPU::R600_INSERT_ELT_V2: case AMDGPU::R600_INSERT_ELT_V4: - buildIndirectWrite(MI->getParent(), MI, MI->getOperand(2).getReg(), // Value - RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address - MI->getOperand(3).getReg(), // Offset - RI.getHWRegChan(MI->getOperand(1).getReg())); // Channel + buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value + RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address + MI.getOperand(3).getReg(), // Offset + RI.getHWRegChan(MI.getOperand(1).getReg())); // Channel break; } - MI->eraseFromParent(); + MI.eraseFromParent(); return true; } void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved, const MachineFunction &MF) const { - const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>( - MF.getSubtarget().getFrameLowering()); + const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); + const R600FrameLowering *TFL = ST.getFrameLowering(); unsigned 
StackWidth = TFL->getStackWidth(MF); int End = getIndirectIndexEnd(MF); @@ -1091,13 +1088,6 @@ void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved, } } -unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex, - unsigned Channel) const { - // XXX: Remove when we support a stack width > 2 - assert(Channel == 0); - return RegIndex; -} - const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const { return &AMDGPU::R600_TReg32_XRegClass; } @@ -1124,13 +1114,13 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, } MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, OffsetReg); - setImmOperand(MOVA, AMDGPU::OpName::write, 0); + setImmOperand(*MOVA, AMDGPU::OpName::write, 0); MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, AddrReg, ValueReg) .addReg(AMDGPU::AR_X, RegState::Implicit | RegState::Kill); - setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1); + setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1); return Mov; } @@ -1157,17 +1147,74 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, OffsetReg); - setImmOperand(MOVA, AMDGPU::OpName::write, 0); + setImmOperand(*MOVA, AMDGPU::OpName::write, 0); MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, ValueReg, AddrReg) .addReg(AMDGPU::AR_X, RegState::Implicit | RegState::Kill); - setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1); + setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1); return Mov; } +int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + int Offset = -1; + + if (MFI->getNumObjects() == 0) { + return -1; + } + + if (MRI.livein_empty()) { + return 0; + } + + const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass(); + for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), + LE = MRI.livein_end(); + LI != LE; ++LI) { + unsigned Reg = LI->first; + if (TargetRegisterInfo::isVirtualRegister(Reg) || + !IndirectRC->contains(Reg)) + continue; + + unsigned RegIndex; + unsigned RegEnd; + for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd; + ++RegIndex) { + if (IndirectRC->getRegister(RegIndex) == Reg) + break; + } + Offset = std::max(Offset, (int)RegIndex); + } + + return Offset + 1; +} + +int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const { + int Offset = 0; + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Variable sized objects are not supported + if (MFI->hasVarSizedObjects()) { + return -1; + } + + if (MFI->getNumObjects() == 0) { + return -1; + } + + const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); + const R600FrameLowering *TFL = ST.getFrameLowering(); + + unsigned IgnoredFrameReg; + Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg); + + return getIndirectIndexBegin(MF) + Offset; +} + unsigned R600InstrInfo::getMaxAlusPerClause() const { return 115; } @@ -1256,7 +1303,7 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( const { assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented"); unsigned Opcode; - if (ST.getGeneration() <= AMDGPUSubtarget::R700) + if (ST.getGeneration() <= R600Subtarget::R700) Opcode = AMDGPU::DOT4_r600; else Opcode = AMDGPU::DOT4_eg; @@ -1293,7 +1340,7 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( MachineOperand 
&MO = MI->getOperand( getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot))); assert (MO.isImm()); - setImmOperand(MIB, Operands[i], MO.getImm()); + setImmOperand(*MIB, Operands[i], MO.getImm()); } MIB->getOperand(20).setImm(0); return MIB; @@ -1305,7 +1352,7 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB, uint64_t Imm) const { MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg, AMDGPU::ALU_LITERAL_X); - setImmOperand(MovImm, AMDGPU::OpName::literal, Imm); + setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm); return MovImm; } @@ -1323,25 +1370,21 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const { return AMDGPU::getNamedOperandIdx(Opcode, Op); } -void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op, +void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op, int64_t Imm) const { - int Idx = getOperandIdx(*MI, Op); + int Idx = getOperandIdx(MI, Op); assert(Idx != -1 && "Operand not supported for this instruction."); - assert(MI->getOperand(Idx).isImm()); - MI->getOperand(Idx).setImm(Imm); + assert(MI.getOperand(Idx).isImm()); + MI.getOperand(Idx).setImm(Imm); } //===----------------------------------------------------------------------===// // Instruction flag getters/setters //===----------------------------------------------------------------------===// -bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const { - return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0; -} - -MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx, +MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx, unsigned Flag) const { - unsigned TargetFlags = get(MI->getOpcode()).TSFlags; + unsigned TargetFlags = get(MI.getOpcode()).TSFlags; int FlagIndex = 0; if (Flag != 0) { // If we pass something other than the default value of Flag to this @@ -1351,20 +1394,26 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx, bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3; switch (Flag) { case MO_FLAG_CLAMP: - FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp); + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp); break; case MO_FLAG_MASK: - FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write); + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write); break; case MO_FLAG_NOT_LAST: case MO_FLAG_LAST: - FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last); + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last); break; case MO_FLAG_NEG: switch (SrcIdx) { - case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break; - case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break; - case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break; + case 0: + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg); + break; + case 1: + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg); + break; + case 2: + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg); + break; } break; @@ -1373,8 +1422,12 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx, "instructions."); (void)IsOP3; switch (SrcIdx) { - case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break; - case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break; + case 0: + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs); + break; + case 1: + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs); + break; } break; @@ -1389,14 +1442,14 @@ MachineOperand 
&R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx, "Instruction flags not supported for this instruction"); } - MachineOperand &FlagOp = MI->getOperand(FlagIndex); + MachineOperand &FlagOp = MI.getOperand(FlagIndex); assert(FlagOp.isImm()); return FlagOp; } -void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand, +void R600InstrInfo::addFlag(MachineInstr &MI, unsigned Operand, unsigned Flag) const { - unsigned TargetFlags = get(MI->getOpcode()).TSFlags; + unsigned TargetFlags = get(MI.getOpcode()).TSFlags; if (Flag == 0) { return; } @@ -1415,9 +1468,9 @@ void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand, } } -void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand, +void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand, unsigned Flag) const { - unsigned TargetFlags = get(MI->getOpcode()).TSFlags; + unsigned TargetFlags = get(MI.getOpcode()).TSFlags; if (HAS_NATIVE_OPERANDS(TargetFlags)) { MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag); FlagOp.setImm(0); @@ -1428,3 +1481,11 @@ void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand, FlagOp.setImm(InstFlags); } } + +bool R600InstrInfo::isRegisterStore(const MachineInstr &MI) const { + return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE; +} + +bool R600InstrInfo::isRegisterLoad(const MachineInstr &MI) const { + return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD; +} |
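Most of this change is one mechanical migration: R600InstrInfo helpers that previously took `MachineInstr *MI` now take `MachineInstr &MI`, callers dereference once at the call site (`addFlag(PredSet, ...)` becomes `addFlag(*PredSet, ...)`), and the callees switch from `->` to `.`. The stand-alone sketch below illustrates only that pattern, assuming nothing about LLVM itself; `Instr`, `isMoveOld`, and `isMoveNew` are illustrative stand-ins, not LLVM types or APIs.

```cpp
#include <cassert>

// Illustrative stand-in for a machine instruction; not an LLVM type.
struct Instr {
  unsigned Opcode;
  unsigned getOpcode() const { return Opcode; }
};

// Old style: pointer parameter. The callee has to treat the pointer as
// possibly null (or assert), and every access goes through '->'.
static bool isMoveOld(const Instr *MI) {
  assert(MI && "expected an instruction");
  return MI->getOpcode() == 1;
}

// New style, as applied throughout this diff: reference parameter.
// Non-null by construction, accesses use '.', and the null assert goes away.
static bool isMoveNew(const Instr &MI) {
  return MI.getOpcode() == 1;
}

int main() {
  Instr I{1};
  // Call sites that hold a pointer change from f(MI) to f(*MI);
  // call sites that already hold a reference pass it through unchanged.
  assert(isMoveOld(&I));
  assert(isMoveNew(I));
  return 0;
}
```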