diff options
Diffstat (limited to 'lib/Target/AMDGPU/SIInsertSkips.cpp')
-rw-r--r-- | lib/Target/AMDGPU/SIInsertSkips.cpp | 59 |
1 files changed, 34 insertions, 25 deletions
diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp index a2f844d7854e..61c8f359e168 100644 --- a/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // /// \file -/// \brief This pass inserts branches on the 0 exec mask over divergent branches +/// This pass inserts branches on the 0 exec mask over divergent branches /// branches when it's expected that jumping over the untaken control flow will /// be cheaper than having every workitem no-op through it. // @@ -18,6 +18,7 @@ #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -210,65 +211,73 @@ void SIInsertSkips::kill(MachineInstr &MI) { switch (MI.getOperand(2).getImm()) { case ISD::SETOEQ: case ISD::SETEQ: - Opcode = AMDGPU::V_CMPX_EQ_F32_e32; + Opcode = AMDGPU::V_CMPX_EQ_F32_e64; break; case ISD::SETOGT: case ISD::SETGT: - Opcode = AMDGPU::V_CMPX_LT_F32_e32; + Opcode = AMDGPU::V_CMPX_LT_F32_e64; break; case ISD::SETOGE: case ISD::SETGE: - Opcode = AMDGPU::V_CMPX_LE_F32_e32; + Opcode = AMDGPU::V_CMPX_LE_F32_e64; break; case ISD::SETOLT: case ISD::SETLT: - Opcode = AMDGPU::V_CMPX_GT_F32_e32; + Opcode = AMDGPU::V_CMPX_GT_F32_e64; break; case ISD::SETOLE: case ISD::SETLE: - Opcode = AMDGPU::V_CMPX_GE_F32_e32; + Opcode = AMDGPU::V_CMPX_GE_F32_e64; break; case ISD::SETONE: case ISD::SETNE: - Opcode = AMDGPU::V_CMPX_LG_F32_e32; + Opcode = AMDGPU::V_CMPX_LG_F32_e64; break; case ISD::SETO: - Opcode = AMDGPU::V_CMPX_O_F32_e32; + Opcode = AMDGPU::V_CMPX_O_F32_e64; break; case ISD::SETUO: - Opcode = AMDGPU::V_CMPX_U_F32_e32; + Opcode = AMDGPU::V_CMPX_U_F32_e64; break; case ISD::SETUEQ: - Opcode = AMDGPU::V_CMPX_NLG_F32_e32; + Opcode = AMDGPU::V_CMPX_NLG_F32_e64; break; case ISD::SETUGT: - Opcode = AMDGPU::V_CMPX_NGE_F32_e32; + Opcode = AMDGPU::V_CMPX_NGE_F32_e64; break; case ISD::SETUGE: - Opcode = AMDGPU::V_CMPX_NGT_F32_e32; + Opcode = AMDGPU::V_CMPX_NGT_F32_e64; break; case ISD::SETULT: - Opcode = AMDGPU::V_CMPX_NLE_F32_e32; + Opcode = AMDGPU::V_CMPX_NLE_F32_e64; break; case ISD::SETULE: - Opcode = AMDGPU::V_CMPX_NLT_F32_e32; + Opcode = AMDGPU::V_CMPX_NLT_F32_e64; break; case ISD::SETUNE: - Opcode = AMDGPU::V_CMPX_NEQ_F32_e32; + Opcode = AMDGPU::V_CMPX_NEQ_F32_e64; break; default: llvm_unreachable("invalid ISD:SET cond code"); } - // TODO: Allow this: - if (!MI.getOperand(0).isReg() || - !TRI->isVGPR(MBB.getParent()->getRegInfo(), - MI.getOperand(0).getReg())) - llvm_unreachable("SI_KILL operand should be a VGPR"); - - BuildMI(MBB, &MI, DL, TII->get(Opcode)) - .add(MI.getOperand(1)) - .add(MI.getOperand(0)); + assert(MI.getOperand(0).isReg()); + + if (TRI->isVGPR(MBB.getParent()->getRegInfo(), + MI.getOperand(0).getReg())) { + Opcode = AMDGPU::getVOPe32(Opcode); + BuildMI(MBB, &MI, DL, TII->get(Opcode)) + .add(MI.getOperand(1)) + .add(MI.getOperand(0)); + } else { + BuildMI(MBB, &MI, DL, TII->get(Opcode)) + .addReg(AMDGPU::VCC, RegState::Define) + .addImm(0) // src0 modifiers + .add(MI.getOperand(1)) + .addImm(0) // src1 modifiers + .add(MI.getOperand(0)) + .addImm(0); // omod + } break; } case AMDGPU::SI_KILL_I1_TERMINATOR: { @@ -330,7 +339,7 @@ bool SIInsertSkips::skipMaskBranch(MachineInstr &MI, } bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) { - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); TII = ST.getInstrInfo(); TRI = &TII->getRegisterInfo(); SkipThreshold = SkipThresholdFlag; |