aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp220
1 files changed, 116 insertions, 104 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 8482dbfec250..ed1dc77bd545 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -11,25 +11,11 @@
//===----------------------------------------------------------------------===//
#include "GCNHazardRecognizer.h"
-#include "AMDGPUSubtarget.h"
-#include "SIDefines.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
+#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <algorithm>
-#include <cassert>
-#include <limits>
-#include <set>
-#include <vector>
+#include "llvm/Support/TargetParser.h"
using namespace llvm;
@@ -50,6 +36,10 @@ GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
TSchedModel.init(&ST);
}
+void GCNHazardRecognizer::Reset() {
+ EmittedInstrs.clear();
+}
+
void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
EmitInstruction(SU->getInstr());
}
@@ -59,7 +49,7 @@ void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
}
static bool isDivFMas(unsigned Opcode) {
- return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
+ return Opcode == AMDGPU::V_DIV_FMAS_F32_e64 || Opcode == AMDGPU::V_DIV_FMAS_F64_e64;
}
static bool isSGetReg(unsigned Opcode) {
@@ -67,7 +57,14 @@ static bool isSGetReg(unsigned Opcode) {
}
static bool isSSetReg(unsigned Opcode) {
- return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
+ switch (Opcode) {
+ case AMDGPU::S_SETREG_B32:
+ case AMDGPU::S_SETREG_B32_mode:
+ case AMDGPU::S_SETREG_IMM32_B32:
+ case AMDGPU::S_SETREG_IMM32_B32_mode:
+ return true;
+ }
+ return false;
}
static bool isRWLane(unsigned Opcode) {
@@ -118,8 +115,8 @@ static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
static bool isPermlane(const MachineInstr &MI) {
unsigned Opcode = MI.getOpcode();
- return Opcode == AMDGPU::V_PERMLANE16_B32 ||
- Opcode == AMDGPU::V_PERMLANEX16_B32;
+ return Opcode == AMDGPU::V_PERMLANE16_B32_e64 ||
+ Opcode == AMDGPU::V_PERMLANEX16_B32_e64;
}
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
@@ -131,75 +128,83 @@ static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
+ // If we are not in "HazardRecognizerMode" and therefore not being run from
+ // the scheduler, track possible stalls from hazards but don't insert noops.
+ auto HazardType = IsHazardRecognizerMode ? NoopHazard : Hazard;
+
if (MI->isBundle())
return NoHazard;
if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
// FIXME: Should flat be considered vmem?
if ((SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI))
&& checkVMEMHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (checkFPAtomicToDenormModeHazard(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (ST.hasNoDataDepHazard())
return NoHazard;
if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (ST.hasReadM0MovRelInterpHazard() &&
(TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
checkReadM0Hazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
checkReadM0Hazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
- if (MI->mayLoadOrStore() && checkMAILdStHazards(MI) > 0)
- return NoopHazard;
+ if ((SIInstrInfo::isVMEM(*MI) ||
+ SIInstrInfo::isFLAT(*MI) ||
+ SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0)
+ return HazardType;
if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
- return NoopHazard;
-
- if (checkAnyInstHazards(MI) > 0)
- return NoopHazard;
+ return HazardType;
return NoHazard;
}
-static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
- .addImm(0);
+static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII,
+ unsigned Quantity) {
+ while (Quantity > 0) {
+ unsigned Arg = std::min(Quantity, 8u);
+ Quantity -= Arg;
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
+ .addImm(Arg - 1);
+ }
}
void GCNHazardRecognizer::processBundle() {
@@ -210,11 +215,11 @@ void GCNHazardRecognizer::processBundle() {
CurrCycleInstr = &*MI;
unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
- if (IsHazardRecognizerMode)
+ if (IsHazardRecognizerMode) {
fixHazards(CurrCycleInstr);
- for (unsigned i = 0; i < WaitStates; ++i)
- insertNoopInBundle(CurrCycleInstr, TII);
+ insertNoopsInBundle(CurrCycleInstr, TII, WaitStates);
+ }
// It's unnecessary to track more than MaxLookAhead instructions. Since we
// include the bundled MI directly after, only add a maximum of
@@ -241,7 +246,7 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (MI->isBundle())
return 0;
- int WaitStates = std::max(0, checkAnyInstHazards(MI));
+ int WaitStates = 0;
if (SIInstrInfo::isSMRD(*MI))
return std::max(WaitStates, checkSMRDHazards(MI));
@@ -291,7 +296,9 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (SIInstrInfo::isMAI(*MI))
return std::max(WaitStates, checkMAIHazards(MI));
- if (MI->mayLoadOrStore())
+ if (SIInstrInfo::isVMEM(*MI) ||
+ SIInstrInfo::isFLAT(*MI) ||
+ SIInstrInfo::isDS(*MI))
return std::max(WaitStates, checkMAILdStHazards(MI));
return WaitStates;
@@ -304,15 +311,19 @@ void GCNHazardRecognizer::EmitNoop() {
void GCNHazardRecognizer::AdvanceCycle() {
// When the scheduler detects a stall, it will call AdvanceCycle() without
// emitting any instructions.
- if (!CurrCycleInstr)
+ if (!CurrCycleInstr) {
+ EmittedInstrs.push_front(nullptr);
return;
+ }
// Do not track non-instructions which do not affect the wait states.
// If included, these instructions can lead to buffer overflow such that
// detectable hazards are missed.
if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
- CurrCycleInstr->isKill())
+ CurrCycleInstr->isKill()) {
+ CurrCycleInstr = nullptr;
return;
+ }
if (CurrCycleInstr->isBundle()) {
processBundle();
@@ -367,7 +378,7 @@ static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
if (IsHazard(&*I))
return WaitStates;
- if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
+ if (I->isInlineAsm() || I->isMetaInstruction())
continue;
WaitStates += SIInstrInfo::getNumWaitStates(*I);
@@ -460,8 +471,8 @@ int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
// No-op Hazard Detection
//===----------------------------------------------------------------------===//
-static void addRegUnits(const SIRegisterInfo &TRI,
- BitVector &BV, unsigned Reg) {
+static void addRegUnits(const SIRegisterInfo &TRI, BitVector &BV,
+ MCRegister Reg) {
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
BV.set(*RUI);
}
@@ -471,7 +482,7 @@ static void addRegsToSet(const SIRegisterInfo &TRI,
BitVector &Set) {
for (const MachineOperand &Op : Ops) {
if (Op.isReg())
- addRegUnits(TRI, Set, Op.getReg());
+ addRegUnits(TRI, Set, Op.getReg().asMCReg());
}
}
@@ -718,8 +729,9 @@ int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
return -1;
}
-int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
- const MachineRegisterInfo &MRI) {
+int
+GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
+ const MachineRegisterInfo &MRI) {
// Helper to check for the hazard where VMEM instructions that store more than
// 8 bytes can have there store data over written by the next instruction.
const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -821,34 +833,6 @@ int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
return RFEWaitStates - WaitStatesNeeded;
}
-int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
- if (MI->isDebugInstr())
- return 0;
-
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- if (!ST.hasSMovFedHazard())
- return 0;
-
- // Check for any instruction reading an SGPR after a write from
- // s_mov_fed_b32.
- int MovFedWaitStates = 1;
- int WaitStatesNeeded = 0;
-
- for (const MachineOperand &Use : MI->uses()) {
- if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
- continue;
- auto IsHazardFn = [] (MachineInstr *MI) {
- return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
- };
- int WaitStatesNeededForUse =
- MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
- MovFedWaitStates);
- WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
- }
-
- return WaitStatesNeeded;
-}
-
int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
const SIInstrInfo *TII = ST.getInstrInfo();
const int SMovRelWaitStates = 1;
@@ -930,10 +914,12 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
return false;
};
- auto IsExpiredFn = [] (MachineInstr *MI, int) {
+ auto IsExpiredFn = [](MachineInstr *MI, int) {
return MI && (SIInstrInfo::isVALU(*MI) ||
(MI->getOpcode() == AMDGPU::S_WAITCNT &&
- !MI->getOperand(0).getImm()));
+ !MI->getOperand(0).getImm()) ||
+ (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+ MI->getOperand(0).getImm() == 0xffe3));
};
if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
@@ -941,7 +927,9 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
return false;
const SIInstrInfo *TII = ST.getInstrInfo();
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(0xffe3);
return true;
}
@@ -955,7 +943,6 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
unsigned SDSTName;
switch (MI->getOpcode()) {
case AMDGPU::V_READLANE_B32:
- case AMDGPU::V_READLANE_B32_gfx10:
case AMDGPU::V_READFIRSTLANE_B32:
SDSTName = AMDGPU::OpName::vdst;
break;
@@ -1183,7 +1170,7 @@ int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
case AMDGPU::S_WAITCNT_VMCNT:
case AMDGPU::S_WAITCNT_EXPCNT:
case AMDGPU::S_WAITCNT_LGKMCNT:
- case AMDGPU::S_WAITCNT_IDLE:
+ case AMDGPU::S_WAIT_IDLE:
return true;
default:
break;
@@ -1207,7 +1194,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
return SIInstrInfo::isVALU(*MI);
};
- if (Opc != AMDGPU::V_ACCVGPR_READ_B32) { // MFMA or v_accvgpr_write
+ if (Opc != AMDGPU::V_ACCVGPR_READ_B32_e64) { // MFMA or v_accvgpr_write
const int LegacyVALUWritesVGPRWaitStates = 2;
const int VALUWritesExecWaitStates = 4;
const int MaxWaitStates = 4;
@@ -1235,15 +1222,15 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
auto IsMFMAFn = [] (MachineInstr *MI) {
return SIInstrInfo::isMAI(*MI) &&
- MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
- MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32;
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
};
for (const MachineOperand &Op : MI->explicit_operands()) {
if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
continue;
- if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32)
+ if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
continue;
const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
@@ -1277,7 +1264,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
int OpNo = MI->getOperandNo(&Op);
if (OpNo == SrcCIdx) {
NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
- } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) {
+ } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) {
switch (HazardDefLatency) {
case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
break;
@@ -1287,7 +1274,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
break;
}
- } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
+ } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
switch (HazardDefLatency) {
case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
break;
@@ -1306,7 +1293,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
return WaitStatesNeeded; // Early exit.
auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
- if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
+ if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
return false;
Register DstReg = MI->getOperand(0).getReg();
return TRI.regsOverlap(Reg, DstReg);
@@ -1318,7 +1305,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
if (OpNo == SrcCIdx)
NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
- else if (Opc == AMDGPU::V_ACCVGPR_READ_B32)
+ else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64)
NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;
WaitStatesNeededForUse = NeedWaitStates -
@@ -1329,7 +1316,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
return WaitStatesNeeded; // Early exit.
}
- if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
+ if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
@@ -1373,7 +1360,7 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
int WaitStatesNeeded = 0;
auto IsAccVgprReadFn = [] (MachineInstr *MI) {
- return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
+ return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64;
};
for (const MachineOperand &Op : MI->explicit_uses()) {
@@ -1383,7 +1370,7 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
Register Reg = Op.getReg();
const int AccVgprReadLdStWaitStates = 2;
- const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
+ const int VALUWriteAccVgprRdWrLdStDepVALUWaitStates = 1;
const int MaxWaitStates = 2;
int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
@@ -1393,8 +1380,9 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
if (WaitStatesNeeded == MaxWaitStates)
return WaitStatesNeeded; // Early exit.
- auto IsVALUAccVgprReadCheckFn = [Reg, this] (MachineInstr *MI) {
- if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
+ auto IsVALUAccVgprRdWrCheckFn = [Reg, this](MachineInstr *MI) {
+ if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64 &&
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
return false;
auto IsVALUFn = [] (MachineInstr *MI) {
return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI);
@@ -1403,10 +1391,34 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
std::numeric_limits<int>::max();
};
- WaitStatesNeededForUse = VALUWriteAccVgprReadLdStDepVALUWaitStates -
- getWaitStatesSince(IsVALUAccVgprReadCheckFn, MaxWaitStates);
+ WaitStatesNeededForUse = VALUWriteAccVgprRdWrLdStDepVALUWaitStates -
+ getWaitStatesSince(IsVALUAccVgprRdWrCheckFn, MaxWaitStates);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
}
return WaitStatesNeeded;
}
+
+bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+ if (!SU->isInstr())
+ return false;
+
+ MachineInstr *MAI = nullptr;
+ auto IsMFMAFn = [&MAI] (MachineInstr *MI) {
+ MAI = nullptr;
+ if (SIInstrInfo::isMAI(*MI) &&
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64)
+ MAI = MI;
+ return MAI != nullptr;
+ };
+
+ MachineInstr *MI = SU->getInstr();
+ if (IsMFMAFn(MI)) {
+ int W = getWaitStatesSince(IsMFMAFn, 16);
+ if (MAI)
+ return W < (int)TSchedModel.computeInstrLatency(MAI);
+ }
+
+ return false;
+}