aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIInsertWaits.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIInsertWaits.cpp')
-rw-r--r--lib/Target/AMDGPU/SIInsertWaits.cpp104
1 files changed, 66 insertions, 38 deletions
diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp
index fceabd7a8fdd..47257ce16ceb 100644
--- a/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -21,16 +21,32 @@
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <new>
+#include <utility>
#define DEBUG_TYPE "si-insert-waits"
using namespace llvm;
-using namespace llvm::AMDGPU;
namespace {
@@ -42,7 +58,6 @@ typedef union {
unsigned LGKM;
} Named;
unsigned Array[3];
-
} Counters;
typedef enum {
@@ -55,13 +70,12 @@ typedef Counters RegCounters[512];
typedef std::pair<unsigned, unsigned> RegInterval;
class SIInsertWaits : public MachineFunctionPass {
-
private:
- const SISubtarget *ST;
- const SIInstrInfo *TII;
- const SIRegisterInfo *TRI;
+ const SISubtarget *ST = nullptr;
+ const SIInstrInfo *TII = nullptr;
+ const SIRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI;
- IsaVersion IV;
+ AMDGPU::IsaInfo::IsaVersion ISA;
/// \brief Constant zero value
static const Counters ZeroCounts;
@@ -86,7 +100,7 @@ private:
RegCounters DefinedRegs;
/// \brief Different export instruction types seen since last wait.
- unsigned ExpInstrTypesSeen;
+ unsigned ExpInstrTypesSeen = 0;
/// \brief Type of the last opcode.
InstType LastOpcodeType;
@@ -100,7 +114,7 @@ private:
bool ReturnsVoid;
/// Whether the VCCZ bit is possibly corrupt
- bool VCCZCorrupt;
+ bool VCCZCorrupt = false;
/// \brief Get increment/decrement amount for this instruction.
Counters getHwCounts(MachineInstr &MI);
@@ -141,13 +155,7 @@ private:
public:
static char ID;
- SIInsertWaits() :
- MachineFunctionPass(ID),
- ST(nullptr),
- TII(nullptr),
- TRI(nullptr),
- ExpInstrTypesSeen(0),
- VCCZCorrupt(false) { }
+ SIInsertWaits() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -161,7 +169,7 @@ public:
}
};
-} // End anonymous namespace
+} // end anonymous namespace
INITIALIZE_PASS_BEGIN(SIInsertWaits, DEBUG_TYPE,
"SI Insert Waits", false, false)
@@ -294,7 +302,6 @@ RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const Counters &Increment) {
-
// Get the hardware counter increments and sum them up
Counters Limit = ZeroCounts;
unsigned Sum = 0;
@@ -366,7 +373,6 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const Counters &Required) {
-
// End of program? No need to wait on anything
// A function not returning void needs to wait, because other bytecode will
// be appended after it and we don't know what it will be.
@@ -393,7 +399,6 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
bool NeedWait = false;
for (unsigned i = 0; i < 3; ++i) {
-
if (Required.Array[i] <= WaitedOn.Array[i])
continue;
@@ -421,10 +426,10 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
// Build the wait instruction
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm(encodeWaitcnt(IV,
- Counts.Named.VM,
- Counts.Named.EXP,
- Counts.Named.LGKM));
+ .addImm(AMDGPU::encodeWaitcnt(ISA,
+ Counts.Named.VM,
+ Counts.Named.EXP,
+ Counts.Named.LGKM));
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
@@ -434,7 +439,6 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
/// \brief helper function for handleOperands
static void increaseCounters(Counters &Dst, const Counters &Src) {
-
for (unsigned i = 0; i < 3; ++i)
Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
}
@@ -453,9 +457,9 @@ void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
unsigned Imm = I->getOperand(0).getImm();
Counters Counts, WaitOn;
- Counts.Named.VM = decodeVmcnt(IV, Imm);
- Counts.Named.EXP = decodeExpcnt(IV, Imm);
- Counts.Named.LGKM = decodeLgkmcnt(IV, Imm);
+ Counts.Named.VM = AMDGPU::decodeVmcnt(ISA, Imm);
+ Counts.Named.EXP = AMDGPU::decodeExpcnt(ISA, Imm);
+ Counts.Named.LGKM = AMDGPU::decodeLgkmcnt(ISA, Imm);
for (unsigned i = 0; i < 3; ++i) {
if (Counts.Array[i] <= LastIssued.Array[i])
@@ -468,7 +472,6 @@ void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
}
Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
-
Counters Result = ZeroCounts;
// For each register affected by this instruction increase the result
@@ -484,7 +487,6 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
RegInterval Interval = getRegInterval(RC, Op);
for (unsigned j = Interval.first; j < Interval.second; ++j) {
-
if (Op.isDef()) {
increaseCounters(Result, UsedRegs[j]);
increaseCounters(Result, DefinedRegs[j]);
@@ -522,6 +524,16 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
}
}
+/// Return true if \p MBB has exactly one successor, and that successor both
+/// immediately follows \p MBB in layout and has exactly one predecessor.
+static bool hasTrivialSuccessor(const MachineBasicBlock &MBB) {
+ if (MBB.succ_size() != 1)
+ return false;
+
+ const MachineBasicBlock *Succ = *MBB.succ_begin();
+ return (Succ->pred_size() == 1) && MBB.isLayoutSuccessor(Succ);
+}
+
// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
@@ -531,12 +543,12 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
- IV = getIsaVersion(ST->getFeatureBits());
+ ISA = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- HardwareLimits.Named.VM = getVmcntBitMask(IV);
- HardwareLimits.Named.EXP = getExpcntBitMask(IV);
- HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV);
+ HardwareLimits.Named.VM = AMDGPU::getVmcntBitMask(ISA);
+ HardwareLimits.Named.EXP = AMDGPU::getExpcntBitMask(ISA);
+ HardwareLimits.Named.LGKM = AMDGPU::getLgkmcntBitMask(ISA);
WaitedOn = ZeroCounts;
DelayedWaitOn = ZeroCounts;
@@ -636,12 +648,14 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
handleSendMsg(MBB, I);
if (I->getOpcode() == AMDGPU::S_ENDPGM ||
- I->getOpcode() == AMDGPU::SI_RETURN)
+ I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
EndPgmBlocks.push_back(&MBB);
}
- // Wait for everything at the end of the MBB
- Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
+ // Wait for everything at the end of the MBB. If there is only one
+ // successor, we can defer this until the uses there.
+ if (!hasTrivialSuccessor(MBB))
+ Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
}
if (HaveScalarStores) {
@@ -665,7 +679,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// FIXME: It would be better to insert this before a waitcnt if any.
if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
- I->getOpcode() == AMDGPU::SI_RETURN) && !SeenDCacheWB) {
+ I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) && !SeenDCacheWB) {
Changes = true;
BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
}
@@ -676,5 +690,19 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
for (MachineInstr *I : RemoveMI)
I->eraseFromParent();
+ if (!MFI->isEntryFunction()) {
+ // Wait for any outstanding memory operations that the input registers may
+ // depend on. We can't track them and it's better to do the wait after the
+ // costly call sequence.
+
+ // TODO: Could insert earlier and schedule more liberally with operations
+ // that only use caller preserved registers.
+ MachineBasicBlock &EntryBB = MF.front();
+ BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm(0);
+
+ Changes = true;
+ }
+
return Changes;
}