aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp245
1 files changed, 0 insertions, 245 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
deleted file mode 100644
index 43b3bf43fe56..000000000000
--- a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
+++ /dev/null
@@ -1,245 +0,0 @@
-//===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU
-/// instructions that produce single-use VGPR values. If the value is forwarded
-/// to the consumer instruction prior to VGPR writeback, the hardware can
-/// then skip (kill) the VGPR write.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUGenSearchableTables.inc"
-#include "GCNSubtarget.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/Register.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/MC/MCRegister.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Pass.h"
-#include <array>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"
-
-namespace {
-class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
-private:
- const SIInstrInfo *SII;
- class SingleUseInstruction {
- private:
- static const unsigned MaxSkipRange = 0b111;
- static const unsigned MaxNumberOfSkipRegions = 2;
-
- unsigned LastEncodedPositionEnd;
- MachineInstr *ProducerInstr;
-
- std::array<unsigned, MaxNumberOfSkipRegions + 1> SingleUseRegions;
- SmallVector<unsigned, MaxNumberOfSkipRegions> SkipRegions;
-
- // Adds a skip region into the instruction.
- void skip(const unsigned ProducerPosition) {
- while (LastEncodedPositionEnd + MaxSkipRange < ProducerPosition) {
- SkipRegions.push_back(MaxSkipRange);
- LastEncodedPositionEnd += MaxSkipRange;
- }
- SkipRegions.push_back(ProducerPosition - LastEncodedPositionEnd);
- LastEncodedPositionEnd = ProducerPosition;
- }
-
- bool currentRegionHasSpace() {
- const auto Region = SkipRegions.size();
- // The first region has an extra bit of encoding space.
- return SingleUseRegions[Region] <
- ((Region == MaxNumberOfSkipRegions) ? 0b1111U : 0b111U);
- }
-
- unsigned encodeImm() {
- // Handle the first Single Use Region separately as it has an extra bit
- // of encoding space.
- unsigned Imm = SingleUseRegions[SkipRegions.size()];
- unsigned ShiftAmount = 4;
- for (unsigned i = SkipRegions.size(); i > 0; i--) {
- Imm |= SkipRegions[i - 1] << ShiftAmount;
- ShiftAmount += 3;
- Imm |= SingleUseRegions[i - 1] << ShiftAmount;
- ShiftAmount += 3;
- }
- return Imm;
- }
-
- public:
- SingleUseInstruction(const unsigned ProducerPosition,
- MachineInstr *Producer)
- : LastEncodedPositionEnd(ProducerPosition + 1), ProducerInstr(Producer),
- SingleUseRegions({1, 0, 0}) {}
-
- // Returns false if adding a new single use producer failed. This happens
- // because it could not be encoded, either because there is no room to
- // encode another single use producer region or that this single use
- // producer is too far away to encode the amount of instructions to skip.
- bool tryAddProducer(const unsigned ProducerPosition, MachineInstr *MI) {
- // Producer is too far away to encode into this instruction or another
- // skip region is needed and SkipRegions.size() = 2 so there's no room for
- // another skip region, therefore a new instruction is needed.
- if (LastEncodedPositionEnd +
- (MaxSkipRange * (MaxNumberOfSkipRegions - SkipRegions.size())) <
- ProducerPosition)
- return false;
-
- // If a skip region is needed.
- if (LastEncodedPositionEnd != ProducerPosition ||
- !currentRegionHasSpace()) {
- // If the current region is out of space therefore a skip region would
- // be needed, but there is no room for another skip region.
- if (SkipRegions.size() == MaxNumberOfSkipRegions)
- return false;
- skip(ProducerPosition);
- }
-
- SingleUseRegions[SkipRegions.size()]++;
- LastEncodedPositionEnd = ProducerPosition + 1;
- ProducerInstr = MI;
- return true;
- }
-
- auto emit(const SIInstrInfo *SII) {
- return BuildMI(*ProducerInstr->getParent(), ProducerInstr, DebugLoc(),
- SII->get(AMDGPU::S_SINGLEUSE_VDST))
- .addImm(encodeImm());
- }
- };
-
-public:
- static char ID;
-
- AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
-
- void insertSingleUseInstructions(
- ArrayRef<std::pair<unsigned, MachineInstr *>> SingleUseProducers) const {
- SmallVector<SingleUseInstruction> Instructions;
-
- for (auto &[Position, MI] : SingleUseProducers) {
- // Encode this position into the last single use instruction if possible.
- if (Instructions.empty() ||
- !Instructions.back().tryAddProducer(Position, MI)) {
- // If not, add a new instruction.
- Instructions.push_back(SingleUseInstruction(Position, MI));
- }
- }
-
- for (auto &Instruction : Instructions)
- Instruction.emit(SII);
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- const auto &ST = MF.getSubtarget<GCNSubtarget>();
- if (!ST.hasVGPRSingleUseHintInsts())
- return false;
-
- SII = ST.getInstrInfo();
- const auto *TRI = &SII->getRegisterInfo();
- bool InstructionEmitted = false;
-
- for (MachineBasicBlock &MBB : MF) {
- DenseMap<MCRegUnit, unsigned> RegisterUseCount;
-
- // Handle boundaries at the end of basic block separately to avoid
- // false positives. If they are live at the end of a basic block then
- // assume it has more uses later on.
- for (const auto &Liveout : MBB.liveouts()) {
- for (MCRegUnitMaskIterator Units(Liveout.PhysReg, TRI); Units.isValid();
- ++Units) {
- const auto [Unit, Mask] = *Units;
- if ((Mask & Liveout.LaneMask).any())
- RegisterUseCount[Unit] = 2;
- }
- }
-
- SmallVector<std::pair<unsigned, MachineInstr *>>
- SingleUseProducerPositions;
-
- unsigned VALUInstrCount = 0;
- for (MachineInstr &MI : reverse(MBB.instrs())) {
- // All registers in all operands need to be single use for an
- // instruction to be marked as a single use producer.
- bool AllProducerOperandsAreSingleUse = true;
-
- // Gather a list of Registers used before updating use counts to avoid
- // double counting registers that appear multiple times in a single
- // MachineInstr.
- SmallVector<MCRegUnit> RegistersUsed;
-
- for (const auto &Operand : MI.all_defs()) {
- const auto Reg = Operand.getReg();
-
- const auto RegUnits = TRI->regunits(Reg);
- if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit Unit) {
- return RegisterUseCount[Unit] > 1;
- }))
- AllProducerOperandsAreSingleUse = false;
-
- // Reset uses count when a register is no longer live.
- for (const MCRegUnit Unit : RegUnits)
- RegisterUseCount.erase(Unit);
- }
-
- for (const auto &Operand : MI.all_uses()) {
- const auto Reg = Operand.getReg();
-
- // Count the number of times each register is read.
- for (const MCRegUnit Unit : TRI->regunits(Reg)) {
- if (!is_contained(RegistersUsed, Unit))
- RegistersUsed.push_back(Unit);
- }
- }
- for (const MCRegUnit Unit : RegistersUsed)
- RegisterUseCount[Unit]++;
-
- // Do not attempt to optimise across exec mask changes.
- if (MI.modifiesRegister(AMDGPU::EXEC, TRI) ||
- AMDGPU::isInvalidSingleUseConsumerInst(MI.getOpcode())) {
- for (auto &UsedReg : RegisterUseCount)
- UsedReg.second = 2;
- }
-
- if (!SIInstrInfo::isVALU(MI) ||
- AMDGPU::isInvalidSingleUseProducerInst(MI.getOpcode()))
- continue;
- if (AllProducerOperandsAreSingleUse) {
- SingleUseProducerPositions.push_back({VALUInstrCount, &MI});
- InstructionEmitted = true;
- }
- VALUInstrCount++;
- }
- insertSingleUseInstructions(SingleUseProducerPositions);
- }
- return InstructionEmitted;
- }
-};
-} // namespace
-
-char AMDGPUInsertSingleUseVDST::ID = 0;
-
-char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;
-
-INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,
- "AMDGPU Insert SingleUseVDST", false, false)