aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r--lib/Target/AMDGPU/SILoadStoreOptimizer.cpp50
1 files changed, 21 insertions, 29 deletions
diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 1bdb1f0ee9f9..9e972a569a0f 100644
--- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -26,7 +26,7 @@
//
// - Live interval recomputing seems inefficient. This currently only matches
// one pair, and recomputes live intervals and moves on to the next pair. It
-// would be better to compute a list of all merges that need to occur
+// would be better to compute a list of all merges that need to occur.
//
// - With a list of instructions to process, we can also merge more. If a
// cluster of loads have offsets that are too large to fit in the 8-bit
@@ -36,6 +36,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -61,7 +62,6 @@ private:
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
-
static bool offsetsCanBeCombined(unsigned Offset0,
unsigned Offset1,
unsigned EltSize);
@@ -69,10 +69,6 @@ private:
MachineBasicBlock::iterator findMatchingDSInst(MachineBasicBlock::iterator I,
unsigned EltSize);
- void updateRegDefsUses(unsigned SrcReg,
- unsigned DstReg,
- unsigned SubIdx);
-
MachineBasicBlock::iterator mergeRead2Pair(
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
@@ -193,17 +189,6 @@ SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
return E;
}
-void SILoadStoreOptimizer::updateRegDefsUses(unsigned SrcReg,
- unsigned DstReg,
- unsigned SubIdx) {
- for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg),
- E = MRI->reg_end(); I != E; ) {
- MachineOperand &O = *I;
- ++I;
- O.substVirtReg(DstReg, SubIdx, *TRI);
- }
-}
-
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
@@ -268,19 +253,19 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
.addOperand(*Dest1)
.addReg(DestReg, RegState::Kill, SubRegIdx1);
- LIS->InsertMachineInstrInMaps(Read2);
+ LIS->InsertMachineInstrInMaps(*Read2);
// repairLiveintervalsInRange() doesn't handle physical register, so we have
// to update the M0 range manually.
- SlotIndex PairedIndex = LIS->getInstructionIndex(Paired);
+ SlotIndex PairedIndex = LIS->getInstructionIndex(*Paired);
LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI));
LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex);
bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot();
// The new write to the original destination register is now the copy. Steal
// the old SlotIndex.
- LIS->ReplaceMachineInstrInMaps(I, Copy0);
- LIS->ReplaceMachineInstrInMaps(Paired, Copy1);
+ LIS->ReplaceMachineInstrInMaps(*I, *Copy0);
+ LIS->ReplaceMachineInstrInMaps(*Paired, *Copy1);
I->eraseFromParent();
Paired->eraseFromParent();
@@ -291,7 +276,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
LIS->createAndComputeVirtRegInterval(DestReg);
if (UpdateM0Range) {
- SlotIndex Read2Index = LIS->getInstructionIndex(Read2);
+ SlotIndex Read2Index = LIS->getInstructionIndex(*Read2);
M0Segment->end = Read2Index.getRegSlot();
}
@@ -340,7 +325,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
// repairLiveintervalsInRange() doesn't handle physical register, so we have
// to update the M0 range manually.
- SlotIndex PairedIndex = LIS->getInstructionIndex(Paired);
+ SlotIndex PairedIndex = LIS->getInstructionIndex(*Paired);
LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI));
LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex);
bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot();
@@ -359,8 +344,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
// XXX - How do we express subregisters here?
unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg() };
- LIS->RemoveMachineInstrFromMaps(I);
- LIS->RemoveMachineInstrFromMaps(Paired);
+ LIS->RemoveMachineInstrFromMaps(*I);
+ LIS->RemoveMachineInstrFromMaps(*Paired);
I->eraseFromParent();
Paired->eraseFromParent();
@@ -368,7 +353,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs);
if (UpdateM0Range) {
- SlotIndex Write2Index = LIS->getInstructionIndex(Write2);
+ SlotIndex Write2Index = LIS->getInstructionIndex(*Write2);
M0Segment->end = Write2Index.getRegSlot();
}
@@ -423,9 +408,16 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
}
bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
- const TargetSubtargetInfo &STM = MF.getSubtarget();
- TRI = static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
- TII = static_cast<const SIInstrInfo *>(STM.getInstrInfo());
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+ if (!STM.loadStoreOptEnabled())
+ return false;
+
+ TII = STM.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();