aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUInstrInfo.cpp')
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstrInfo.cpp97
1 files changed, 19 insertions, 78 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 8156599528c2..07aa7c2cc8ad 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -16,95 +16,36 @@
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
-#define GET_INSTRINFO_CTOR_DTOR
-#include "AMDGPUGenInstrInfo.inc"
-
// Pin the vtable to this file.
-void AMDGPUInstrInfo::anchor() {}
-
-AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
- : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
- ST(ST),
- AMDGPUASI(ST.getAMDGPUAS()) {}
-
-// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
-// the first 16 loads will be interleaved with the stores, and the next 16 will
-// be clustered as expected. It should really split into 2 16 store batches.
-//
-// Loads are clustered until this returns false, rather than trying to schedule
-// groups of stores. This also means we have to deal with saying different
-// address space loads should be clustered, and ones which might cause bank
-// conflicts.
-//
-// This might be deprecated so it might not be worth that much effort to fix.
-bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
- int64_t Offset0, int64_t Offset1,
- unsigned NumLoads) const {
- assert(Offset1 > Offset0 &&
- "Second offset should be larger than first offset!");
- // If we have less than 16 loads in a row, and the offsets are within 64
- // bytes, then schedule together.
-
- // A cacheline is 64 bytes (for global memory).
- return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
-}
-
-// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
-enum SIEncodingFamily {
- SI = 0,
- VI = 1,
- SDWA = 2,
- SDWA9 = 3,
- GFX9 = 4
-};
-
-static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
- switch (ST.getGeneration()) {
- case AMDGPUSubtarget::SOUTHERN_ISLANDS:
- case AMDGPUSubtarget::SEA_ISLANDS:
- return SIEncodingFamily::SI;
- case AMDGPUSubtarget::VOLCANIC_ISLANDS:
- case AMDGPUSubtarget::GFX9:
- return SIEncodingFamily::VI;
-
- // FIXME: This should never be called for r600 GPUs.
- case AMDGPUSubtarget::R600:
- case AMDGPUSubtarget::R700:
- case AMDGPUSubtarget::EVERGREEN:
- case AMDGPUSubtarget::NORTHERN_ISLANDS:
- return SIEncodingFamily::SI;
- }
-
- llvm_unreachable("Unknown subtarget generation!");
-}
-
-int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
- SIEncodingFamily Gen = subtargetEncodingFamily(ST);
+//void AMDGPUInstrInfo::anchor() {}
- if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
- ST.getGeneration() >= AMDGPUSubtarget::GFX9)
- Gen = SIEncodingFamily::GFX9;
+AMDGPUInstrInfo::AMDGPUInstrInfo(const GCNSubtarget &ST) { }
- if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
- Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
- : SIEncodingFamily::SDWA;
- int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
+// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
+bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
+ const Value *Ptr = MMO->getValue();
+ // UndefValue means this is a load of a kernel input. These are uniform.
+ // Sometimes LDS instructions have constant pointers.
+ // If Ptr is null, then that means this mem operand contains a
+ // PseudoSourceValue like GOT.
+ if (!Ptr || isa<UndefValue>(Ptr) ||
+ isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
+ return true;
- // -1 means that Opcode is already a native instruction.
- if (MCOp == -1)
- return Opcode;
+ if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
+ return true;
- // (uint16_t)-1 means that Opcode is a pseudo instruction that has
- // no encoding in the given subtarget generation.
- if (MCOp == (uint16_t)-1)
- return -1;
+ if (const Argument *Arg = dyn_cast<Argument>(Ptr))
+ return AMDGPU::isArgPassedInSGPR(Arg);
- return MCOp;
+ const Instruction *I = dyn_cast<Instruction>(Ptr);
+ return I && I->getMetadata("amdgpu.uniform");
}