Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 793
1 file changed, 425 insertions(+), 368 deletions(-)
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b33040b4d06a..23c9352ce273 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1,4 +1,4 @@
-//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
+//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,30 +12,44 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUDiagnosticInfoUnsupported.h"
#include "AMDGPUInstrInfo.h"
+#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
-#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
-#include "R600InstrInfo.h"
-#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/DiagnosticInfo.h"
using namespace llvm;
+namespace llvm {
+class R600InstrInfo;
+}
+
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
namespace {
+
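+// Match a BRCOND whose condition is a single-use i32 SETCC, possibly behind a
+// CopyToReg; such branches can be selected directly to S_CBRANCH_SCC* (see
+// SelectBRCOND below).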
+static bool isCBranchSCC(const SDNode *N) {
+ assert(N->getOpcode() == ISD::BRCOND);
+ if (!N->hasOneUse())
+ return false;
+
+ SDValue Cond = N->getOperand(1);
+ if (Cond.getOpcode() == ISD::CopyToReg)
+ Cond = Cond.getOperand(2);
+ return Cond.getOpcode() == ISD::SETCC &&
+ Cond.getOperand(0).getValueType() == MVT::i32 && Cond.hasOneUse();
+}
+
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
@@ -47,7 +61,7 @@ public:
AMDGPUDAGToDAGISel(TargetMachine &TM);
virtual ~AMDGPUDAGToDAGISel();
bool runOnMachineFunction(MachineFunction &MF) override;
- SDNode *Select(SDNode *N) override;
+ void Select(SDNode *N) override;
const char *getPassName() const override;
void PreprocessISelDAG() override;
void PostprocessISelDAG() override;
@@ -59,28 +73,8 @@ private:
bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
- // Complex pattern selectors
- bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
- bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
- bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
-
- static bool checkType(const Value *ptr, unsigned int addrspace);
- static bool checkPrivateAddress(const MachineMemOperand *Op);
-
- static bool isGlobalStore(const StoreSDNode *N);
- static bool isFlatStore(const StoreSDNode *N);
- static bool isPrivateStore(const StoreSDNode *N);
- static bool isLocalStore(const StoreSDNode *N);
- static bool isRegionStore(const StoreSDNode *N);
-
- bool isCPLoad(const LoadSDNode *N) const;
- bool isConstantLoad(const LoadSDNode *N, int cbID) const;
- bool isGlobalLoad(const LoadSDNode *N) const;
- bool isFlatLoad(const LoadSDNode *N) const;
- bool isParamLoad(const LoadSDNode *N) const;
- bool isPrivateLoad(const LoadSDNode *N) const;
- bool isLocalLoad(const LoadSDNode *N) const;
- bool isRegionLoad(const LoadSDNode *N) const;
+ bool isConstantLoad(const MemSDNode *N, int cbID) const;
+ bool isUniformBr(const SDNode *N) const;
SDNode *glueCopyToM0(SDNode *N) const;
@@ -111,7 +105,20 @@ private:
SDValue &Offset, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
- SDValue &Offset, SDValue &GLC) const;
+ SDValue &Offset, SDValue &SLC) const;
+ bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+ SDValue &Offset) const;
+ bool SelectMUBUFConstant(SDValue Constant,
+ SDValue &SOffset,
+ SDValue &ImmOffset) const;
+ bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
+ SDValue &ImmOffset) const;
+ bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
+ SDValue &ImmOffset, SDValue &VOffset) const;
+
+ bool SelectFlat(SDValue Addr, SDValue &VAddr,
+ SDValue &SLC, SDValue &TFE) const;
+
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
@@ -122,7 +129,7 @@ private:
bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
- SDNode *SelectAddrSpaceCast(SDNode *N);
+ bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
@@ -136,13 +143,15 @@ private:
SDValue &Clamp,
SDValue &Omod) const;
- SDNode *SelectADD_SUB_I64(SDNode *N);
- SDNode *SelectDIV_SCALE(SDNode *N);
+ void SelectADD_SUB_I64(SDNode *N);
+ void SelectDIV_SCALE(SDNode *N);
- SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
uint32_t Offset, uint32_t Width);
- SDNode *SelectS_BFEFromShifts(SDNode *N);
- SDNode *SelectS_BFE(SDNode *N);
+ void SelectS_BFEFromShifts(SDNode *N);
+ void SelectS_BFE(SDNode *N);
+ void SelectBRCOND(SDNode *N);
+ void SelectATOMIC_CMP_SWAP(SDNode *N);
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
@@ -159,7 +168,7 @@ AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
: SelectionDAGISel(TM) {}
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
+ Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -207,64 +216,9 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
}
}
-bool AMDGPUDAGToDAGISel::SelectADDRParam(
- SDValue Addr, SDValue& R1, SDValue& R2) {
-
- if (Addr.getOpcode() == ISD::FrameIndex) {
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
- } else {
- R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
- }
- } else if (Addr.getOpcode() == ISD::ADD) {
- R1 = Addr.getOperand(0);
- R2 = Addr.getOperand(1);
- } else {
- R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
- }
- return true;
-}
-
-bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress) {
- return false;
- }
- return SelectADDRParam(Addr, R1, R2);
-}
-
-
-bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress) {
- return false;
- }
-
- if (Addr.getOpcode() == ISD::FrameIndex) {
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
- R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
- } else {
- R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
- }
- } else if (Addr.getOpcode() == ISD::ADD) {
- R1 = Addr.getOperand(0);
- R2 = Addr.getOperand(1);
- } else {
- R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
- }
- return true;
-}
-
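// For local-address memory operations, glue a copy into M0 (which DS
// instructions read) ahead of the node.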
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
- AMDGPUAS::LOCAL_ADDRESS))
+ cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
return N;
const SITargetLowering& Lowering =
@@ -304,14 +258,15 @@ static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
llvm_unreachable("invalid vector size");
}
-SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
+void AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
N->setNodeId(-1);
- return nullptr; // Already selected.
+ return; // Already selected.
}
- if (isa<AtomicSDNode>(N))
+ if (isa<AtomicSDNode>(N) ||
+ (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
N = glueCopyToM0(N);
switch (Opc) {
@@ -325,7 +280,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
break;
- return SelectADD_SUB_I64(N);
+ SelectADD_SUB_I64(N);
+ return;
}
case ISD::SCALAR_TO_VECTOR:
case AMDGPUISD::BUILD_VERTICAL_VECTOR:
@@ -359,8 +315,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
if (NumVectorElts == 1) {
- return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
- N->getOperand(0), RegClass);
+ CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
+ RegClass);
+ return;
}
assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
@@ -400,8 +357,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
if (!IsRegSeq)
break;
- return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
- RegSeqArgs);
+ CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
+ return;
}
case ISD::BUILD_PAIR: {
SDValue RC, SubReg0, SubReg1;
@@ -422,8 +379,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
N->getOperand(1), SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
- DL, N->getValueType(0), Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
+ N->getValueType(0), Ops));
+ return;
}
case ISD::Constant:
@@ -452,8 +410,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
};
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
- N->getValueType(0), Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
+ N->getValueType(0), Ops));
+ return;
}
case ISD::LOAD:
case ISD::STORE: {
@@ -487,11 +446,13 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
uint32_t OffsetVal = Offset->getZExtValue();
uint32_t WidthVal = Width->getZExtValue();
- return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
- N->getOperand(0), OffsetVal, WidthVal);
+ ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
+ SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
+ return;
}
case AMDGPUISD::DIV_SCALE: {
- return SelectDIV_SCALE(N);
+ SelectDIV_SCALE(N);
+ return;
}
case ISD::CopyToReg: {
const SITargetLowering& Lowering =
@@ -499,139 +460,48 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
Lowering.legalizeTargetIndependentNode(N, *CurDAG);
break;
}
- case ISD::ADDRSPACECAST:
- return SelectAddrSpaceCast(N);
case ISD::AND:
case ISD::SRL:
case ISD::SRA:
+ case ISD::SIGN_EXTEND_INREG:
if (N->getValueType(0) != MVT::i32 ||
Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
break;
- return SelectS_BFE(N);
+ SelectS_BFE(N);
+ return;
+ case ISD::BRCOND:
+ SelectBRCOND(N);
+ return;
+
+ case AMDGPUISD::ATOMIC_CMP_SWAP:
+ SelectATOMIC_CMP_SWAP(N);
+ return;
}
- return SelectCode(N);
+ SelectCode(N);
}
-bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
- assert(AS != 0 && "Use checkPrivateAddress instead.");
- if (!Ptr)
+bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
+ if (!N->readMem())
return false;
-
- return Ptr->getType()->getPointerAddressSpace() == AS;
-}
-
-bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
- if (Op->getPseudoValue())
- return true;
-
- if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
- return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
-
- return false;
-}
-
-bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
- const Value *MemVal = N->getMemOperand()->getValue();
- return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
-}
-
-bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
- const Value *MemVal = N->getMemOperand()->getValue();
if (CbId == -1)
- return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
+ return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
- return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
+ return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
}
-bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
- if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- N->getMemoryVT().bitsLT(MVT::i32))
- return true;
-
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
- return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
-}
-
-bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
- MachineMemOperand *MMO = N->getMemOperand();
- if (checkPrivateAddress(N->getMemOperand())) {
- if (MMO) {
- const PseudoSourceValue *PSV = MMO->getPseudoValue();
- if (PSV && PSV->isConstantPool()) {
- return true;
- }
- }
- }
- return false;
-}
-
-bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
- if (checkPrivateAddress(N->getMemOperand())) {
- // Check to make sure we are not a constant pool load or a constant load
- // that is marked as a private load
- if (isCPLoad(N) || isConstantLoad(N, -1)) {
- return false;
- }
- }
-
- const Value *MemVal = N->getMemOperand()->getValue();
- if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
- return true;
- }
- return false;
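+// A branch is uniform if an earlier pass (e.g. the uniformity annotator or
+// the CFG structurizer) tagged the IR terminator with the matching metadata.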
+bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
+ const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
+ const Instruction *Term = BB->getTerminator();
+ return Term->getMetadata("amdgpu.uniform") ||
+ Term->getMetadata("structurizecfg.uniform");
}
const char *AMDGPUDAGToDAGISel::getPassName() const {
return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
-#ifdef DEBUGTMP
-#undef INT64_C
-#endif
-#undef DEBUGTMP
-
//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//
@@ -705,7 +575,7 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
return true;
}
-SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
+void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDLoc DL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -728,7 +598,6 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
-
unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
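// Compute the low half with S_ADD_U32/S_SUB_U32 (which defines the carry) and
// the high half with S_ADDC_U32/S_SUBB_U32 (which consumes it), then recombine
// the 32-bit halves with a REG_SEQUENCE.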
@@ -745,12 +614,12 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDValue(AddHi,0),
Sub1,
};
- return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
+ CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
-SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
+void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
SDLoc SL(N);
EVT VT = N->getValueType(0);
@@ -766,7 +635,7 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
- return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+ CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
@@ -786,6 +655,7 @@ bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
+ SDLoc DL(Addr);
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
@@ -793,7 +663,7 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
// (add n0, c0)
Base = N0;
- Offset = N1;
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
}
} else if (Addr.getOpcode() == ISD::SUB) {
@@ -801,7 +671,6 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
int64_t ByteOffset = C->getSExtValue();
if (isUInt<16>(ByteOffset)) {
- SDLoc DL(Addr);
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
// XXX - This is kind of hacky. Create a dummy sub node so we can check
@@ -816,7 +685,7 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
Zero, Addr.getOperand(1));
Base = SDValue(MachineSub, 0);
- Offset = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
return true;
}
}
@@ -834,7 +703,7 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, Zero);
Base = SDValue(MovZero, 0);
- Offset = Addr;
+ Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
return true;
}
}
@@ -932,8 +801,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDLoc DL(Addr);
- GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
- SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ if (!GLC.getNode())
+ GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ if (!SLC.getNode())
+ SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -961,9 +832,11 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
}
if (isLegalMUBUFImmOffset(C1)) {
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
- return true;
- } else if (isUInt<32>(C1->getZExtValue())) {
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
+ return true;
+ }
+
+ if (isUInt<32>(C1->getZExtValue())) {
// Illegal offset, store it in soffset.
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
@@ -1045,14 +918,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
+
// Offsets in vaddr must be positive.
- if (CurDAG->SignBitIsZero(N0)) {
- ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- if (isLegalMUBUFImmOffset(C1)) {
- VAddr = N0;
- ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
- return true;
- }
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+ if (isLegalMUBUFImmOffset(C1)) {
+ VAddr = N0;
+ ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
+ return true;
}
}
@@ -1091,13 +963,118 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
}
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
+                                           SDValue &Soffset, SDValue &Offset) const {
+ SDValue GLC, SLC, TFE;
+
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset,
- SDValue &GLC) const {
- SDValue SLC, TFE;
+ SDValue &SLC) const {
+ SDValue GLC, TFE;
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
+bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
+ SDValue &SOffset,
+ SDValue &ImmOffset) const {
+ SDLoc DL(Constant);
+ uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
+ uint32_t Overflow = 0;
+
+ if (Imm >= 4096) {
+ if (Imm <= 4095 + 64) {
+ // Use an SOffset inline constant for 1..64
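+      // e.g. Imm = 4100 -> Overflow = 5, Imm = 4095.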
+ Overflow = Imm - 4095;
+ Imm = 4095;
+ } else {
+ // Try to keep the same value in SOffset for adjacent loads, so that
+ // the corresponding register contents can be re-used.
+ //
+ // Load values with all low-bits set into SOffset, so that a larger
+      // range of values can be covered using s_movk_i32.
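+      // e.g. Imm = 5000 -> Low = 905, High = 4096, so the final split is
+      // ImmOffset = 905 and SOffset = 4095 (905 + 4095 == 5000).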
+ uint32_t High = (Imm + 1) & ~4095;
+ uint32_t Low = (Imm + 1) & 4095;
+ Imm = Low;
+ Overflow = High - 1;
+ }
+ }
+
+ // There is a hardware bug in SI and CI which prevents address clamping in
+ // MUBUF instructions from working correctly with SOffsets. The immediate
+ // offset is unaffected.
+ if (Overflow > 0 &&
+ Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ return false;
+
+ ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
+
+ if (Overflow <= 64)
+ SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
+ else
+ SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
+ 0);
+
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
+ SDValue &SOffset,
+ SDValue &ImmOffset) const {
+ SDLoc DL(Offset);
+
+ if (!isa<ConstantSDNode>(Offset))
+ return false;
+
+ return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
+ SDValue &SOffset,
+ SDValue &ImmOffset,
+ SDValue &VOffset) const {
+ SDLoc DL(Offset);
+
+ // Don't generate an unnecessary voffset for constant offsets.
+ if (isa<ConstantSDNode>(Offset)) {
+ SDValue Tmp1, Tmp2;
+
+    // Return false so the offset-only pattern is used, except on <= CI, where
+    // a VOffset is still needed when SelectMUBUFConstant fails, to work
+    // around the hardware bug with SOffset address clamping.
+ if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
+ SelectMUBUFConstant(Offset, Tmp1, Tmp2))
+ return false;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Offset)) {
+ SDValue N0 = Offset.getOperand(0);
+ SDValue N1 = Offset.getOperand(1);
+ if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
+ SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
+ VOffset = N0;
+ return true;
+ }
+ }
+
+ SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ VOffset = Offset;
+
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
+ SDValue &VAddr,
+ SDValue &SLC,
+ SDValue &TFE) const {
+ VAddr = Addr;
+ TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
+ return true;
+}
+
///
/// \param EncodedOffset This is the immediate value that will be encoded
/// directly into the instruction. On SI/CI the \p EncodedOffset
@@ -1213,71 +1190,33 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
!isa<ConstantSDNode>(Offset);
}
-// FIXME: This is incorrect and only enough to be able to compile.
-SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
- AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
- SDLoc DL(N);
-
- const MachineFunction &MF = CurDAG->getMachineFunction();
- DiagnosticInfoUnsupported NotImplemented(*MF.getFunction(),
- "addrspacecast not implemented");
- CurDAG->getContext()->diagnose(NotImplemented);
-
- assert(Subtarget->hasFlatAddressSpace() &&
- "addrspacecast only supported with flat address space!");
-
- assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
- ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
- "Can only cast to / from flat address space!");
-
- // The flat instructions read the address as the index of the VGPR holding the
- // address, so casting should just be reinterpreting the base VGPR, so just
- // insert trunc / bitcast / zext.
-
- SDValue Src = ASC->getOperand(0);
- EVT DestVT = ASC->getValueType(0);
- EVT SrcVT = Src.getValueType();
-
- unsigned SrcSize = SrcVT.getSizeInBits();
- unsigned DestSize = DestVT.getSizeInBits();
-
- if (SrcSize > DestSize) {
- assert(SrcSize == 64 && DestSize == 32);
- return CurDAG->getMachineNode(
- TargetOpcode::EXTRACT_SUBREG,
- DL,
- DestVT,
- Src,
- CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
- }
-
- if (DestSize > SrcSize) {
- assert(SrcSize == 32 && DestSize == 64);
-
- // FIXME: This is probably wrong, we should never be defining
- // a register class with both VGPRs and SGPRs
- SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
- MVT::i32);
+bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
+ SDValue &Base,
+ SDValue &Offset) const {
+ SDLoc DL(Index);
- const SDValue Ops[] = {
- RC,
- Src,
- CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
- SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getConstant(0, DL, MVT::i32)), 0),
- CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
- };
+ if (CurDAG->isBaseWithConstantOffset(Index)) {
+ SDValue N0 = Index.getOperand(0);
+ SDValue N1 = Index.getOperand(1);
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
- DL, N->getValueType(0), Ops);
+ // (add n0, c0)
+ Base = N0;
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
+ return true;
}
- assert(SrcSize == 64 && DestSize == 64);
- return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
+ if (isa<ConstantSDNode>(Index))
+ return false;
+
+ Base = Index;
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ return true;
}
-SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
- uint32_t Offset, uint32_t Width) {
+SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
+ SDValue Val, uint32_t Offset,
+ uint32_t Width) {
// Transformation function, pack the offset and width of a BFE into
// the format expected by the S_BFE_I32 / S_BFE_U32. In the second
// source, bits [5:0] contain the offset and bits [22:16] the width.
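// For example, Offset = 16 and Width = 8 pack to (8 << 16) | 16 = 0x80010.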
@@ -1287,7 +1226,7 @@ SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}
-SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
+void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
// "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
// "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
// Predicate: 0 < b <= c < 32
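// e.g. "(a << 8) srl 24" becomes "BFE_U32 a, 16, 8", extracting bits [23:16].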
@@ -1304,14 +1243,15 @@ SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
bool Signed = N->getOpcode() == ISD::SRA;
unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
- return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
- CVal - BVal, 32 - CVal);
+ ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
+ 32 - CVal));
+ return;
}
}
- return SelectCode(N);
+ SelectCode(N);
}
-SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
+void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
switch (N->getOpcode()) {
case ISD::AND:
if (N->getOperand(0).getOpcode() == ISD::SRL) {
@@ -1328,8 +1268,9 @@ SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
if (isMask_32(MaskVal)) {
uint32_t WidthVal = countPopulation(MaskVal);
- return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
- ShiftVal, WidthVal);
+ ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
+ Srl.getOperand(0), ShiftVal, WidthVal));
+ return;
}
}
}
@@ -1349,20 +1290,139 @@ SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
if (isMask_32(MaskVal)) {
uint32_t WidthVal = countPopulation(MaskVal);
- return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
- ShiftVal, WidthVal);
+ ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
+ And.getOperand(0), ShiftVal, WidthVal));
+ return;
}
}
- } else if (N->getOperand(0).getOpcode() == ISD::SHL)
- return SelectS_BFEFromShifts(N);
+ } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
+ SelectS_BFEFromShifts(N);
+ return;
+ }
break;
case ISD::SRA:
- if (N->getOperand(0).getOpcode() == ISD::SHL)
- return SelectS_BFEFromShifts(N);
+ if (N->getOperand(0).getOpcode() == ISD::SHL) {
+ SelectS_BFEFromShifts(N);
+ return;
+ }
break;
+
+ case ISD::SIGN_EXTEND_INREG: {
+ // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
+ SDValue Src = N->getOperand(0);
+ if (Src.getOpcode() != ISD::SRL)
+ break;
+
+ const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
+ if (!Amt)
+ break;
+
+ unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
+ ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
+ Amt->getZExtValue(), Width));
+ return;
+ }
}
- return SelectCode(N);
+ SelectCode(N);
+}
+
+void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
+ SDValue Cond = N->getOperand(1);
+
+ if (isCBranchSCC(N)) {
+ // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
+ SelectCode(N);
+ return;
+ }
+
+ // The result of VOPC instructions is or'd against ~EXEC before it is
+ // written to vcc or another SGPR. This means that the value '1' is always
+ // written to the corresponding bit for results that are masked. In order
+ // to correctly check against vccz, we need to and VCC with the EXEC
+ // register in order to clear the value from the masked bits.
+
+ SDLoc SL(N);
+
+ SDNode *MaskedCond =
+ CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
+ CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
+ Cond);
+ SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
+ SDValue(MaskedCond, 0),
+ SDValue()); // Passing SDValue() adds a
+ // glue output.
+ CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
+ N->getOperand(2), // Basic Block
+ VCC.getValue(0), // Chain
+ VCC.getValue(1)); // Glue
+ return;
+}
+
+// This is here because there isn't a way to use the generated sub0_sub1 as the
+// subreg index to EXTRACT_SUBREG in tablegen.
+void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
+ MemSDNode *Mem = cast<MemSDNode>(N);
+ unsigned AS = Mem->getAddressSpace();
+ if (AS == AMDGPUAS::FLAT_ADDRESS) {
+ SelectCode(N);
+ return;
+ }
+
+ MVT VT = N->getSimpleValueType(0);
+ bool Is32 = (VT == MVT::i32);
+ SDLoc SL(N);
+
+ MachineSDNode *CmpSwap = nullptr;
+ if (Subtarget->hasAddr64()) {
+ SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;
+
+ if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
+ unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 :
+ AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
+ SDValue CmpVal = Mem->getOperand(2);
+
+ // XXX - Do we care about glue operands?
+
+ SDValue Ops[] = {
+ CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
+ };
+
+ CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
+ }
+ }
+
+ if (!CmpSwap) {
+ SDValue SRsrc, SOffset, Offset, SLC;
+ if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
+ unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET :
+ AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;
+
+ SDValue CmpVal = Mem->getOperand(2);
+ SDValue Ops[] = {
+ CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
+ };
+
+ CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
+ }
+ }
+
+ if (!CmpSwap) {
+ SelectCode(N);
+ return;
+ }
+
+ MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
+ *MMOs = Mem->getMemOperand();
+ CmpSwap->setMemRefs(MMOs, MMOs + 1);
+
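+  // The *_RTN buffer atomic writes the old memory value to its data
+  // registers; extract the low 32 or 64 bits as the cmpxchg result.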
+ unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
+ SDValue Extract
+ = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
+
+ ReplaceUses(SDValue(N, 0), Extract);
+ ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
+ CurDAG->RemoveDeadNode(N);
}
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
@@ -1432,62 +1492,59 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
}
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
- bool Modified = false;
-
- // XXX - Other targets seem to be able to do this without a worklist.
- SmallVector<LoadSDNode *, 8> LoadsToReplace;
- SmallVector<StoreSDNode *, 8> StoresToReplace;
-
- for (SDNode &Node : CurDAG->allnodes()) {
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
- EVT VT = LD->getValueType(0);
- if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
- continue;
-
- // To simplify the TableGen patterns, we replace all i64 loads with v2i32
- // loads. Alternatively, we could promote i64 loads to v2i32 during DAG
- // legalization; however, some places in the DAG legalizer
- // (ExpandUnalignedLoad) assume that i64 loads are legal whenever i64 is,
- // so doing this promotion early can cause problems.
- LoadsToReplace.push_back(LD);
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
- // Handle i64 stores here for the same reason mentioned above for loads.
- SDValue Value = ST->getValue();
- if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
- continue;
- StoresToReplace.push_back(ST);
+ MachineFrameInfo *MFI = CurDAG->getMachineFunction().getFrameInfo();
+
+ // Handle the perverse case where a frame index is being stored. We don't
+ // want to see multiple frame index operands on the same instruction since
+ // it complicates things and violates some assumptions about frame index
+ // lowering.
+ for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd();
+ I != E; ++I) {
+ SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32);
+
+ // It's possible that we have a frame index defined in the function that
+ // isn't used in this block.
+ if (FI.use_empty())
+ continue;
+
+ // Skip over the AssertZext inserted during lowering.
+ SDValue EffectiveFI = FI;
+ auto It = FI->use_begin();
+ if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) {
+ EffectiveFI = SDValue(*It, 0);
+ It = EffectiveFI->use_begin();
}
- }
-
- for (LoadSDNode *LD : LoadsToReplace) {
- SDLoc SL(LD);
-
- SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
- LD->getBasePtr(), LD->getMemOperand());
- SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
- MVT::i64, NewLoad);
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
- Modified = true;
- }
- for (StoreSDNode *ST : StoresToReplace) {
- SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
- MVT::v2i32, ST->getValue());
- const SDValue StoreOps[] = {
- ST->getChain(),
- NewValue,
- ST->getBasePtr(),
- ST->getOffset()
- };
+ for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) {
+ SDUse &Use = It.getUse();
+ SDNode *User = Use.getUser();
+ unsigned OpIdx = It.getOperandNo();
+ ++It;
+
+ if (MemSDNode *M = dyn_cast<MemSDNode>(User)) {
+ unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1;
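+        // A frame index used as the address operand is fine; any other use
+        // (e.g. the value being stored) is rewritten below to go through a
+        // V_MOV_B32 copy of the frame index.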
+ if (OpIdx == PtrIdx)
+ continue;
+
+ unsigned OpN = M->getNumOperands();
+ SDValue NewOps[8];
+
+ assert(OpN < array_lengthof(NewOps));
+ for (unsigned Op = 0; Op != OpN; ++Op) {
+ if (Op != OpIdx) {
+ NewOps[Op] = M->getOperand(Op);
+ continue;
+ }
+
+ MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+ SDLoc(M), MVT::i32, FI);
+ NewOps[Op] = SDValue(Mov, 0);
+ }
- CurDAG->UpdateNodeOperands(ST, StoreOps);
- Modified = true;
+ CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN));
+ }
+ }
}
-
- // XXX - Is this necessary?
- if (Modified)
- CurDAG->RemoveDeadNodes();
}
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {