about | summary | refs | log | tree | commit | diff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
diff options
context:
space:
mode:
author: Dimitry Andric <dim@FreeBSD.org> 2022-03-20 11:40:34 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2022-05-14 11:43:05 +0000
commit: 349cc55c9796c4596a5b9904cd3281af295f878f (patch)
tree: 410c5a785075730a35f1272ca6a7adf72222ad03 /contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
parent: cb2ae6163174b90e999326ecec3699ee093a5d43 (diff)
parent: c0981da47d5696fe36474fcf86b4ce03ae3ff818 (diff)
download: src-349cc55c9796c4596a5b9904cd3281af295f878f.tar.gz
download: src-349cc55c9796c4596a5b9904cd3281af295f878f.zip
Merge llvm-project main llvmorg-14-init-10186-gff7f2cfa959b
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvmorg-14-init-10186-gff7f2cfa959b.

PR: 261742
MFC after: 2 weeks
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 468
1 files changed, 46 insertions, 422 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index a3106ded1e38..cee56ee97294 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -11,8 +11,11 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600RegisterInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -32,287 +35,12 @@
using namespace llvm;
-namespace llvm {
-
-class R600InstrInfo;
-
-} // end namespace llvm
-
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
namespace {
-static bool isNullConstantOrUndef(SDValue V) {
- if (V.isUndef())
- return true;
-
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isNullValue();
-}
-
-static bool getConstantValue(SDValue N, uint32_t &Out) {
- // This is only used for packed vectors, where ussing 0 for undef should
- // always be good.
- if (N.isUndef()) {
- Out = 0;
- return true;
- }
-
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
- Out = C->getAPIntValue().getSExtValue();
- return true;
- }
-
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
- Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
- return true;
- }
-
- return false;
-}
-
-// TODO: Handle undef as zero
-static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
- bool Negate = false) {
- assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
- uint32_t LHSVal, RHSVal;
- if (getConstantValue(N->getOperand(0), LHSVal) &&
- getConstantValue(N->getOperand(1), RHSVal)) {
- SDLoc SL(N);
- uint32_t K = Negate ?
- (-LHSVal & 0xffff) | (-RHSVal << 16) :
- (LHSVal & 0xffff) | (RHSVal << 16);
- return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
- DAG.getTargetConstant(K, SL, MVT::i32));
- }
-
- return nullptr;
-}
-
-static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
- return packConstantV2I16(N, DAG, true);
-}
-
-/// AMDGPU specific code to select AMDGPU machine instructions for
-/// SelectionDAG operations.
-class AMDGPUDAGToDAGISel : public SelectionDAGISel {
- // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
- // make the right decision when generating code for different targets.
- const GCNSubtarget *Subtarget;
-
- // Default FP mode for the current function.
- AMDGPU::SIModeRegisterDefaults Mode;
-
- bool EnableLateStructurizeCFG;
-
- // Instructions that will be lowered with a final instruction that zeros the
- // high result bits.
- bool fp16SrcZerosHighBits(unsigned Opc) const;
-
-public:
- explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
- CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
- : SelectionDAGISel(*TM, OptLevel) {
- EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
- }
- ~AMDGPUDAGToDAGISel() override = default;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AMDGPUArgumentUsageInfo>();
- AU.addRequired<LegacyDivergenceAnalysis>();
-#ifdef EXPENSIVE_CHECKS
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
-#endif
- SelectionDAGISel::getAnalysisUsage(AU);
- }
-
- bool matchLoadD16FromBuildVector(SDNode *N) const;
-
- bool runOnMachineFunction(MachineFunction &MF) override;
- void PreprocessISelDAG() override;
- void Select(SDNode *N) override;
- StringRef getPassName() const override;
- void PostprocessISelDAG() override;
-
-protected:
- void SelectBuildVector(SDNode *N, unsigned RegClassID);
-
-private:
- std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
- bool isNoNanSrc(SDValue N) const;
- bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
- bool isNegInlineImmediate(const SDNode *N) const {
- return isInlineImmediate(N, true);
- }
-
- bool isInlineImmediate16(int64_t Imm) const {
- return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
- }
-
- bool isInlineImmediate32(int64_t Imm) const {
- return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
- }
-
- bool isInlineImmediate64(int64_t Imm) const {
- return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
- }
-
- bool isInlineImmediate(const APFloat &Imm) const {
- return Subtarget->getInstrInfo()->isInlineConstant(Imm);
- }
-
- bool isVGPRImm(const SDNode *N) const;
- bool isUniformLoad(const SDNode *N) const;
- bool isUniformBr(const SDNode *N) const;
-
- bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
- SDValue &RHS) const;
-
- MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
-
- SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
- SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
- SDNode *glueCopyToM0LDSInit(SDNode *N) const;
-
- const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
- virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
- virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
- bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
- unsigned Size) const;
- bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
- bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
- SDValue &Offset1) const;
- bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
- SDValue &Offset1) const;
- bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
- SDValue &Offset1, unsigned Size) const;
- bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset, SDValue &Offen,
- SDValue &Idxen, SDValue &Addr64) const;
- bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset) const;
- bool SelectMUBUFScratchOffen(SDNode *Parent,
- SDValue Addr, SDValue &RSrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &ImmOffset) const;
- bool SelectMUBUFScratchOffset(SDNode *Parent,
- SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
- SDValue &Offset) const;
-
- bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
- SDValue &Offset) const;
-
- bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset, uint64_t FlatVariant) const;
- bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset) const;
- bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset) const;
- bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset) const;
- bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
- SDValue &VOffset, SDValue &Offset) const;
- bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
- SDValue &Offset) const;
-
- bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
- bool &Imm) const;
- SDValue Expand32BitAddress(SDValue Addr) const;
- bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
- bool &Imm) const;
- bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
- bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
- bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
- bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
- bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
- bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
-
- bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
- bool AllowAbs = true) const;
- bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
- bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp, SDValue &Omod) const;
- bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp, SDValue &Omod) const;
- bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp, SDValue &Omod) const;
-
- bool SelectVOP3OMods(SDValue In, SDValue &Src,
- SDValue &Clamp, SDValue &Omod) const;
-
- bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
- bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
- bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
- bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
- SDValue getHi16Elt(SDValue In) const;
-
- SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
-
- void SelectADD_SUB_I64(SDNode *N);
- void SelectAddcSubb(SDNode *N);
- void SelectUADDO_USUBO(SDNode *N);
- void SelectDIV_SCALE(SDNode *N);
- void SelectMAD_64_32(SDNode *N);
- void SelectFMA_W_CHAIN(SDNode *N);
- void SelectFMUL_W_CHAIN(SDNode *N);
-
- SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
- uint32_t Offset, uint32_t Width);
- void SelectS_BFEFromShifts(SDNode *N);
- void SelectS_BFE(SDNode *N);
- bool isCBranchSCC(const SDNode *N) const;
- void SelectBRCOND(SDNode *N);
- void SelectFMAD_FMA(SDNode *N);
- void SelectATOMIC_CMP_SWAP(SDNode *N);
- void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
- void SelectDS_GWS(SDNode *N, unsigned IntrID);
- void SelectInterpP1F16(SDNode *N);
- void SelectINTRINSIC_W_CHAIN(SDNode *N);
- void SelectINTRINSIC_WO_CHAIN(SDNode *N);
- void SelectINTRINSIC_VOID(SDNode *N);
-
-protected:
- // Include the pieces autogenerated from the target description.
-#include "AMDGPUGenDAGISel.inc"
-};
-
-class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
- const R600Subtarget *Subtarget;
-
- bool isConstantLoad(const MemSDNode *N, int cbID) const;
- bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
- bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
- SDValue& Offset);
-public:
- explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
- AMDGPUDAGToDAGISel(TM, OptLevel) {}
-
- void Select(SDNode *N) override;
-
- bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
- SDValue &Offset) override;
- bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
- SDValue &Offset) override;
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- void PreprocessISelDAG() override {}
-
-protected:
- // Include the pieces autogenerated from the target description.
-#include "R600GenDAGISel.inc"
-};
-
static SDValue stripBitcast(SDValue Val) {
return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}
@@ -351,7 +79,7 @@ static bool isExtractHiElt(SDValue In, SDValue &Out) {
static SDValue stripExtractLoElt(SDValue In) {
if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
- if (Idx->isNullValue() && In.getValueSizeInBits() <= 32)
+ if (Idx->isZero() && In.getValueSizeInBits() <= 32)
return In.getOperand(0);
}
}
@@ -386,11 +114,11 @@ FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
return new AMDGPUDAGToDAGISel(TM, OptLevel);
}
-/// This pass converts a legalized DAG into a R600-specific
-// DAG, ready for instruction scheduling.
-FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
- CodeGenOpt::Level OptLevel) {
- return new R600DAGToDAGISel(TM, OptLevel);
+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(
+ TargetMachine *TM /*= nullptr*/,
+ CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/)
+ : SelectionDAGISel(*TM, OptLevel) {
+ EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
@@ -468,6 +196,16 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
}
}
+void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AMDGPUArgumentUsageInfo>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
+#ifdef EXPENSIVE_CHECKS
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+#endif
+ SelectionDAGISel::getAnalysisUsage(AU);
+}
+
bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
assert(Subtarget->d16PreservesUnusedBits());
MVT VT = N->getValueType(0).getSimpleVT();
@@ -903,8 +641,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
uint32_t OffsetVal = Offset->getZExtValue();
uint32_t WidthVal = Width->getZExtValue();
- ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
- SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
+ ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
+ WidthVal));
return;
}
case AMDGPUISD::DIV_SCALE: {
@@ -1207,7 +945,14 @@ void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
Ops[8] = N->getOperand(0);
Ops[9] = N->getOperand(4);
- CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32_e64, N->getVTList(), Ops);
+ // If there are no source modifiers, prefer fmac over fma because it can use
+ // the smaller VOP2 encoding.
+ bool UseFMAC = Subtarget->hasDLInsts() &&
+ cast<ConstantSDNode>(Ops[0])->isZero() &&
+ cast<ConstantSDNode>(Ops[2])->isZero() &&
+ cast<ConstantSDNode>(Ops[4])->isZero();
+ unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
+ CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
}
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
@@ -1707,7 +1452,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
!cast<ConstantSDNode>(Idxen)->getSExtValue() &&
!cast<ConstantSDNode>(Addr64)->getSExtValue()) {
uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
- APInt::getAllOnesValue(32).getZExtValue(); // Size
+ APInt::getAllOnes(32).getZExtValue(); // Size
SDLoc DL(Addr);
const SITargetLowering& Lowering =
@@ -2202,9 +1947,17 @@ bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
return true;
}
-SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
+SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
SDValue Val, uint32_t Offset,
uint32_t Width) {
+ if (Val->isDivergent()) {
+ unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
+ SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32);
+ SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32);
+
+ return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
+ }
+ unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
// Transformation function, pack the offset and width of a BFE into
// the format expected by the S_BFE_I32 / S_BFE_U32. In the second
// source, bits [5:0] contain the offset and bits [22:16] the width.
@@ -2229,10 +1982,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
if (0 < BVal && BVal <= CVal && CVal < 32) {
bool Signed = N->getOpcode() == ISD::SRA;
- unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
-
- ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
- 32 - CVal));
+ ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
+ 32 - CVal));
return;
}
}
@@ -2255,9 +2006,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
if (isMask_32(MaskVal)) {
uint32_t WidthVal = countPopulation(MaskVal);
-
- ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
- Srl.getOperand(0), ShiftVal, WidthVal));
+ ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal,
+ WidthVal));
return;
}
}
@@ -2277,9 +2027,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
if (isMask_32(MaskVal)) {
uint32_t WidthVal = countPopulation(MaskVal);
-
- ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
- And.getOperand(0), ShiftVal, WidthVal));
+ ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
+ WidthVal));
return;
}
}
@@ -2306,7 +2055,7 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
break;
unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
- ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
+ ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
Amt->getZExtValue(), Width));
return;
}
@@ -3111,128 +2860,3 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
CurDAG->RemoveDeadNodes();
} while (IsModified);
}
-
-bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &MF.getSubtarget<R600Subtarget>();
- return SelectionDAGISel::runOnMachineFunction(MF);
-}
-
-bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
- if (!N->readMem())
- return false;
- if (CbId == -1)
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
- N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
-
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
-}
-
-bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
- SDValue& IntPtr) {
- if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
- IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
- true);
- return true;
- }
- return false;
-}
-
-bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
- SDValue& BaseReg, SDValue &Offset) {
- if (!isa<ConstantSDNode>(Addr)) {
- BaseReg = Addr;
- Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
- return true;
- }
- return false;
-}
-
-void R600DAGToDAGISel::Select(SDNode *N) {
- unsigned int Opc = N->getOpcode();
- if (N->isMachineOpcode()) {
- N->setNodeId(-1);
- return; // Already selected.
- }
-
- switch (Opc) {
- default: break;
- case AMDGPUISD::BUILD_VERTICAL_VECTOR:
- case ISD::SCALAR_TO_VECTOR:
- case ISD::BUILD_VECTOR: {
- EVT VT = N->getValueType(0);
- unsigned NumVectorElts = VT.getVectorNumElements();
- unsigned RegClassID;
- // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
- // that adds a 128 bits reg copy when going through TwoAddressInstructions
- // pass. We want to avoid 128 bits copies as much as possible because they
- // can't be bundled by our scheduler.
- switch(NumVectorElts) {
- case 2: RegClassID = R600::R600_Reg64RegClassID; break;
- case 4:
- if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
- RegClassID = R600::R600_Reg128VerticalRegClassID;
- else
- RegClassID = R600::R600_Reg128RegClassID;
- break;
- default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
- }
- SelectBuildVector(N, RegClassID);
- return;
- }
- }
-
- SelectCode(N);
-}
-
-bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- ConstantSDNode *C;
- SDLoc DL(Addr);
-
- if ((C = dyn_cast<ConstantSDNode>(Addr))) {
- Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
- } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
- (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
- Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
- } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
- (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
- Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
- } else {
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
- }
-
- return true;
-}
-
-bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- ConstantSDNode *IMMOffset;
-
- if (Addr.getOpcode() == ISD::ADD
- && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- && isInt<16>(IMMOffset->getZExtValue())) {
-
- Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
- MVT::i32);
- return true;
- // If the pointer address is constant, we can move it to the offset field.
- } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
- && isInt<16>(IMMOffset->getZExtValue())) {
- Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
- SDLoc(CurDAG->getEntryNode()),
- R600::ZERO, MVT::i32);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
- MVT::i32);
- return true;
- }
-
- // Default case, no offset
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
- return true;
-}