diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2022-03-20 11:40:34 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2022-05-14 11:43:05 +0000 |
commit | 349cc55c9796c4596a5b9904cd3281af295f878f (patch) | |
tree | 410c5a785075730a35f1272ca6a7adf72222ad03 /contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | |
parent | cb2ae6163174b90e999326ecec3699ee093a5d43 (diff) | |
parent | c0981da47d5696fe36474fcf86b4ce03ae3ff818 (diff) | |
download | src-349cc55c9796c4596a5b9904cd3281af295f878f.tar.gz src-349cc55c9796c4596a5b9904cd3281af295f878f.zip |
Merge llvm-project main llvmorg-14-init-10186-gff7f2cfa959b
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-14-init-10186-gff7f2cfa959b.
PR: 261742
MFC after: 2 weeks
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 468 |
1 files changed, 46 insertions, 422 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index a3106ded1e38..cee56ee97294 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -11,8 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "AMDGPUISelDAGToDAG.h" #include "AMDGPU.h" #include "AMDGPUTargetMachine.h" +#include "MCTargetDesc/R600MCTargetDesc.h" +#include "R600RegisterInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" @@ -32,287 +35,12 @@ using namespace llvm; -namespace llvm { - -class R600InstrInfo; - -} // end namespace llvm - //===----------------------------------------------------------------------===// // Instruction Selector Implementation //===----------------------------------------------------------------------===// namespace { -static bool isNullConstantOrUndef(SDValue V) { - if (V.isUndef()) - return true; - - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isNullValue(); -} - -static bool getConstantValue(SDValue N, uint32_t &Out) { - // This is only used for packed vectors, where ussing 0 for undef should - // always be good. - if (N.isUndef()) { - Out = 0; - return true; - } - - if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { - Out = C->getAPIntValue().getSExtValue(); - return true; - } - - if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { - Out = C->getValueAPF().bitcastToAPInt().getSExtValue(); - return true; - } - - return false; -} - -// TODO: Handle undef as zero -static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG, - bool Negate = false) { - assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2); - uint32_t LHSVal, RHSVal; - if (getConstantValue(N->getOperand(0), LHSVal) && - getConstantValue(N->getOperand(1), RHSVal)) { - SDLoc SL(N); - uint32_t K = Negate ? - (-LHSVal & 0xffff) | (-RHSVal << 16) : - (LHSVal & 0xffff) | (RHSVal << 16); - return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0), - DAG.getTargetConstant(K, SL, MVT::i32)); - } - - return nullptr; -} - -static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) { - return packConstantV2I16(N, DAG, true); -} - -/// AMDGPU specific code to select AMDGPU machine instructions for -/// SelectionDAG operations. -class AMDGPUDAGToDAGISel : public SelectionDAGISel { - // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can - // make the right decision when generating code for different targets. - const GCNSubtarget *Subtarget; - - // Default FP mode for the current function. - AMDGPU::SIModeRegisterDefaults Mode; - - bool EnableLateStructurizeCFG; - - // Instructions that will be lowered with a final instruction that zeros the - // high result bits. - bool fp16SrcZerosHighBits(unsigned Opc) const; - -public: - explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr, - CodeGenOpt::Level OptLevel = CodeGenOpt::Default) - : SelectionDAGISel(*TM, OptLevel) { - EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG; - } - ~AMDGPUDAGToDAGISel() override = default; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AMDGPUArgumentUsageInfo>(); - AU.addRequired<LegacyDivergenceAnalysis>(); -#ifdef EXPENSIVE_CHECKS - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfoWrapperPass>(); -#endif - SelectionDAGISel::getAnalysisUsage(AU); - } - - bool matchLoadD16FromBuildVector(SDNode *N) const; - - bool runOnMachineFunction(MachineFunction &MF) override; - void PreprocessISelDAG() override; - void Select(SDNode *N) override; - StringRef getPassName() const override; - void PostprocessISelDAG() override; - -protected: - void SelectBuildVector(SDNode *N, unsigned RegClassID); - -private: - std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const; - bool isNoNanSrc(SDValue N) const; - bool isInlineImmediate(const SDNode *N, bool Negated = false) const; - bool isNegInlineImmediate(const SDNode *N) const { - return isInlineImmediate(N, true); - } - - bool isInlineImmediate16(int64_t Imm) const { - return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm()); - } - - bool isInlineImmediate32(int64_t Imm) const { - return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm()); - } - - bool isInlineImmediate64(int64_t Imm) const { - return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm()); - } - - bool isInlineImmediate(const APFloat &Imm) const { - return Subtarget->getInstrInfo()->isInlineConstant(Imm); - } - - bool isVGPRImm(const SDNode *N) const; - bool isUniformLoad(const SDNode *N) const; - bool isUniformBr(const SDNode *N) const; - - bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS, - SDValue &RHS) const; - - MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const; - - SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const; - SDNode *glueCopyToM0(SDNode *N, SDValue Val) const; - SDNode *glueCopyToM0LDSInit(SDNode *N) const; - - const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; - virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); - virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); - bool isDSOffsetLegal(SDValue Base, unsigned Offset) const; - bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1, - unsigned Size) const; - bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; - bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, - SDValue &Offset1) const; - bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, - SDValue &Offset1) const; - bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0, - SDValue &Offset1, unsigned Size) const; - bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, - SDValue &SOffset, SDValue &Offset, SDValue &Offen, - SDValue &Idxen, SDValue &Addr64) const; - bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, - SDValue &SOffset, SDValue &Offset) const; - bool SelectMUBUFScratchOffen(SDNode *Parent, - SDValue Addr, SDValue &RSrc, SDValue &VAddr, - SDValue &SOffset, SDValue &ImmOffset) const; - bool SelectMUBUFScratchOffset(SDNode *Parent, - SDValue Addr, SDValue &SRsrc, SDValue &Soffset, - SDValue &Offset) const; - - bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, - SDValue &Offset) const; - - bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr, - SDValue &Offset, uint64_t FlatVariant) const; - bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, - SDValue &Offset) const; - bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr, - SDValue &Offset) const; - bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr, - SDValue &Offset) const; - bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr, - SDValue &VOffset, SDValue &Offset) const; - bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr, - SDValue &Offset) const; - - bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, - bool &Imm) const; - SDValue Expand32BitAddress(SDValue Addr) const; - bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, - bool &Imm) const; - bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; - bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; - bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; - bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; - bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; - bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; - - bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; - bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods, - bool AllowAbs = true) const; - bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; - bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; - bool SelectVOP3NoMods(SDValue In, SDValue &Src) const; - bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, - SDValue &Clamp, SDValue &Omod) const; - bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods, - SDValue &Clamp, SDValue &Omod) const; - bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, - SDValue &Clamp, SDValue &Omod) const; - - bool SelectVOP3OMods(SDValue In, SDValue &Src, - SDValue &Clamp, SDValue &Omod) const; - - bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; - - bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const; - - bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; - bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const; - bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; - - SDValue getHi16Elt(SDValue In) const; - - SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const; - - void SelectADD_SUB_I64(SDNode *N); - void SelectAddcSubb(SDNode *N); - void SelectUADDO_USUBO(SDNode *N); - void SelectDIV_SCALE(SDNode *N); - void SelectMAD_64_32(SDNode *N); - void SelectFMA_W_CHAIN(SDNode *N); - void SelectFMUL_W_CHAIN(SDNode *N); - - SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val, - uint32_t Offset, uint32_t Width); - void SelectS_BFEFromShifts(SDNode *N); - void SelectS_BFE(SDNode *N); - bool isCBranchSCC(const SDNode *N) const; - void SelectBRCOND(SDNode *N); - void SelectFMAD_FMA(SDNode *N); - void SelectATOMIC_CMP_SWAP(SDNode *N); - void SelectDSAppendConsume(SDNode *N, unsigned IntrID); - void SelectDS_GWS(SDNode *N, unsigned IntrID); - void SelectInterpP1F16(SDNode *N); - void SelectINTRINSIC_W_CHAIN(SDNode *N); - void SelectINTRINSIC_WO_CHAIN(SDNode *N); - void SelectINTRINSIC_VOID(SDNode *N); - -protected: - // Include the pieces autogenerated from the target description. -#include "AMDGPUGenDAGISel.inc" -}; - -class R600DAGToDAGISel : public AMDGPUDAGToDAGISel { - const R600Subtarget *Subtarget; - - bool isConstantLoad(const MemSDNode *N, int cbID) const; - bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); - bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, - SDValue& Offset); -public: - explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) : - AMDGPUDAGToDAGISel(TM, OptLevel) {} - - void Select(SDNode *N) override; - - bool SelectADDRIndirect(SDValue Addr, SDValue &Base, - SDValue &Offset) override; - bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, - SDValue &Offset) override; - - bool runOnMachineFunction(MachineFunction &MF) override; - - void PreprocessISelDAG() override {} - -protected: - // Include the pieces autogenerated from the target description. -#include "R600GenDAGISel.inc" -}; - static SDValue stripBitcast(SDValue Val) { return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val; } @@ -351,7 +79,7 @@ static bool isExtractHiElt(SDValue In, SDValue &Out) { static SDValue stripExtractLoElt(SDValue In) { if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) { - if (Idx->isNullValue() && In.getValueSizeInBits() <= 32) + if (Idx->isZero() && In.getValueSizeInBits() <= 32) return In.getOperand(0); } } @@ -386,11 +114,11 @@ FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM, return new AMDGPUDAGToDAGISel(TM, OptLevel); } -/// This pass converts a legalized DAG into a R600-specific -// DAG, ready for instruction scheduling. -FunctionPass *llvm::createR600ISelDag(TargetMachine *TM, - CodeGenOpt::Level OptLevel) { - return new R600DAGToDAGISel(TM, OptLevel); +AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel( + TargetMachine *TM /*= nullptr*/, + CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/) + : SelectionDAGISel(*TM, OptLevel) { + EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG; } bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { @@ -468,6 +196,16 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const { } } +void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AMDGPUArgumentUsageInfo>(); + AU.addRequired<LegacyDivergenceAnalysis>(); +#ifdef EXPENSIVE_CHECKS + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); +#endif + SelectionDAGISel::getAnalysisUsage(AU); +} + bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const { assert(Subtarget->d16PreservesUnusedBits()); MVT VT = N->getValueType(0).getSimpleVT(); @@ -903,8 +641,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { uint32_t OffsetVal = Offset->getZExtValue(); uint32_t WidthVal = Width->getZExtValue(); - ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, - SDLoc(N), N->getOperand(0), OffsetVal, WidthVal)); + ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal, + WidthVal)); return; } case AMDGPUISD::DIV_SCALE: { @@ -1207,7 +945,14 @@ void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) { Ops[8] = N->getOperand(0); Ops[9] = N->getOperand(4); - CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32_e64, N->getVTList(), Ops); + // If there are no source modifiers, prefer fmac over fma because it can use + // the smaller VOP2 encoding. + bool UseFMAC = Subtarget->hasDLInsts() && + cast<ConstantSDNode>(Ops[0])->isZero() && + cast<ConstantSDNode>(Ops[2])->isZero() && + cast<ConstantSDNode>(Ops[4])->isZero(); + unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64; + CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops); } void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) { @@ -1707,7 +1452,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, !cast<ConstantSDNode>(Idxen)->getSExtValue() && !cast<ConstantSDNode>(Addr64)->getSExtValue()) { uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | - APInt::getAllOnesValue(32).getZExtValue(); // Size + APInt::getAllOnes(32).getZExtValue(); // Size SDLoc DL(Addr); const SITargetLowering& Lowering = @@ -2202,9 +1947,17 @@ bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index, return true; } -SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL, +SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset, uint32_t Width) { + if (Val->isDivergent()) { + unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64; + SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32); + SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32); + + return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W); + } + unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; // Transformation function, pack the offset and width of a BFE into // the format expected by the S_BFE_I32 / S_BFE_U32. In the second // source, bits [5:0] contain the offset and bits [22:16] the width. @@ -2229,10 +1982,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { if (0 < BVal && BVal <= CVal && CVal < 32) { bool Signed = N->getOpcode() == ISD::SRA; - unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; - - ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal, - 32 - CVal)); + ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal, + 32 - CVal)); return; } } @@ -2255,9 +2006,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { if (isMask_32(MaskVal)) { uint32_t WidthVal = countPopulation(MaskVal); - - ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), - Srl.getOperand(0), ShiftVal, WidthVal)); + ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal, + WidthVal)); return; } } @@ -2277,9 +2027,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { if (isMask_32(MaskVal)) { uint32_t WidthVal = countPopulation(MaskVal); - - ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), - And.getOperand(0), ShiftVal, WidthVal)); + ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal, + WidthVal)); return; } } @@ -2306,7 +2055,7 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { break; unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); - ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0), + ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0), Amt->getZExtValue(), Width)); return; } @@ -3111,128 +2860,3 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() { CurDAG->RemoveDeadNodes(); } while (IsModified); } - -bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - Subtarget = &MF.getSubtarget<R600Subtarget>(); - return SelectionDAGISel::runOnMachineFunction(MF); -} - -bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { - if (!N->readMem()) - return false; - if (CbId == -1) - return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || - N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT; - - return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId; -} - -bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, - SDValue& IntPtr) { - if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { - IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), - true); - return true; - } - return false; -} - -bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, - SDValue& BaseReg, SDValue &Offset) { - if (!isa<ConstantSDNode>(Addr)) { - BaseReg = Addr; - Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); - return true; - } - return false; -} - -void R600DAGToDAGISel::Select(SDNode *N) { - unsigned int Opc = N->getOpcode(); - if (N->isMachineOpcode()) { - N->setNodeId(-1); - return; // Already selected. - } - - switch (Opc) { - default: break; - case AMDGPUISD::BUILD_VERTICAL_VECTOR: - case ISD::SCALAR_TO_VECTOR: - case ISD::BUILD_VECTOR: { - EVT VT = N->getValueType(0); - unsigned NumVectorElts = VT.getVectorNumElements(); - unsigned RegClassID; - // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG - // that adds a 128 bits reg copy when going through TwoAddressInstructions - // pass. We want to avoid 128 bits copies as much as possible because they - // can't be bundled by our scheduler. - switch(NumVectorElts) { - case 2: RegClassID = R600::R600_Reg64RegClassID; break; - case 4: - if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) - RegClassID = R600::R600_Reg128VerticalRegClassID; - else - RegClassID = R600::R600_Reg128RegClassID; - break; - default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); - } - SelectBuildVector(N, RegClassID); - return; - } - } - - SelectCode(N); -} - -bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, - SDValue &Offset) { - ConstantSDNode *C; - SDLoc DL(Addr); - - if ((C = dyn_cast<ConstantSDNode>(Addr))) { - Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); - Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); - } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && - (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { - Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); - Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); - } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && - (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { - Base = Addr.getOperand(0); - Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); - } else { - Base = Addr; - Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); - } - - return true; -} - -bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, - SDValue &Offset) { - ConstantSDNode *IMMOffset; - - if (Addr.getOpcode() == ISD::ADD - && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) - && isInt<16>(IMMOffset->getZExtValue())) { - - Base = Addr.getOperand(0); - Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), - MVT::i32); - return true; - // If the pointer address is constant, we can move it to the offset field. - } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) - && isInt<16>(IMMOffset->getZExtValue())) { - Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), - SDLoc(CurDAG->getEntryNode()), - R600::ZERO, MVT::i32); - Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), - MVT::i32); - return true; - } - - // Default case, no offset - Base = Addr; - Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); - return true; -} |