aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 601
1 files changed, 384 insertions, 217 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index aaf448346b53..340f4ac6f57a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -12,48 +12,21 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUArgumentUsageInfo.h"
-#include "AMDGPUISelLowering.h" // For AMDGPUISD
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPUPerfHintAnalysis.h"
-#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIDefines.h"
-#include "SIISelLowering.h"
-#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
+
#ifdef EXPENSIVE_CHECKS
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#endif
-#include "llvm/IR/Instruction.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/MathExtras.h"
-#include <cassert>
-#include <cstdint>
-#include <new>
-#include <vector>
#define DEBUG_TYPE "isel"
@@ -191,6 +164,9 @@ private:
bool isUniformLoad(const SDNode *N) const;
bool isUniformBr(const SDNode *N) const;
+ bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
+ SDValue &RHS) const;
+
MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
@@ -200,11 +176,16 @@ private:
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool isDSOffsetLegal(SDValue Base, unsigned Offset,
- unsigned OffsetBits) const;
+ bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
+ bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
+ unsigned Size) const;
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
SDValue &Offset1) const;
+ bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1) const;
+ bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1, unsigned Size) const;
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
@@ -233,11 +214,11 @@ private:
template <bool IsSigned>
bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset, SDValue &SLC) const;
- bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset, SDValue &SLC) const;
- bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset, SDValue &SLC) const;
+ SDValue &Offset) const;
+ bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
+ SDValue &VOffset, SDValue &Offset) const;
+ bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
+ SDValue &Offset) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
@@ -252,11 +233,15 @@ private:
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
+ bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
+ bool AllowAbs = true) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
+ bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
@@ -519,8 +504,8 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
return true;
// TODO: Move into isKnownNeverNaN
- if (N->getFlags().isDefined())
- return N->getFlags().hasNoNaNs();
+ if (N->getFlags().hasNoNaNs())
+ return true;
return CurDAG->isKnownNeverNaN(N);
}
@@ -557,8 +542,8 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
unsigned OpNo) const {
if (!N->isMachineOpcode()) {
if (N->getOpcode() == ISD::CopyToReg) {
- unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
- if (Register::isVirtualRegister(Reg)) {
+ Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (Reg.isVirtual()) {
MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
return MRI.getRegClass(Reg);
}
@@ -716,8 +701,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
(Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
Opc == ISD::ATOMIC_LOAD_FADD ||
Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
- Opc == AMDGPUISD::ATOMIC_LOAD_FMAX ||
- Opc == AMDGPUISD::ATOMIC_LOAD_CSUB)) {
+ Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
N = glueCopyToM0LDSInit(N);
SelectCode(N);
return;
@@ -920,6 +904,53 @@ bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
Term->getMetadata("structurizecfg.uniform");
}
+static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
+ SDValue &N0, SDValue &N1) {
+ if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
+ Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ // As we split 64-bit `or` earlier, it's a complicated pattern to match, i.e.
+ // (i64 (bitcast (v2i32 (build_vector
+ // (or (extract_vector_elt V, 0), OFFSET),
+ // (extract_vector_elt V, 1)))))
+ SDValue Lo = Addr.getOperand(0).getOperand(0);
+ if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
+ SDValue BaseLo = Lo.getOperand(0);
+ SDValue BaseHi = Addr.getOperand(0).getOperand(1);
+ // Check that split base (Lo and Hi) are extracted from the same one.
+ if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
+ // Lo is statically extracted from index 0.
+ isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
+ BaseLo.getConstantOperandVal(1) == 0 &&
+ // Hi is statically extracted from index 1.
+ isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
+ BaseHi.getConstantOperandVal(1) == 1) {
+ N0 = BaseLo.getOperand(0).getOperand(0);
+ N1 = Lo.getOperand(1);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
+ SDValue &RHS) const {
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ LHS = Addr.getOperand(0);
+ RHS = Addr.getOperand(1);
+ return true;
+ }
+
+ if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
+ assert(LHS && RHS && isa<ConstantSDNode>(RHS));
+ return true;
+ }
+
+ return false;
+}
+
StringRef AMDGPUDAGToDAGISel::getPassName() const {
return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
@@ -994,7 +1025,7 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
static const unsigned OpcMap[2][2][2] = {
{{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
- {AMDGPU::V_SUB_I32_e32, AMDGPU::V_ADD_I32_e32}},
+ {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
{{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
{AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
@@ -1073,7 +1104,7 @@ void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
}
if (IsVALU) {
- unsigned Opc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+ unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
CurDAG->SelectNodeTo(
N, Opc, N->getVTList(),
@@ -1099,7 +1130,7 @@ void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
Ops[8] = N->getOperand(0);
Ops[9] = N->getOperand(4);
- CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
+ CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32_e64, N->getVTList(), Ops);
}
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
@@ -1124,9 +1155,14 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
assert(VT == MVT::f32 || VT == MVT::f64);
unsigned Opc
- = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
+ = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
+ // omod
+ SDValue Ops[8];
+ SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
+ SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
+ SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
@@ -1135,7 +1171,7 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
SDLoc SL(N);
bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
- unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
+ unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
@@ -1143,13 +1179,11 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
-bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
- unsigned OffsetBits) const {
- if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
- (OffsetBits == 8 && !isUInt<8>(Offset)))
+bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
+ if (!isUInt<16>(Offset))
return false;
- if (Subtarget->hasUsableDSOffset() ||
+ if (!Base || Subtarget->hasUsableDSOffset() ||
Subtarget->unsafeDSOffsetFoldingEnabled())
return true;
@@ -1165,7 +1199,7 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
+ if (isDSOffsetLegal(N0, C1->getSExtValue())) {
// (add n0, c0)
Base = N0;
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
@@ -1175,7 +1209,7 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
// sub C, x -> add (sub 0, x), C
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
int64_t ByteOffset = C->getSExtValue();
- if (isUInt<16>(ByteOffset)) {
+ if (isDSOffsetLegal(SDValue(), ByteOffset)) {
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
// XXX - This is kind of hacky. Create a dummy sub node so we can check
@@ -1184,13 +1218,13 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
Zero, Addr.getOperand(1));
- if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
+ if (isDSOffsetLegal(Sub, ByteOffset)) {
SmallVector<SDValue, 3> Opnds;
Opnds.push_back(Zero);
Opnds.push_back(Addr.getOperand(1));
// FIXME: Select to VOP3 version for with-carry.
- unsigned SubOp = AMDGPU::V_SUB_I32_e32;
+ unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
if (Subtarget->hasAddNoCarry()) {
SubOp = AMDGPU::V_SUB_U32_e64;
Opnds.push_back(
@@ -1214,7 +1248,7 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
SDLoc DL(Addr);
- if (isUInt<16>(CAddr->getZExtValue())) {
+ if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, Zero);
@@ -1230,75 +1264,104 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
return true;
}
+bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
+ unsigned Offset1,
+ unsigned Size) const {
+ if (Offset0 % Size != 0 || Offset1 % Size != 0)
+ return false;
+ if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
+ return false;
+
+ if (!Base || Subtarget->hasUsableDSOffset() ||
+ Subtarget->unsafeDSOffsetFoldingEnabled())
+ return true;
+
+ // On Southern Islands, instructions with a negative base value and an offset
+ // don't seem to work.
+ return CurDAG->SignBitIsZero(Base);
+}
+
// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
SDValue &Offset0,
SDValue &Offset1) const {
+ return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
+}
+
+bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
+ SDValue &Offset0,
+ SDValue &Offset1) const {
+ return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
+}
+
+bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
+ SDValue &Offset0, SDValue &Offset1,
+ unsigned Size) const {
SDLoc DL(Addr);
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- unsigned DWordOffset0 = C1->getZExtValue() / 4;
- unsigned DWordOffset1 = DWordOffset0 + 1;
+ unsigned OffsetValue0 = C1->getZExtValue();
+ unsigned OffsetValue1 = OffsetValue0 + Size;
+
// (add n0, c0)
- if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
+ if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
Base = N0;
- Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
return true;
}
} else if (Addr.getOpcode() == ISD::SUB) {
// sub C, x -> add (sub 0, x), C
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
- unsigned DWordOffset0 = C->getZExtValue() / 4;
- unsigned DWordOffset1 = DWordOffset0 + 1;
+ if (const ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
+ unsigned OffsetValue0 = C->getZExtValue();
+ unsigned OffsetValue1 = OffsetValue0 + Size;
- if (isUInt<8>(DWordOffset0)) {
+ if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
SDLoc DL(Addr);
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
// XXX - This is kind of hacky. Create a dummy sub node so we can check
// the known bits in isDSOffsetLegal. We need to emit the selected node
// here, so this is thrown away.
- SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
- Zero, Addr.getOperand(1));
+ SDValue Sub =
+ CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));
- if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
+ if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
SmallVector<SDValue, 3> Opnds;
Opnds.push_back(Zero);
Opnds.push_back(Addr.getOperand(1));
- unsigned SubOp = AMDGPU::V_SUB_I32_e32;
+ unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
if (Subtarget->hasAddNoCarry()) {
SubOp = AMDGPU::V_SUB_U32_e64;
Opnds.push_back(
CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
}
- MachineSDNode *MachineSub
- = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
+ MachineSDNode *MachineSub = CurDAG->getMachineNode(
+ SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);
Base = SDValue(MachineSub, 0);
- Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
return true;
}
}
}
} else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
- unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
- unsigned DWordOffset1 = DWordOffset0 + 1;
- assert(4 * DWordOffset0 == CAddr->getZExtValue());
+ unsigned OffsetValue0 = CAddr->getZExtValue();
+ unsigned OffsetValue1 = OffsetValue0 + Size;
- if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
+ if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
- MachineSDNode *MovZero
- = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- DL, MVT::i32, Zero);
+ MachineSDNode *MovZero =
+ CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
Base = SDValue(MovZero, 0);
- Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
return true;
}
}
@@ -1454,22 +1517,16 @@ static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
SDLoc DL(N);
- const MachineFunction &MF = CurDAG->getMachineFunction();
- const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-
- if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
- SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
- FI->getValueType(0));
- // If we can resolve this to a frame index access, this will be relative to
- // either the stack or frame pointer SGPR.
- return std::make_pair(
- TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
- }
+ auto *FI = dyn_cast<FrameIndexSDNode>(N);
+ SDValue TFI =
+ FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
- // If we don't know this private access is a local stack object, it needs to
- // be relative to the entry point's scratch wave offset.
- return std::make_pair(N, CurDAG->getTargetConstant(0, DL, MVT::i32));
+ // We rebase the base address into an absolute stack address and hence
+ // use constant 0 for soffset. This value must be retained until
+ // frame elimination, and eliminateFrameIndex will choose the appropriate
+ // frame register if need be.
+ return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
}
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
@@ -1628,155 +1685,245 @@ static MemSDNode* findMemSDNode(SDNode *N) {
llvm_unreachable("cannot find MemSDNode in the pattern!");
}
-static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
- SDValue &N0, SDValue &N1) {
- if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
- Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
- // As we split 64-bit `or` earlier, it's complicated pattern to match, i.e.
- // (i64 (bitcast (v2i32 (build_vector
- // (or (extract_vector_elt V, 0), OFFSET),
- // (extract_vector_elt V, 1)))))
- SDValue Lo = Addr.getOperand(0).getOperand(0);
- if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
- SDValue BaseLo = Lo.getOperand(0);
- SDValue BaseHi = Addr.getOperand(0).getOperand(1);
- // Check that split base (Lo and Hi) are extracted from the same one.
- if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
- // Lo is statically extracted from index 0.
- isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
- BaseLo.getConstantOperandVal(1) == 0 &&
- // Hi is statically extracted from index 0.
- isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
- BaseHi.getConstantOperandVal(1) == 1) {
- N0 = BaseLo.getOperand(0).getOperand(0);
- N1 = Lo.getOperand(1);
- return true;
- }
- }
- }
- return false;
-}
-
template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
SDValue Addr,
SDValue &VAddr,
- SDValue &Offset,
- SDValue &SLC) const {
+ SDValue &Offset) const {
int64_t OffsetVal = 0;
+ unsigned AS = findMemSDNode(N)->getAddressSpace();
+
if (Subtarget->hasFlatInstOffsets() &&
(!Subtarget->hasFlatSegmentOffsetBug() ||
- findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)) {
+ AS != AMDGPUAS::FLAT_ADDRESS)) {
SDValue N0, N1;
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- N0 = Addr.getOperand(0);
- N1 = Addr.getOperand(1);
- } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
- assert(N0 && N1 && isa<ConstantSDNode>(N1));
- }
- if (N0 && N1) {
+ if (isBaseWithConstantOffset64(Addr, N0, N1)) {
uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
const SIInstrInfo *TII = Subtarget->getInstrInfo();
- unsigned AS = findMemSDNode(N)->getAddressSpace();
if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
Addr = N0;
OffsetVal = COffsetVal;
} else {
// If the offset doesn't fit, put the low bits into the offset field and
// add the rest.
+ //
+ // For a FLAT instruction the hardware decides whether to access
+ // global/scratch/shared memory based on the high bits of vaddr,
+ // ignoring the offset field, so we have to ensure that when we add
+ // remainder to vaddr it still points into the same underlying object.
+ // The easiest way to do that is to make sure that we split the offset
+ // into two pieces that are both >= 0 or both <= 0.
SDLoc DL(N);
- uint64_t ImmField;
- const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned);
- if (IsSigned) {
- ImmField = SignExtend64(COffsetVal, NumBits);
-
- // Don't use a negative offset field if the base offset is positive.
- // Since the scheduler currently relies on the offset field, doing so
- // could result in strange scheduling decisions.
-
- // TODO: Should we not do this in the opposite direction as well?
- if (static_cast<int64_t>(COffsetVal) > 0) {
- if (static_cast<int64_t>(ImmField) < 0) {
- const uint64_t OffsetMask =
- maskTrailingOnes<uint64_t>(NumBits - 1);
- ImmField = COffsetVal & OffsetMask;
- }
- }
- } else {
- // TODO: Should we do this for a negative offset?
- const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits);
- ImmField = COffsetVal & OffsetMask;
- }
+ uint64_t RemainderOffset;
- uint64_t RemainderOffset = COffsetVal - ImmField;
+ std::tie(OffsetVal, RemainderOffset)
+ = TII->splitFlatOffset(COffsetVal, AS, IsSigned);
- assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned));
- assert(RemainderOffset + ImmField == COffsetVal);
-
- OffsetVal = ImmField;
+ SDValue AddOffsetLo =
+ getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
+ SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
- // TODO: Should this try to use a scalar add pseudo if the base address
- // is uniform and saddr is usable?
- SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
- SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
+ if (Addr.getValueType().getSizeInBits() == 32) {
+ SmallVector<SDValue, 3> Opnds;
+ Opnds.push_back(N0);
+ Opnds.push_back(AddOffsetLo);
+ unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
+ if (Subtarget->hasAddNoCarry()) {
+ AddOp = AMDGPU::V_ADD_U32_e64;
+ Opnds.push_back(Clamp);
+ }
+ Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
+ } else {
+ // TODO: Should this try to use a scalar add pseudo if the base address
+ // is uniform and saddr is usable?
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
- SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
- MVT::i32, N0, Sub0);
- SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
- MVT::i32, N0, Sub1);
+ SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, N0, Sub0);
+ SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, N0, Sub1);
- SDValue AddOffsetLo =
- getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
- SDValue AddOffsetHi =
- getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
+ SDValue AddOffsetHi =
+ getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
- SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
- SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
- SDNode *Add =
- CurDAG->getMachineNode(AMDGPU::V_ADD_I32_e64, DL, VTs,
- {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
+ SDNode *Add =
+ CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
+ {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
- SDNode *Addc = CurDAG->getMachineNode(
- AMDGPU::V_ADDC_U32_e64, DL, VTs,
- {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
+ SDNode *Addc = CurDAG->getMachineNode(
+ AMDGPU::V_ADDC_U32_e64, DL, VTs,
+ {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
- SDValue RegSequenceArgs[] = {
- CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
- SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
+ SDValue RegSequenceArgs[] = {
+ CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
+ SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
- Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
- MVT::i64, RegSequenceArgs),
- 0);
+ Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::i64, RegSequenceArgs),
+ 0);
+ }
}
}
}
VAddr = Addr;
Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
- SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
return true;
}
-bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
- SDValue Addr,
- SDValue &VAddr,
- SDValue &Offset,
- SDValue &SLC) const {
- return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
+// If this matches zero_extend i32:x, return x
+static SDValue matchZExtFromI32(SDValue Op) {
+ if (Op.getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+
+ SDValue ExtSrc = Op.getOperand(0);
+ return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
+}
+
+// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
+bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
+ SDValue Addr,
+ SDValue &SAddr,
+ SDValue &VOffset,
+ SDValue &Offset) const {
+ int64_t ImmOffset = 0;
+
+ // Match the immediate offset first, which canonically is moved as low as
+ // possible.
+
+ SDValue LHS, RHS;
+ if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
+ int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+
+ if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, true)) {
+ Addr = LHS;
+ ImmOffset = COffsetVal;
+ } else if (!LHS->isDivergent() && COffsetVal > 0) {
+ SDLoc SL(N);
+ // saddr + large_offset -> saddr + (voffset = large_offset & ~MaxOffset) +
+ // (large_offset & MaxOffset);
+ int64_t SplitImmOffset, RemainderOffset;
+ std::tie(SplitImmOffset, RemainderOffset)
+ = TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, true);
+
+ if (isUInt<32>(RemainderOffset)) {
+ SDNode *VMov = CurDAG->getMachineNode(
+ AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
+ CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
+ VOffset = SDValue(VMov, 0);
+ SAddr = LHS;
+ Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
+ return true;
+ }
+ }
+ }
+
+ // Match the variable offset.
+ if (Addr.getOpcode() != ISD::ADD) {
+ if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
+ isa<ConstantSDNode>(Addr))
+ return false;
+
+ // It's cheaper to materialize a single 32-bit zero for vaddr than the two
+ // moves required to copy a 64-bit SGPR to VGPR.
+ SAddr = Addr;
+ SDNode *VMov = CurDAG->getMachineNode(
+ AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
+ CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
+ VOffset = SDValue(VMov, 0);
+ Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
+ return true;
+ }
+
+ LHS = Addr.getOperand(0);
+ RHS = Addr.getOperand(1);
+
+ if (!LHS->isDivergent()) {
+ // add (i64 sgpr), (zero_extend (i32 vgpr))
+ if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
+ SAddr = LHS;
+ VOffset = ZextRHS;
+ }
+ }
+
+ if (!SAddr && !RHS->isDivergent()) {
+ // add (zero_extend (i32 vgpr)), (i64 sgpr)
+ if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
+ SAddr = RHS;
+ VOffset = ZextLHS;
+ }
+ }
+
+ if (!SAddr)
+ return false;
+
+ Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
+ return true;
}
-bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
- SDValue Addr,
- SDValue &VAddr,
- SDValue &Offset,
- SDValue &SLC) const {
- return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
+// Match (32-bit SGPR base) + sext(imm offset)
+bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *N,
+ SDValue Addr,
+ SDValue &SAddr,
+ SDValue &Offset) const {
+ if (Addr->isDivergent())
+ return false;
+
+ SAddr = Addr;
+ int64_t COffsetVal = 0;
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ SAddr = Addr.getOperand(0);
+ }
+
+ if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
+ SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
+ } else if (SAddr.getOpcode() == ISD::ADD &&
+ isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
+ // Materialize this into a scalar add for the scalar address to avoid
+ // readfirstlane.
+ auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
+ FI->getValueType(0));
+ SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, SDLoc(SAddr),
+ MVT::i32, TFI, SAddr.getOperand(1)),
+ 0);
+ }
+
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+
+ if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+ int64_t RemainderOffset = COffsetVal;
+ int64_t ImmField = 0;
+ const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(*Subtarget, true);
+ // Use signed division by a power of two to truncate towards 0.
+ int64_t D = 1LL << (NumBits - 1);
+ RemainderOffset = (COffsetVal / D) * D;
+ ImmField = COffsetVal - RemainderOffset;
+
+ assert(TII->isLegalFLATOffset(ImmField, AMDGPUAS::PRIVATE_ADDRESS, true));
+ assert(RemainderOffset + ImmField == COffsetVal);
+
+ COffsetVal = ImmField;
+
+ SDLoc DL(N);
+ SDValue AddOffset =
+ getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
+ SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, DL, MVT::i32,
+ SAddr, AddOffset), 0);
+ }
+
+ Offset = CurDAG->getTargetConstant(COffsetVal, SDLoc(), MVT::i16);
+
+ return true;
}
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
@@ -2223,11 +2370,12 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
SDValue CmpVal = Mem->getOperand(2);
+ SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1);
// XXX - Do we care about glue operands?
SDValue Ops[] = {
- CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
+ CmpVal, VAddr, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain()
};
CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
@@ -2241,8 +2389,9 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
SDValue CmpVal = Mem->getOperand(2);
+ SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1);
SDValue Ops[] = {
- CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
+ CmpVal, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain()
};
CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
@@ -2284,7 +2433,7 @@ void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
SDValue PtrOffset = Ptr.getOperand(1);
const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
- if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
+ if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
N = glueCopyToM0(N, PtrBase);
Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
}
@@ -2379,15 +2528,11 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
SDValue Chain = N->getOperand(0);
SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
- // TODO: Can this just be removed from the instruction?
- SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
-
const unsigned Opc = gwsIntrinToOpcode(IntrID);
SmallVector<SDValue, 5> Ops;
if (HasVSrc)
Ops.push_back(N->getOperand(2));
Ops.push_back(OffsetField);
- Ops.push_back(GDS);
Ops.push_back(Chain);
SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
@@ -2511,7 +2656,8 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
}
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
- unsigned &Mods) const {
+ unsigned &Mods,
+ bool AllowAbs) const {
Mods = 0;
Src = In;
@@ -2520,7 +2666,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
Src = Src.getOperand(0);
}
- if (Src.getOpcode() == ISD::FABS) {
+ if (AllowAbs && Src.getOpcode() == ISD::FABS) {
Mods |= SISrcMods::ABS;
Src = Src.getOperand(0);
}
@@ -2539,6 +2685,17 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
return false;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ unsigned Mods;
+ if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
SelectVOP3Mods(In, Src, SrcMods);
@@ -2563,6 +2720,16 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
+ SDValue &SrcMods, SDValue &Clamp,
+ SDValue &Omod) const {
+ SDLoc DL(In);
+ Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
+
+ return SelectVOP3BMods(In, Src, SrcMods);
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
SDValue &Clamp, SDValue &Omod) const {
Src = In;