Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUISelLowering.h')
-rw-r--r-- lib/Target/AMDGPU/AMDGPUISelLowering.h | 50
1 file changed, 37 insertions(+), 13 deletions(-)
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 5c31bddd9b1a..a4c3b413e103 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// \brief Interface definition of the TargetLowering class that is common
+/// Interface definition of the TargetLowering class that is common
/// to all AMD GPUs.
//
//===----------------------------------------------------------------------===//
@@ -28,6 +28,8 @@ struct ArgDescriptor;
class AMDGPUTargetLowering : public TargetLowering {
private:
+ const AMDGPUSubtarget *Subtarget;
+
/// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been
/// legalized from a smaller type VT. Need to match pre-legalized type because
/// the generic legalization inserts the add/sub between the select and
/// compare.
@@ -39,12 +41,11 @@ public:
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
protected:
- const AMDGPUSubtarget *Subtarget;
AMDGPUAS AMDGPUASI;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
- /// \brief Split a vector store into multiple scalar stores.
+ /// Split a vector store into multiple scalar stores.
/// \returns The resulting chain.
SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
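Aside: the numBitsUnsigned/numBitsSigned helpers declared in this hunk report how many bits a value actually needs. A minimal sketch of what such queries typically compute on top of SelectionDAG analysis (illustrative only, not this patch's code, and assuming the KnownBits-returning computeKnownBits overload):

// Illustrative sketch, not part of the patch.
static unsigned numBitsUnsignedSketch(SDValue Op, SelectionDAG &DAG) {
  // Bits needed = value width minus the bits known to be leading zeros.
  KnownBits Known = DAG.computeKnownBits(Op);
  return Op.getScalarValueSizeInBits() - Known.countMinLeadingZeros();
}

static unsigned numBitsSignedSketch(SDValue Op, SelectionDAG &DAG) {
  // A value with N known sign bits fits in (width - N + 1) signed bits.
  return Op.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(Op) + 1;
}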
@@ -78,7 +79,6 @@ protected:
bool shouldCombineMemoryType(EVT VT) const;
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
@@ -87,6 +87,7 @@ protected:
SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -96,6 +97,7 @@ protected:
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
@@ -108,10 +110,10 @@ protected:
SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const;
SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const;
- /// \brief Split a vector load into 2 loads of half the vector.
+ /// Split a vector load into 2 loads of half the vector.
SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;
- /// \brief Split a vector store into 2 stores of half the vector.
+ /// Split a vector store into 2 stores of half the vector.
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
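Aside: SplitVectorLoad/SplitVectorStore implement a standard legalization pattern: one wide vector memory operation becomes two half-width operations whose chains are merged afterwards. A rough sketch of the store half of that idea, assuming stock SelectionDAG helpers (illustrative only, not this file's code):

// Illustrative sketch, not part of the patch: split the stored value in two.
StoreSDNode *Store = cast<StoreSDNode>(Op);
SDLoc SL(Op);
SDValue Val = Store->getValue();
EVT VT = Val.getValueType();
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, HalfVT, Val,
                         DAG.getVectorIdxConstant(0, SL));
SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, HalfVT, Val,
                         DAG.getVectorIdxConstant(VT.getVectorNumElements() / 2, SL));
// Lo is stored at the original address and Hi at the address advanced by
// HalfVT's store size; the two output chains are then rejoined with a
// TokenFactor node.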
@@ -120,8 +122,11 @@ protected:
SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) const;
- void analyzeFormalArgumentsCompute(CCState &State,
- const SmallVectorImpl<ISD::InputArg> &Ins) const;
+
+ void analyzeFormalArgumentsCompute(
+ CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const;
+
public:
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
@@ -136,6 +141,10 @@ public:
return false;
}
+ static inline SDValue stripBitcast(SDValue Val) {
+ return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
+ }
+
static bool allUsesHaveSourceMods(const SDNode *N,
unsigned CostThreshold = 4);
bool isFAbsFree(EVT VT) const override;
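Aside: stripBitcast is a small convenience for DAG combines that want to match the node producing a value regardless of an intervening bitcast. A hypothetical use (names illustrative):

// Illustrative only: look through a possible bitcast before matching.
SDValue Src = AMDGPUTargetLowering::stripBitcast(N->getOperand(0));
if (Src.getOpcode() == ISD::FP_EXTEND) {
  // Match against the pre-bitcast producer here.
}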
@@ -146,7 +155,6 @@ public:
bool isZExtFree(Type *Src, Type *Dest) const override;
bool isZExtFree(EVT Src, EVT Dest) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
- bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const override;
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
@@ -168,6 +176,7 @@ public:
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ bool isSDNodeAlwaysUniform(const SDNode *N) const override;
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
@@ -224,7 +233,7 @@ public:
virtual SDNode *PostISelFolding(MachineSDNode *N,
SelectionDAG &DAG) const = 0;
- /// \brief Determine which of the bits specified in \p Mask are known to be
+ /// Determine which of the bits specified in \p Mask are known to be
/// either zero or one and return them in the \p KnownZero and \p KnownOne
/// bitsets.
void computeKnownBitsForTargetNode(const SDValue Op,
@@ -237,7 +246,7 @@ public:
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
- /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
+ /// Helper function that adds Reg to the LiveIn list of the DAG's
/// MachineFunction.
///
/// \returns a RegisterSDNode representing Reg if \p RawReg is true, otherwise
@@ -285,9 +294,9 @@ public:
GRID_OFFSET,
};
- /// \brief Helper function that returns the byte offset of the given
+ /// Helper function that returns the byte offset of the given
/// type of implicit parameter.
- uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
+ uint32_t getImplicitParameterOffset(const MachineFunction &MF,
const ImplicitParameter Param) const;
AMDGPUAS getAMDGPUAS() const {
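Aside: conceptually, implicit parameters such as GRID_OFFSET live in the kernel argument segment immediately after the explicit kernel arguments. A hedged sketch of the layout arithmetic, with invented names and an assumed 4-byte size per implicit parameter (this is not the patch's code):

// Illustrative layout math only; names and sizes are assumptions.
uint32_t implicitParamOffset(uint32_t ExplicitKernArgBytes,
                             uint32_t ParamIndex) {
  // Round up to the next 4-byte boundary past the explicit arguments.
  uint32_t Base = (ExplicitKernArgBytes + 3) & ~3u;
  return Base + ParamIndex * 4; // each implicit parameter assumed 4 bytes
}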
@@ -357,6 +366,7 @@ enum NodeType : unsigned {
FMED3,
SMED3,
UMED3,
+ FDOT2,
URECIP,
DIV_SCALE,
DIV_FMAS,
@@ -372,6 +382,7 @@ enum NodeType : unsigned {
RSQ,
RCP_LEGACY,
RSQ_LEGACY,
+ RCP_IFLAG,
FMUL_LEGACY,
RSQ_CLAMP,
LDEXP,
@@ -396,6 +407,7 @@ enum NodeType : unsigned {
MAD_I64_I32,
MUL_LOHI_I24,
MUL_LOHI_U24,
+ PERM,
TEXTURE_FETCH,
EXPORT, // exp on SI+
EXPORT_DONE, // exp on SI+ with done bit set
@@ -417,6 +429,10 @@ enum NodeType : unsigned {
// Convert two float 32 numbers into a single register holding two packed f16
// with round to zero.
CVT_PKRTZ_F16_F32,
+ CVT_PKNORM_I16_F32,
+ CVT_PKNORM_U16_F32,
+ CVT_PK_I16_I32,
+ CVT_PK_U16_U32,
// Same as the standard node, except the high bits of the resulting integer
// are known 0.
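Aside: the new CVT_PKNORM_* and CVT_PK_* nodes follow the same packing scheme as CVT_PKRTZ_F16_F32 above: two 32-bit sources are narrowed and packed into the low and high halves of one 32-bit result. A hedged scalar model of an i16 snorm-style pack (illustrative, not the hardware's exact rounding rules):

// Illustrative model: clamp to [-1, 1], scale to int16 range, pack two halves.
#include <algorithm>
#include <cmath>
#include <cstdint>

static int16_t toSNorm16(float X) {
  X = std::clamp(X, -1.0f, 1.0f);
  return static_cast<int16_t>(std::lround(X * 32767.0f));
}

uint32_t packSNorm16x2(float Lo, float Hi) {
  return static_cast<uint16_t>(toSNorm16(Lo)) |
         (static_cast<uint32_t>(static_cast<uint16_t>(toSNorm16(Hi))) << 16);
}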
@@ -451,14 +467,21 @@ enum NodeType : unsigned {
LOAD_CONSTANT,
TBUFFER_STORE_FORMAT,
TBUFFER_STORE_FORMAT_X3,
+ TBUFFER_STORE_FORMAT_D16,
TBUFFER_LOAD_FORMAT,
+ TBUFFER_LOAD_FORMAT_D16,
ATOMIC_CMP_SWAP,
ATOMIC_INC,
ATOMIC_DEC,
+ ATOMIC_LOAD_FADD,
+ ATOMIC_LOAD_FMIN,
+ ATOMIC_LOAD_FMAX,
BUFFER_LOAD,
BUFFER_LOAD_FORMAT,
+ BUFFER_LOAD_FORMAT_D16,
BUFFER_STORE,
BUFFER_STORE_FORMAT,
+ BUFFER_STORE_FORMAT_D16,
BUFFER_ATOMIC_SWAP,
BUFFER_ATOMIC_ADD,
BUFFER_ATOMIC_SUB,
@@ -470,6 +493,7 @@ enum NodeType : unsigned {
BUFFER_ATOMIC_OR,
BUFFER_ATOMIC_XOR,
BUFFER_ATOMIC_CMPSWAP,
+
LAST_AMDGPU_ISD_NUMBER
};