Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstructions.td')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td  130
1 file changed, 102 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 636337ede000..7be63ae6964b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1011,7 +1011,7 @@ def : GCNPat <
}
def : GCNPat <
- (i32 (ctpop i32:$popcnt)),
+ (i32 (DivergentUnaryFrag<ctpop> i32:$popcnt)),
(V_BCNT_U32_B32_e64 VSrc_b32:$popcnt, (i32 0))
>;
@@ -1020,6 +1020,14 @@ def : GCNPat <
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
+def : GCNPat <
+ (i64 (DivergentUnaryFrag<ctpop> i64:$src)),
+ (REG_SEQUENCE VReg_64,
+ (V_BCNT_U32_B32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub1)),
+ (i32 (V_BCNT_U32_B32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0)))), sub0,
+ (i32 (V_MOV_B32_e32 (i32 0))), sub1)
+>;
+
/********** ============================================ **********/
/********** Extraction, Insertion, Building and Casting **********/
/********** ============================================ **********/
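
The new i64 pattern in the hunk above exploits the addend operand of V_BCNT_U32_B32, which computes popcount(src0) + src1: chaining two of them accumulates the counts of the low and high halves, and the high dword of the 64-bit result is simply zeroed with V_MOV_B32. A minimal C++ sketch of the same decomposition (plain scalar code, not the emitted ISA):

#include <bit>
#include <cassert>
#include <cstdint>

// Models V_BCNT_U32_B32: popcount(src0) + src1.
static uint32_t v_bcnt(uint32_t src0, uint32_t src1) {
  return static_cast<uint32_t>(std::popcount(src0)) + src1;
}

int main() {
  uint64_t x = 0xF0F0F0F0'0F0F0F0Full;
  uint32_t lo = static_cast<uint32_t>(x);
  uint32_t hi = static_cast<uint32_t>(x >> 32);
  // Chain: count the low half, then feed that count into the high-half count.
  uint64_t result = v_bcnt(hi, v_bcnt(lo, 0)); // high dword of result is 0
  assert(result == static_cast<uint64_t>(std::popcount(x)));
}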
@@ -1184,6 +1192,26 @@ def : Pat <
(v2f16 (EXTRACT_SUBREG v4f16:$vec, sub1))
>;
+def : Pat <
+ (extract_subvector v8i16:$vec, (i32 0)),
+ (v4i16 (EXTRACT_SUBREG v8i16:$vec, sub0_sub1))
+>;
+
+def : Pat <
+ (extract_subvector v8i16:$vec, (i32 4)),
+ (v4i16 (EXTRACT_SUBREG v8i16:$vec, sub2_sub3))
+>;
+
+def : Pat <
+ (extract_subvector v8f16:$vec, (i32 0)),
+ (v4f16 (EXTRACT_SUBREG v8f16:$vec, sub0_sub1))
+>;
+
+def : Pat <
+ (extract_subvector v8f16:$vec, (i32 4)),
+ (v4f16 (EXTRACT_SUBREG v8f16:$vec, sub2_sub3))
+>;
+
foreach Index = 0-31 in {
def Extract_Element_v32i32_#Index : Extract_Element <
i32, v32i32, Index, !cast<SubRegIndex>(sub#Index)
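
The extract_subvector patterns added above are mechanical: eight 16-bit elements occupy a 128-bit register (four 32-bit subregisters), so the 4-element subvector at element index 0 or 4 is exactly the low or high 64-bit register pair. A sketch of the index-to-subregister mapping (the enum names here are illustrative stand-ins for the TableGen sub0_sub1/sub2_sub3 indices):

// Maps an extract_subvector element index of a v8i16/v8f16 source to the
// 64-bit subregister pair holding those four 16-bit elements.
enum class SubRegPair { Sub0Sub1, Sub2Sub3 };

SubRegPair subvectorPairForIndex(unsigned Index) {
  // Four 16-bit elements = 64 bits = two 32-bit subregisters.
  return Index == 0 ? SubRegPair::Sub0Sub1 : SubRegPair::Sub2Sub3;
}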
@@ -1279,6 +1307,26 @@ def : BitConvert <v2i64, v2f64, VReg_128>;
def : BitConvert <v2f64, v2i64, VReg_128>;
def : BitConvert <v4f32, v2i64, VReg_128>;
def : BitConvert <v2i64, v4f32, VReg_128>;
+def : BitConvert <v8i16, v4i32, SReg_128>;
+def : BitConvert <v4i32, v8i16, SReg_128>;
+def : BitConvert <v8f16, v4f32, VReg_128>;
+def : BitConvert <v8f16, v4i32, VReg_128>;
+def : BitConvert <v4f32, v8f16, VReg_128>;
+def : BitConvert <v4i32, v8f16, VReg_128>;
+def : BitConvert <v8i16, v8f16, VReg_128>;
+def : BitConvert <v8f16, v8i16, VReg_128>;
+def : BitConvert <v4f32, v8i16, VReg_128>;
+def : BitConvert <v8i16, v4f32, VReg_128>;
+def : BitConvert <v8i16, v8f16, SReg_128>;
+def : BitConvert <v8i16, v2i64, SReg_128>;
+def : BitConvert <v8i16, v2f64, SReg_128>;
+def : BitConvert <v8f16, v2i64, SReg_128>;
+def : BitConvert <v8f16, v2f64, SReg_128>;
+def : BitConvert <v8f16, v8i16, SReg_128>;
+def : BitConvert <v2i64, v8i16, SReg_128>;
+def : BitConvert <v2f64, v8i16, SReg_128>;
+def : BitConvert <v2i64, v8f16, SReg_128>;
+def : BitConvert <v2f64, v8f16, SReg_128>;
// 160-bit bitcast
def : BitConvert <v5i32, v5f32, SReg_160>;
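
Each BitConvert entry added in the 128-bit block above only declares that a bitcast between two same-width types in the given register class is a free reinterpretation; no instruction is emitted. In C++ terms the new v8i16 <-> v4i32 case behaves like a std::bit_cast (sketch, assuming little-endian lane order):

#include <array>
#include <bit>
#include <cstdint>

int main() {
  std::array<uint16_t, 8> v8i16{1, 2, 3, 4, 5, 6, 7, 8};
  // Same 128 bits, viewed as four 32-bit lanes; no data movement.
  auto v4i32 = std::bit_cast<std::array<uint32_t, 4>>(v8i16);
  // The low lane packs elements 0 and 1 (little-endian).
  return v4i32[0] == (1u | (2u << 16)) ? 0 : 1;
}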
@@ -1762,44 +1810,44 @@ def BFIImm32 : PatFrag<
// (y & x) | (z & ~x)
def : AMDGPUPat <
(DivergentBinFrag<or> (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
- (V_BFI_B32_e64 $x, $y, $z)
+ (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
>;
// (y & C) | (z & ~C)
def : AMDGPUPat <
(BFIImm32 i32:$x, i32:$y, i32:$z),
- (V_BFI_B32_e64 $x, $y, $z)
+ (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
>;
// 64-bit version
def : AMDGPUPat <
(DivergentBinFrag<or> (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
- (REG_SEQUENCE SReg_64,
- (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub0)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub0)),
- (i32 (EXTRACT_SUBREG SReg_64:$z, sub0))), sub0,
- (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub1)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub1)),
- (i32 (EXTRACT_SUBREG SReg_64:$z, sub1))), sub1)
+ (REG_SEQUENCE VReg_64,
+ (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
+ (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
>;
// SHA-256 Ch function
// z ^ (x & (y ^ z))
def : AMDGPUPat <
(DivergentBinFrag<xor> i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
- (V_BFI_B32_e64 $x, $y, $z)
+ (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
>;
// 64-bit version
def : AMDGPUPat <
(DivergentBinFrag<xor> i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
- (REG_SEQUENCE SReg_64,
- (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub0)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub0)),
- (i32 (EXTRACT_SUBREG SReg_64:$z, sub0))), sub0,
- (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub1)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub1)),
- (i32 (EXTRACT_SUBREG SReg_64:$z, sub1))), sub1)
+ (REG_SEQUENCE VReg_64,
+ (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
+ (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
>;
def : AMDGPUPat <
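
V_BFI_B32 performs a bitfield insert, (src1 & src0) | (src2 & ~src0), which is why both the generic select form and the SHA-256 Ch form z ^ (x & (y ^ z)) in the hunk above map onto it: the two expressions are bit-for-bit identical. A quick C++ check of that identity (a verification sketch, not LLVM code):

#include <cassert>
#include <cstdint>

// Models V_BFI_B32: bits of y where x is set, bits of z elsewhere.
static uint32_t bfi(uint32_t x, uint32_t y, uint32_t z) {
  return (y & x) | (z & ~x);
}

int main() {
  for (uint32_t x : {0u, 0xFFFFFFFFu, 0xA5A5A5A5u})
    for (uint32_t y : {0x12345678u, 0xDEADBEEFu})
      for (uint32_t z : {0x0F0F0F0Fu, 0xCAFEBABEu})
        // SHA-256 Ch: where an x bit is 1 the result bit is y's, else z's.
        assert((z ^ (x & (y ^ z))) == bfi(x, y, z));
}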
@@ -2725,21 +2773,21 @@ def : AMDGPUPat <
def : AMDGPUPat <
(DivergentBinFrag<or> (and i32:$x, i32:$z),
(and i32:$y, (or i32:$x, i32:$z))),
- (V_BFI_B32_e64 (V_XOR_B32_e64 i32:$x, i32:$y), i32:$z, i32:$y)
+ (V_BFI_B32_e64 (V_XOR_B32_e64 VSrc_b32:$x, VSrc_b32:$y), VSrc_b32:$z, VSrc_b32:$y)
>;
def : AMDGPUPat <
(DivergentBinFrag<or> (and i64:$x, i64:$z),
(and i64:$y, (or i64:$x, i64:$z))),
- (REG_SEQUENCE SReg_64,
- (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub0)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub0))),
- (i32 (EXTRACT_SUBREG SReg_64:$z, sub0)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub0))), sub0,
- (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub1)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub1))),
- (i32 (EXTRACT_SUBREG SReg_64:$z, sub1)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub1))), sub1)
+ (REG_SEQUENCE VReg_64,
+ (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub0))),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub0))), sub0,
+ (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub1))),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub1))), sub1)
>;
multiclass IntMed3Pat<Instruction med3Inst,
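
The pattern rewritten above is the SHA-256 Maj-style function (x & z) | (y & (x | z)), a per-bit majority vote. It reduces to a single BFI because BFI(x ^ y, z, y) yields z wherever x and y disagree (the tiebreaker) and y wherever they agree, which is exactly the majority. A small C++ check of the equivalence (verification sketch only):

#include <cassert>
#include <cstdint>

// V_BFI_B32 semantics, as above.
static uint32_t bfi(uint32_t x, uint32_t y, uint32_t z) {
  return (y & x) | (z & ~x);
}

int main() {
  for (uint32_t x : {0x0u, 0xFFFF0000u, 0xA5A5A5A5u})
    for (uint32_t y : {0x12345678u, 0xFF00FF00u})
      for (uint32_t z : {0x0F0F0F0Fu, 0x87654321u}) {
        uint32_t maj = (x & z) | (y & (x | z)); // per-bit majority of x, y, z
        assert(maj == bfi(x ^ y, z, y));
      }
}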
@@ -2825,6 +2873,15 @@ class AMDGPUGenericInstruction : GenericInstruction {
let Namespace = "AMDGPU";
}
+// Convert a wave address to a swizzled vector address (i.e. this is
+// for copying the stack pointer to a vector address appropriate to
+// use in the offset field of mubuf instructions).
+def G_AMDGPU_WAVE_ADDRESS : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
// Returns -1 if the input is zero.
def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
@@ -3027,6 +3084,16 @@ def G_AMDGPU_INTRIN_IMAGE_LOAD : AMDGPUGenericInstruction {
let mayStore = 1;
}
+def G_AMDGPU_INTRIN_IMAGE_LOAD_D16 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins unknown:$intrin, variable_ops);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+
+ // FIXME: Use separate opcode for atomics.
+ let mayStore = 1;
+}
+
// This is equivalent to the G_INTRINSIC*, but the operands may have
// been legalized depending on the subtarget requirements.
def G_AMDGPU_INTRIN_IMAGE_STORE : AMDGPUGenericInstruction {
@@ -3036,6 +3103,13 @@ def G_AMDGPU_INTRIN_IMAGE_STORE : AMDGPUGenericInstruction {
let mayStore = 1;
}
+def G_AMDGPU_INTRIN_IMAGE_STORE_D16 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins unknown:$intrin, variable_ops);
+ let hasSideEffects = 0;
+ let mayStore = 1;
+}
+
def G_AMDGPU_INTRIN_BVH_INTERSECT_RAY : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins unknown:$intrin, variable_ops);