author     Dimitry Andric <dim@FreeBSD.org>   2017-04-26 19:45:00 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2017-04-26 19:45:00 +0000
commit     12f3ca4cdb95b193af905a00e722a4dcb40b3de3
tree       ae1a7fcfc24a8d4b23206c57121c3f361d4b7f84 /test/CodeGen/AMDGPU
parent     d99dafe2e4a385dd2a6c76da6d8258deb100657b
Vendor import of llvm trunk r301441 (tag: vendor/llvm/llvm-trunk-r301441)
Notes:
    svn path=/vendor/llvm/dist/; revision=317461
    svn path=/vendor/llvm/llvm-trunk-r301441/; revision=317462; tag=vendor/llvm/llvm-trunk-r301441
Diffstat (limited to 'test/CodeGen/AMDGPU')
-rw-r--r--  test/CodeGen/AMDGPU/add.v2i16.ll                   |   2
-rw-r--r--  test/CodeGen/AMDGPU/addrspacecast.ll               |   9
-rw-r--r--  test/CodeGen/AMDGPU/ashr.v2i16.ll                  |   2
-rw-r--r--  test/CodeGen/AMDGPU/code-object-metadata-images.ll |  80
-rw-r--r--  test/CodeGen/AMDGPU/fcanonicalize.f16.ll           |   2
-rw-r--r--  test/CodeGen/AMDGPU/fdiv.ll                        |  18
-rw-r--r--  test/CodeGen/AMDGPU/fence-amdgiz.ll                |  15
-rw-r--r--  test/CodeGen/AMDGPU/fmuladd.v2f16.ll               |  18
-rw-r--r--  test/CodeGen/AMDGPU/fneg-fabs.f16.ll               |   2
-rw-r--r--  test/CodeGen/AMDGPU/immv216.ll                     |   2
-rw-r--r--  test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll     |   2
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll        |  17
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.unreachable.ll     |   9
-rw-r--r--  test/CodeGen/AMDGPU/loop_break.ll                  |   2
-rw-r--r--  test/CodeGen/AMDGPU/lshr.v2i16.ll                  |   2
-rw-r--r--  test/CodeGen/AMDGPU/merge-m0.mir                   | 132
-rw-r--r--  test/CodeGen/AMDGPU/mubuf-offset-private.ll        | 136
-rw-r--r--  test/CodeGen/AMDGPU/multi-divergent-exit-region.ll | 180
-rw-r--r--  test/CodeGen/AMDGPU/nested-loop-conditions.ll      |  23
-rw-r--r--  test/CodeGen/AMDGPU/private-access-no-objects.ll   |  16
-rw-r--r--  test/CodeGen/AMDGPU/readcyclecounter.ll            |  14
-rw-r--r--  test/CodeGen/AMDGPU/ret_jump.ll                    |   2
-rw-r--r--  test/CodeGen/AMDGPU/sext-in-reg.ll                 |   2
-rw-r--r--  test/CodeGen/AMDGPU/shl.v2i16.ll                   |   2
-rw-r--r--  test/CodeGen/AMDGPU/sminmax.v2i16.ll               |   2
-rw-r--r--  test/CodeGen/AMDGPU/spill-m0.ll                    |  22
-rw-r--r--  test/CodeGen/AMDGPU/sub.v2i16.ll                   |   2
-rw-r--r--  test/CodeGen/AMDGPU/trap.ll                        |  21
28 files changed, 586 insertions(+), 150 deletions(-)
diff --git a/test/CodeGen/AMDGPU/add.v2i16.ll b/test/CodeGen/AMDGPU/add.v2i16.ll
index e137ef4bc236..73e80d523f1e 100644
--- a/test/CodeGen/AMDGPU/add.v2i16.ll
+++ b/test/CodeGen/AMDGPU/add.v2i16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; FIXME: Need to handle non-uniform case for function below (load without gep).
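For readers unfamiliar with LLVM's lit runner: in a RUN line, %s expands to the path of the test file itself, and the llc output is piped into FileCheck, which matches it against the CHECK lines in that same file. The updated first RUN line above is therefore roughly equivalent to the following manual invocation (a sketch; the test-tree path is illustrative and assumes llc and FileCheck from the matching build are on PATH):

    llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global \
        -verify-machineinstrs -enable-packed-inlinable-literals \
        < test/CodeGen/AMDGPU/add.v2i16.ll \
      | FileCheck -check-prefix=GFX9 -check-prefix=GCN test/CodeGen/AMDGPU/add.v2i16.ll

The same -enable-packed-inlinable-literals flag is added to the gfx901 RUN lines of many of the tests below in this import.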
diff --git a/test/CodeGen/AMDGPU/addrspacecast.ll b/test/CodeGen/AMDGPU/addrspacecast.ll
index 6ec93c72ec52..b1e71722d80c 100644
--- a/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=CI %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=GFX9 %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s
; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
@@ -223,9 +223,8 @@ define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
}
; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast:
-; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], 0{{$}}
-; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
-; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
+; HSA: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
+; HSA: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(0)*
store volatile i32 7, i32* %cast
diff --git a/test/CodeGen/AMDGPU/ashr.v2i16.ll b/test/CodeGen/AMDGPU/ashr.v2i16.ll
index 96a5e3b23758..7f424ef2a147 100644
--- a/test/CodeGen/AMDGPU/ashr.v2i16.ll
+++ b/test/CodeGen/AMDGPU/ashr.v2i16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s
diff --git a/test/CodeGen/AMDGPU/code-object-metadata-images.ll b/test/CodeGen/AMDGPU/code-object-metadata-images.ll
new file mode 100644
index 000000000000..918560469852
--- /dev/null
+++ b/test/CodeGen/AMDGPU/code-object-metadata-images.ll
@@ -0,0 +1,80 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+
+%opencl.image1d_t = type opaque
+%opencl.image1d_array_t = type opaque
+%opencl.image1d_buffer_t = type opaque
+%opencl.image2d_t = type opaque
+%opencl.image2d_array_t = type opaque
+%opencl.image2d_array_depth_t = type opaque
+%opencl.image2d_array_msaa_t = type opaque
+%opencl.image2d_array_msaa_depth_t = type opaque
+%opencl.image2d_depth_t = type opaque
+%opencl.image2d_msaa_t = type opaque
+%opencl.image2d_msaa_depth_t = type opaque
+%opencl.image3d_t = type opaque
+
+; CHECK: ---
+; CHECK: Version: [ 1, 0 ]
+
+; CHECK: Kernels:
+; CHECK: - Name: test
+; CHECK: Args:
+; CHECK: - Size: 8
+; CHECK: ValueKind: Image
+; CHECK: TypeName: image1d_t
+; CHECK: - Size: 8
+; CHECK: ValueKind: Image
+; CHECK: TypeName: image1d_array_t
+; CHECK: - Size: 8
+; CHECK: ValueKind: Image
+; CHECK: TypeName: image1d_buffer_t
+; CHECK: - Size: 8
+; CHECK: ValueKind: Image
+; CHECK: TypeName: image2d_t
+; CHECK: - Size: 8
+; CHECK: ValueKind: Image
+; CHECK: TypeName: image2d_array_t
+; CHECK: - Size: 8
+; CHECK: ValueKind: Image
+; CHECK: TypeName: image2d_array_depth_t
+; CHECK: - Size: 8
+; CHECK: ValueKind: Image
+; CHECK: TypeName: image2d_array_msaa_t
+; CHECK: - Size: 8
+; CHECK: ValueKind: Image
+; CHECK: TypeName: image2d_array_msaa_depth_t
+; CHECK: - Size: 8
+; CHECK: ValueKind: Image
+; CHECK: TypeName: image2d_depth_t
+; CHECK: - Size: 8
+; CHECK: ValueKind: Image
+; CHECK: TypeName: image2d_msaa_t
+; CHECK: - Size: 8
+; CHECK: ValueKind: Image
+; CHECK: TypeName: image2d_msaa_depth_t
+; CHECK: - Size: 8
+; CHECK: ValueKind: Image
+; CHECK: TypeName: image3d_t
+define amdgpu_kernel void @test(%opencl.image1d_t addrspace(1)* %a,
+ %opencl.image1d_array_t addrspace(1)* %b,
+ %opencl.image1d_buffer_t addrspace(1)* %c,
+ %opencl.image2d_t addrspace(1)* %d,
+ %opencl.image2d_array_t addrspace(1)* %e,
+ %opencl.image2d_array_depth_t addrspace(1)* %f,
+ %opencl.image2d_array_msaa_t addrspace(1)* %g,
+ %opencl.image2d_array_msaa_depth_t addrspace(1)* %h,
+ %opencl.image2d_depth_t addrspace(1)* %i,
+ %opencl.image2d_msaa_t addrspace(1)* %j,
+ %opencl.image2d_msaa_depth_t addrspace(1)* %k,
+ %opencl.image3d_t addrspace(1)* %l)
+ !kernel_arg_type !1 !kernel_arg_base_type !1 {
+ ret void
+}
+
+!1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t",
+ !"image2d_t", !"image2d_array_t", !"image2d_array_depth_t",
+ !"image2d_array_msaa_t", !"image2d_array_msaa_depth_t",
+ !"image2d_depth_t", !"image2d_msaa_t", !"image2d_msaa_depth_t",
+ !"image3d_t"}
diff --git a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index f2686a5582dc..c9787bb478ef 100644
--- a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
declare half @llvm.fabs.f16(half) #0
declare half @llvm.canonicalize.f16(half) #0
diff --git a/test/CodeGen/AMDGPU/fdiv.ll b/test/CodeGen/AMDGPU/fdiv.ll
index b3a2b6643720..738a5adba14f 100644
--- a/test/CodeGen/AMDGPU/fdiv.ll
+++ b/test/CodeGen/AMDGPU/fdiv.ll
@@ -85,10 +85,20 @@ entry:
}
; FUNC-LABEL: {{^}}fdiv_fast_denormals_f32:
-; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
-; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
-; GCN-NOT: [[RESULT]]
-; GCN: buffer_store_dword [[RESULT]]
+; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
+; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
+; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]
+
+; GCN-NOT: s_setreg
+; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
+; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
+; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[B]], [[DEN_SCALE]]
+; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
+; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
+; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
+; GCN-NOT: s_setreg
+; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
+; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
define amdgpu_kernel void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
entry:
%fdiv = fdiv fast float %a, %b
diff --git a/test/CodeGen/AMDGPU/fence-amdgiz.ll b/test/CodeGen/AMDGPU/fence-amdgiz.ll
new file mode 100644
index 000000000000..df675c9a8692
--- /dev/null
+++ b/test/CodeGen/AMDGPU/fence-amdgiz.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
+target triple = "amdgcn-amd-amdhsa-amdgizcl"
+
+; CHECK-LABEL: atomic_fence
+; CHECK: BB#0:
+; CHECK: ATOMIC_FENCE 4, 1
+; CHECK: s_endpgm
+
+define amdgpu_kernel void @atomic_fence() {
+ fence acquire
+ ret void
+}
+
diff --git a/test/CodeGen/AMDGPU/fmuladd.v2f16.ll b/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
index bdd3c04fd318..624610096cbc 100644
--- a/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
+++ b/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
@@ -1,12 +1,12 @@
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
-
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s
+
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #1
diff --git a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
index 555764c15519..506b2a02f828 100644
--- a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=CIVI %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GFX89 -check-prefix=GCN -check-prefix=CIVI %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=GCN %s
; GCN-LABEL: {{^}}fneg_fabs_fadd_f16:
; CI: v_cvt_f32_f16_e32
diff --git a/test/CodeGen/AMDGPU/immv216.ll b/test/CodeGen/AMDGPU/immv216.ll
index 85ad365d02a8..c15a30e3c540 100644
--- a/test/CodeGen/AMDGPU/immv216.ll
+++ b/test/CodeGen/AMDGPU/immv216.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; FIXME: Merge into imm.ll
diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
index a3f82b8a0117..89adcff1a278 100644
--- a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
+++ b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
@@ -216,7 +216,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_0(<2 x i16> addrspace(1)* %out,
; CIVI-DAG: v_and_b32_e32 [[ELT1:v[0-9]+]], 0xffff0000, [[VEC]]
; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], [[ELT0_SHIFT]], [[ELT1]]
-; GFX9-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff{{$}}
+; GFX9-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0000{{$}}
; GFX9-DAG: v_lshrrev_b32_e64 [[ELT0_SHIFT:v[0-9]+]], 16, [[ELT0]]
; GFX9: v_and_or_b32 [[RES:v[0-9]+]], [[VEC]], [[MASK]], [[ELT0_SHIFT]]
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
index 5e892fad3741..cbd8f0a9c23a 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
@@ -19,6 +19,20 @@ define amdgpu_kernel void @test_readlane_imm_sreg(i32 addrspace(1)* %out, i32 %s
ret void
}
+; CHECK-LABEL: {{^}}test_readlane_vregs:
+; CHECK: v_readfirstlane_b32 [[LANE:s[0-9]+]], v{{[0-9]+}}
+; CHECK: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, [[LANE]]
+define amdgpu_kernel void @test_readlane_vregs(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep.in = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid
+ %args = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in
+ %value = extractelement <2 x i32> %args, i32 0
+ %lane = extractelement <2 x i32> %args, i32 1
+ %readlane = call i32 @llvm.amdgcn.readlane(i32 %value, i32 %lane)
+ store i32 %readlane, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
; TODO: m0 should be folded.
; CHECK-LABEL: {{^}}test_readlane_m0_sreg:
; CHECK: s_mov_b32 m0, -1
@@ -40,5 +54,8 @@ define amdgpu_kernel void @test_readlane_imm(i32 addrspace(1)* %out, i32 %src0)
ret void
}
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+
attributes #0 = { nounwind readnone convergent }
attributes #1 = { nounwind }
+attributes #2 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.unreachable.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.unreachable.ll
new file mode 100644
index 000000000000..bafafa33016f
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.unreachable.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march amdgcn %s -filetype=obj -o /dev/null
+; RUN: llc -march amdgcn <%s | FileCheck %s
+define amdgpu_kernel void @f() {
+ ; CHECK: ; divergent unreachable
+ call void @llvm.amdgcn.unreachable()
+ ret void
+}
+
+declare void @llvm.amdgcn.unreachable()
diff --git a/test/CodeGen/AMDGPU/loop_break.ll b/test/CodeGen/AMDGPU/loop_break.ll
index 84c42e8bd1e0..b9df2cb779ad 100644
--- a/test/CodeGen/AMDGPU/loop_break.ll
+++ b/test/CodeGen/AMDGPU/loop_break.ll
@@ -10,7 +10,7 @@
; OPT: bb4:
; OPT: load volatile
-; OPT: %cmp1 = icmp sge i32 %tmp, %load
+; OPT: xor i1 %cmp1
; OPT: call i64 @llvm.amdgcn.if.break(
; OPT: br label %Flow
diff --git a/test/CodeGen/AMDGPU/lshr.v2i16.ll b/test/CodeGen/AMDGPU/lshr.v2i16.ll
index e21d0d09bb41..6a90a7a9f2eb 100644
--- a/test/CodeGen/AMDGPU/lshr.v2i16.ll
+++ b/test/CodeGen/AMDGPU/lshr.v2i16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s
diff --git a/test/CodeGen/AMDGPU/merge-m0.mir b/test/CodeGen/AMDGPU/merge-m0.mir
new file mode 100644
index 000000000000..064db49924e1
--- /dev/null
+++ b/test/CodeGen/AMDGPU/merge-m0.mir
@@ -0,0 +1,132 @@
+# RUN: llc -march=amdgcn -amdgpu-enable-merge-m0 -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck -check-prefix=GCN %s
+
+# GCN: bb.0.entry:
+# GCN: SI_INIT_M0 -1
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: SI_INIT_M0 65536
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: SI_INIT_M0 -1
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: SI_INIT_M0 65536
+# GCN-NEXT: DS_WRITE_B32
+
+# GCN: bb.1:
+# GCN: SI_INIT_M0 -1
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: DS_WRITE_B32
+
+# GCN: bb.2:
+# GCN: SI_INIT_M0 65536
+# GCN-NEXT: DS_WRITE_B32
+
+# GCN: bb.3:
+# GCN: SI_INIT_M0 3
+
+# GCN: bb.4:
+# GCN-NOT: SI_INIT_M0
+# GCN: DS_WRITE_B32
+# GCN-NEXT: SI_INIT_M0 4
+# GCN-NEXT: DS_WRITE_B32
+
+# GCN: bb.5:
+# GCN-NOT: SI_INIT_M0
+# GCN: DS_WRITE_B32
+# GCN-NEXT: SI_INIT_M0 4
+# GCN-NEXT: DS_WRITE_B32
+
+# GCN: bb.6:
+# GCN: SI_INIT_M0 -1,
+# GCN-NEXT: DS_WRITE_B32
+# GCN: SI_INIT_M0 %2
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: SI_INIT_M0 %2
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: SI_INIT_M0 -1
+# GCN-NEXT: DS_WRITE_B32
+
+---
+name: test
+alignment: 0
+exposesReturnsTwice: false
+noVRegs: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: sreg_32_xm0 }
+body: |
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ SI_INIT_M0 -1, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ SI_INIT_M0 -1, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ SI_INIT_M0 65536, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ SI_INIT_M0 65536, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ SI_INIT_M0 -1, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ SI_INIT_M0 65536, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ S_CBRANCH_VCCZ %bb.1, implicit undef %vcc
+ S_BRANCH %bb.2
+
+ bb.1:
+ successors: %bb.2
+ SI_INIT_M0 -1, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ SI_INIT_M0 -1, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3
+ SI_INIT_M0 65536, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ successors: %bb.4, %bb.5
+ S_CBRANCH_VCCZ %bb.4, implicit undef %vcc
+ S_BRANCH %bb.5
+
+ bb.4:
+ successors: %bb.6
+ SI_INIT_M0 3, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ SI_INIT_M0 4, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ S_BRANCH %bb.6
+
+ bb.5:
+ successors: %bb.6
+ SI_INIT_M0 3, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ SI_INIT_M0 4, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ S_BRANCH %bb.6
+
+ bb.6:
+ successors: %bb.0.entry, %bb.6
+ SI_INIT_M0 -1, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ %2 = IMPLICIT_DEF
+ SI_INIT_M0 %2, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ SI_INIT_M0 %2, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ SI_INIT_M0 -1, implicit-def %m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec
+ S_CBRANCH_VCCZ %bb.6, implicit undef %vcc
+ S_BRANCH %bb.0.entry
+
+...
diff --git a/test/CodeGen/AMDGPU/mubuf-offset-private.ll b/test/CodeGen/AMDGPU/mubuf-offset-private.ll
new file mode 100644
index 000000000000..3a0605fa182a
--- /dev/null
+++ b/test/CodeGen/AMDGPU/mubuf-offset-private.ll
@@ -0,0 +1,136 @@
+; RUN: llc -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
+
+; Test addressing modes when the scratch base is not a frame index.
+
+; GCN-LABEL: {{^}}store_private_offset_i8:
+; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s8 offset:8
+define amdgpu_kernel void @store_private_offset_i8() #0 {
+ store volatile i8 5, i8* inttoptr (i32 8 to i8*)
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_private_offset_i16:
+; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], s8 offset:8
+define amdgpu_kernel void @store_private_offset_i16() #0 {
+ store volatile i16 5, i16* inttoptr (i32 8 to i16*)
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_private_offset_i32:
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s8 offset:8
+define amdgpu_kernel void @store_private_offset_i32() #0 {
+ store volatile i32 5, i32* inttoptr (i32 8 to i32*)
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_private_offset_v2i32:
+; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
+define amdgpu_kernel void @store_private_offset_v2i32() #0 {
+ store volatile <2 x i32> <i32 5, i32 10>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*)
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_private_offset_v4i32:
+; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
+define amdgpu_kernel void @store_private_offset_v4i32() #0 {
+ store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*)
+ ret void
+}
+
+; GCN-LABEL: {{^}}load_private_offset_i8:
+; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
+define amdgpu_kernel void @load_private_offset_i8() #0 {
+ %load = load volatile i8, i8* inttoptr (i32 8 to i8*)
+ ret void
+}
+
+; GCN-LABEL: {{^}}sextload_private_offset_i8:
+; GCN: buffer_load_sbyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
+define amdgpu_kernel void @sextload_private_offset_i8(i32 addrspace(1)* %out) #0 {
+ %load = load volatile i8, i8* inttoptr (i32 8 to i8*)
+ %sextload = sext i8 %load to i32
+ store i32 %sextload, i32 addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}zextload_private_offset_i8:
+; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
+define amdgpu_kernel void @zextload_private_offset_i8(i32 addrspace(1)* %out) #0 {
+ %load = load volatile i8, i8* inttoptr (i32 8 to i8*)
+ %zextload = zext i8 %load to i32
+ store i32 %zextload, i32 addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}load_private_offset_i16:
+; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8
+define amdgpu_kernel void @load_private_offset_i16() #0 {
+ %load = load volatile i16, i16* inttoptr (i32 8 to i16*)
+ ret void
+}
+
+; GCN-LABEL: {{^}}sextload_private_offset_i16:
+; GCN: buffer_load_sshort v{{[0-9]+}}, off, s[4:7], s8 offset:8
+define amdgpu_kernel void @sextload_private_offset_i16(i32 addrspace(1)* %out) #0 {
+ %load = load volatile i16, i16* inttoptr (i32 8 to i16*)
+ %sextload = sext i16 %load to i32
+ store i32 %sextload, i32 addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}zextload_private_offset_i16:
+; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8
+define amdgpu_kernel void @zextload_private_offset_i16(i32 addrspace(1)* %out) #0 {
+ %load = load volatile i16, i16* inttoptr (i32 8 to i16*)
+ %zextload = zext i16 %load to i32
+ store i32 %zextload, i32 addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}load_private_offset_i32:
+; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], s8 offset:8
+define amdgpu_kernel void @load_private_offset_i32() #0 {
+ %load = load volatile i32, i32* inttoptr (i32 8 to i32*)
+ ret void
+}
+
+; GCN-LABEL: {{^}}load_private_offset_v2i32:
+; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
+define amdgpu_kernel void @load_private_offset_v2i32() #0 {
+ %load = load volatile <2 x i32>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*)
+ ret void
+}
+
+; GCN-LABEL: {{^}}load_private_offset_v4i32:
+; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
+define amdgpu_kernel void @load_private_offset_v4i32() #0 {
+ %load = load volatile <4 x i32>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*)
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_private_offset_i8_max_offset:
+; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s8 offset:4095
+define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 {
+ store volatile i8 5, i8* inttoptr (i32 4095 to i8*)
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus1:
+; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
+; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s8 offen{{$}}
+define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 {
+ store volatile i8 5, i8* inttoptr (i32 4096 to i8*)
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus2:
+; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
+; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s8 offen offset:1{{$}}
+define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 {
+ store volatile i8 5, i8* inttoptr (i32 4097 to i8*)
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
index 4bd8bff4809a..9d0b6b395996 100644
--- a/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ b/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -9,19 +9,18 @@
; StructurizeCFG.
; IR-LABEL: @multi_divergent_region_exit_ret_ret(
-; IR: %Pivot = icmp sge i32 %tmp16, 2
-; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot)
-; IR: %1 = extractvalue { i1, i64 } %0, 0
-; IR: %2 = extractvalue { i1, i64 } %0, 1
-; IR: br i1 %1, label %LeafBlock1, label %Flow
+; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
+; IR: %2 = extractvalue { i1, i64 } %1, 0
+; IR: %3 = extractvalue { i1, i64 } %1, 1
+; IR: br i1 %2, label %LeafBlock1, label %Flow
; IR: Flow:
-; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
-; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)
-; IR: %6 = extractvalue { i1, i64 } %5, 0
-; IR: %7 = extractvalue { i1, i64 } %5, 1
-; IR: br i1 %6, label %LeafBlock, label %Flow1
+; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
+; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
+; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
+; IR: %7 = extractvalue { i1, i64 } %6, 0
+; IR: %8 = extractvalue { i1, i64 } %6, 1
+; IR: br i1 %7, label %LeafBlock, label %Flow1
; IR: LeafBlock:
; IR: br label %Flow1
@@ -30,32 +29,32 @@
; IR: br label %Flow{{$}}
; IR: Flow2:
-; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %16)
-; IR: [[IF:%[0-9]+]] = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
-; IR: %10 = extractvalue { i1, i64 } [[IF]], 0
-; IR: %11 = extractvalue { i1, i64 } [[IF]], 1
-; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock
+; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %19)
+; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
+; IR: %13 = extractvalue { i1, i64 } %12, 0
+; IR: %14 = extractvalue { i1, i64 } %12, 1
+; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock
; IR: exit0:
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: br label %UnifiedReturnBlock
; IR: Flow1:
-; IR: %12 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %3, %Flow ]
-; IR: %13 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %7)
-; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13)
-; IR: %15 = extractvalue { i1, i64 } %14, 0
-; IR: %16 = extractvalue { i1, i64 } %14, 1
-; IR: br i1 %15, label %exit1, label %Flow2
+; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
+; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %8)
+; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
+; IR: %18 = extractvalue { i1, i64 } %17, 0
+; IR: %19 = extractvalue { i1, i64 } %17, 1
+; IR: br i1 %18, label %exit1, label %Flow2
; IR: exit1:
; IR: store volatile i32 17, i32 addrspace(3)* undef
; IR: br label %Flow2
; IR: UnifiedReturnBlock:
-; IR: call void @llvm.amdgcn.end.cf(i64 %11)
+; IR: call void @llvm.amdgcn.end.cf(i64 %14)
; IR: ret void
@@ -65,9 +64,11 @@
; GCN: s_xor_b64
-; GCN: ; %LeafBlock
-; GCN: v_cmp_ne_u32_e32 vcc, 1, [[REG:v[0-9]+]]
+; FIXME: Why is this compare essentially repeated?
+; GCN: v_cmp_eq_u32_e32 vcc, 1, [[REG:v[0-9]+]]
+; GCN-NEXT: v_cmp_ne_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1, [[REG]]
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, vcc
+; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1
; GCN: ; %Flow1
; GCN-NEXT: s_or_b64 exec, exec
@@ -125,15 +126,14 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
}
; IR-LABEL: @multi_divergent_region_exit_unreachable_unreachable(
-; IR: %Pivot = icmp sge i32 %tmp16, 2
-; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot)
+; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
-; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)
+; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
-; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %16)
-; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
-; IR: br i1 %10, label %exit0, label %UnifiedUnreachableBlock
+; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %19)
+; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
+; IR: br i1 %13, label %exit0, label %UnifiedUnreachableBlock
; IR: UnifiedUnreachableBlock:
@@ -181,49 +181,51 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
}
; IR-LABEL: @multi_exit_region_divergent_ret_uniform_ret(
-; IR: %divergent.cond0 = icmp sge i32 %tmp16, 2
+; IR: %divergent.cond0 = icmp slt i32 %tmp16, 2
; IR: llvm.amdgcn.if
; IR: br i1
; IR: {{^}}Flow:
-; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %4 = phi i1 [ %uniform.cond0, %LeafBlock1 ], [ false, %entry ]
-; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)
-; IR: br i1 %6, label %LeafBlock, label %Flow1
+; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
+; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
+; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
+; IR: br i1 %7, label %LeafBlock, label %Flow1
; IR: {{^}}LeafBlock:
-; IR: %divergent.cond1 = icmp ne i32 %tmp16, 1
+; IR: %divergent.cond1 = icmp eq i32 %tmp16, 1
+; IR: %9 = xor i1 %divergent.cond1, true
; IR: br label %Flow1
; IR: LeafBlock1:
-; IR: %uniform.cond0 = icmp ne i32 %arg3, 2
+; IR: %uniform.cond0 = icmp eq i32 %arg3, 2
+; IR: %10 = xor i1 %uniform.cond0, true
; IR: br label %Flow
; IR: Flow2:
-; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %16)
-; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
-; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock
+; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %19)
+; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
+; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock
; IR: exit0:
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: br label %UnifiedReturnBlock
; IR: {{^}}Flow1:
-; IR: %12 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %3, %Flow ]
-; IR: %13 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %7)
-; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13)
-; IR: %15 = extractvalue { i1, i64 } %14, 0
-; IR: %16 = extractvalue { i1, i64 } %14, 1
-; IR: br i1 %15, label %exit1, label %Flow2
+; IR: %15 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ]
+; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %8)
+; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
+; IR: %18 = extractvalue { i1, i64 } %17, 0
+; IR: %19 = extractvalue { i1, i64 } %17, 1
+; IR: br i1 %18, label %exit1, label %Flow2
; IR: exit1:
; IR: store volatile i32 17, i32 addrspace(3)* undef
; IR: br label %Flow2
; IR: UnifiedReturnBlock:
-; IR: call void @llvm.amdgcn.end.cf(i64 %11)
+; IR: call void @llvm.amdgcn.end.cf(i64 %14)
; IR: ret void
define amdgpu_kernel void @multi_exit_region_divergent_ret_uniform_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
entry:
@@ -262,18 +264,17 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
}
; IR-LABEL: @multi_exit_region_uniform_ret_divergent_ret(
-; IR: %Pivot = icmp sge i32 %tmp16, 2
-; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot)
-; IR: br i1 %1, label %LeafBlock1, label %Flow
+; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
+; IR: br i1 %2, label %LeafBlock1, label %Flow
; IR: Flow:
-; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
-; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)
+; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
+; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
+; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
-; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %16)
-; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
+; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %19)
+; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
define amdgpu_kernel void @multi_exit_region_uniform_ret_divergent_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
entry:
@@ -313,13 +314,13 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; IR-LABEL: @multi_divergent_region_exit_ret_ret_return_value(
; IR: Flow2:
-; IR: %8 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ]
-; IR: %9 = phi i1 [ false, %exit1 ], [ %13, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %17)
+; IR: %11 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ]
+; IR: %12 = phi i1 [ false, %exit1 ], [ %16, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %20)
; IR: UnifiedReturnBlock:
-; IR: %UnifiedRetVal = phi float [ %8, %Flow2 ], [ 1.000000e+00, %exit0 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %12)
+; IR: %UnifiedRetVal = phi float [ %11, %Flow2 ], [ 1.000000e+00, %exit0 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %15)
; IR: ret float %UnifiedRetVal
define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 {
entry:
@@ -386,32 +387,31 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
}
; IR-LABEL: @multi_divergent_region_exit_ret_unreachable(
-; IR: %Pivot = icmp sge i32 %tmp16, 2
-; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot)
+; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
; IR: Flow:
-; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
-; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2)
+; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
+; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
+; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
; IR: Flow2:
-; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %16)
-; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
-; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock
+; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %19)
+; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
+; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock
; IR: exit0:
; IR-NEXT: store volatile i32 17, i32 addrspace(3)* undef
; IR-NEXT: br label %UnifiedReturnBlock
; IR: Flow1:
-; IR: %12 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %3, %Flow ]
-; IR: %13 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %7)
-; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13)
-; IR: %15 = extractvalue { i1, i64 } %14, 0
-; IR: %16 = extractvalue { i1, i64 } %14, 1
-; IR: br i1 %15, label %exit1, label %Flow2
+; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
+; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
+; IR: call void @llvm.amdgcn.end.cf(i64 %8)
+; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
+; IR: %18 = extractvalue { i1, i64 } %17, 0
+; IR: %19 = extractvalue { i1, i64 } %17, 1
+; IR: br i1 %18, label %exit1, label %Flow2
; IR: exit1:
; IR-NEXT: store volatile i32 9, i32 addrspace(1)* undef
@@ -419,7 +419,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; IR-NEXT: br label %Flow2
; IR: UnifiedReturnBlock:
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11)
+; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
; IR-NEXT: ret void
define amdgpu_kernel void @multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
@@ -475,7 +475,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; IR-NEXT: br label %Flow2
; IR: UnifiedReturnBlock: ; preds = %exit0, %Flow2
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11)
+; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
; IR-NEXT: ret void
define amdgpu_kernel void @indirect_multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
@@ -622,15 +622,15 @@ uniform.ret:
; IR-LABEL: @uniform_complex_multi_ret_nest_in_divergent_triangle(
; IR: Flow1: ; preds = %uniform.ret1, %uniform.multi.exit.region
-; IR: %6 = phi i1 [ false, %uniform.ret1 ], [ true, %uniform.multi.exit.region ]
-; IR: br i1 %6, label %uniform.if, label %Flow2
+; IR: %8 = phi i1 [ false, %uniform.ret1 ], [ true, %uniform.multi.exit.region ]
+; IR: br i1 %8, label %uniform.if, label %Flow2
; IR: Flow: ; preds = %uniform.then, %uniform.if
-; IR: %7 = phi i1 [ %uniform.cond2, %uniform.then ], [ %uniform.cond1, %uniform.if ]
-; IR: br i1 %7, label %uniform.endif, label %uniform.ret0
+; IR: %11 = phi i1 [ %10, %uniform.then ], [ %9, %uniform.if ]
+; IR: br i1 %11, label %uniform.endif, label %uniform.ret0
; IR: UnifiedReturnBlock: ; preds = %Flow3, %Flow2
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %5)
+; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %6)
; IR-NEXT: ret void
define amdgpu_kernel void @uniform_complex_multi_ret_nest_in_divergent_triangle(i32 %arg0) #0 {
entry:
diff --git a/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/test/CodeGen/AMDGPU/nested-loop-conditions.ll
index c0b4eaff60aa..672549c8ea63 100644
--- a/test/CodeGen/AMDGPU/nested-loop-conditions.ll
+++ b/test/CodeGen/AMDGPU/nested-loop-conditions.ll
@@ -133,9 +133,9 @@ bb23: ; preds = %bb10
; IR: Flow1:
; IR-NEXT: %loop.phi = phi i64 [ %loop.phi9, %Flow6 ], [ %phi.broken, %bb14 ]
-; IR-NEXT: %13 = phi <4 x i32> [ %28, %Flow6 ], [ undef, %bb14 ]
-; IR-NEXT: %14 = phi i32 [ %29, %Flow6 ], [ undef, %bb14 ]
-; IR-NEXT: %15 = phi i1 [ %30, %Flow6 ], [ false, %bb14 ]
+; IR-NEXT: %13 = phi <4 x i32> [ %29, %Flow6 ], [ undef, %bb14 ]
+; IR-NEXT: %14 = phi i32 [ %30, %Flow6 ], [ undef, %bb14 ]
+; IR-NEXT: %15 = phi i1 [ %31, %Flow6 ], [ false, %bb14 ]
; IR-NEXT: %16 = phi i1 [ false, %Flow6 ], [ %8, %bb14 ]
; IR-NEXT: %17 = call i64 @llvm.amdgcn.else.break(i64 %11, i64 %loop.phi)
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11)
@@ -144,9 +144,9 @@ bb23: ; preds = %bb10
; IR: Flow2:
; IR-NEXT: %loop.phi10 = phi i64 [ %loop.phi11, %Flow5 ], [ %12, %bb16 ]
-; IR-NEXT: %19 = phi <4 x i32> [ %28, %Flow5 ], [ undef, %bb16 ]
-; IR-NEXT: %20 = phi i32 [ %29, %Flow5 ], [ undef, %bb16 ]
-; IR-NEXT: %21 = phi i1 [ %30, %Flow5 ], [ false, %bb16 ]
+; IR-NEXT: %19 = phi <4 x i32> [ %29, %Flow5 ], [ undef, %bb16 ]
+; IR-NEXT: %20 = phi i32 [ %30, %Flow5 ], [ undef, %bb16 ]
+; IR-NEXT: %21 = phi i1 [ %31, %Flow5 ], [ false, %bb16 ]
; IR-NEXT: %22 = phi i1 [ false, %Flow5 ], [ false, %bb16 ]
; IR-NEXT: %23 = phi i1 [ false, %Flow5 ], [ %8, %bb16 ]
; IR-NEXT: %24 = call { i1, i64 } @llvm.amdgcn.if(i1 %23)
@@ -156,15 +156,16 @@ bb23: ; preds = %bb10
; IR: bb21:
; IR: %tmp12 = icmp slt i32 %tmp11, 9
-; IR-NEXT: %27 = call i64 @llvm.amdgcn.if.break(i1 %tmp12, i64 %phi.broken)
+; IR-NEXT: %27 = xor i1 %tmp12, true
+; IR-NEXT: %28 = call i64 @llvm.amdgcn.if.break(i1 %27, i64 %phi.broken)
; IR-NEXT: br label %Flow3
; IR: Flow3:
; IR-NEXT: %loop.phi11 = phi i64 [ %phi.broken, %bb21 ], [ %phi.broken, %Flow2 ]
-; IR-NEXT: %loop.phi9 = phi i64 [ %27, %bb21 ], [ %loop.phi10, %Flow2 ]
-; IR-NEXT: %28 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ]
-; IR-NEXT: %29 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ]
-; IR-NEXT: %30 = phi i1 [ %tmp12, %bb21 ], [ %21, %Flow2 ]
+; IR-NEXT: %loop.phi9 = phi i64 [ %28, %bb21 ], [ %loop.phi10, %Flow2 ]
+; IR-NEXT: %29 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ]
+; IR-NEXT: %30 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ]
+; IR-NEXT: %31 = phi i1 [ %27, %bb21 ], [ %21, %Flow2 ]
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %26)
; IR-NEXT: br i1 %22, label %bb31.loopexit, label %Flow4
diff --git a/test/CodeGen/AMDGPU/private-access-no-objects.ll b/test/CodeGen/AMDGPU/private-access-no-objects.ll
index af2683510293..dcb089010e99 100644
--- a/test/CodeGen/AMDGPU/private-access-no-objects.ll
+++ b/test/CodeGen/AMDGPU/private-access-no-objects.ll
@@ -1,7 +1,7 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=OPT %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=iceland -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s
-; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=OPTNONE %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI -check-prefix=OPT %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=iceland -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s
+; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=OPTNONE %s
; There are no stack objects, but still a private memory access. The
; private access registers need to be correctly initialized anyway, and
@@ -27,9 +27,9 @@ define amdgpu_kernel void @store_to_undef() #0 {
; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
-; OPT: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}}
+; OPT: buffer_store_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}}
define amdgpu_kernel void @store_to_inttoptr() #0 {
- store volatile i32 0, i32* inttoptr (i32 123 to i32*)
+ store volatile i32 0, i32* inttoptr (i32 124 to i32*)
ret void
}
@@ -47,9 +47,9 @@ define amdgpu_kernel void @load_from_undef() #0 {
; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
-; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}}
+; OPT: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}}
define amdgpu_kernel void @load_from_inttoptr() #0 {
- %ld = load volatile i32, i32* inttoptr (i32 123 to i32*)
+ %ld = load volatile i32, i32* inttoptr (i32 124 to i32*)
ret void
}
diff --git a/test/CodeGen/AMDGPU/readcyclecounter.ll b/test/CodeGen/AMDGPU/readcyclecounter.ll
index 5c698c839fa6..d7b353cd25d3 100644
--- a/test/CodeGen/AMDGPU/readcyclecounter.ll
+++ b/test/CodeGen/AMDGPU/readcyclecounter.ll
@@ -22,4 +22,18 @@ define amdgpu_kernel void @test_readcyclecounter(i64 addrspace(1)* %out) #0 {
ret void
}
+; This test used to crash in ScheduleDAG.
+;
+; GCN-LABEL: {{^}}test_readcyclecounter_smem:
+; SI-DAG: s_memtime
+; VI-DAG: s_memrealtime
+; GCN-DAG: s_load_dword
+define amdgpu_cs i32 @test_readcyclecounter_smem(i64 addrspace(2)* inreg %in) #0 {
+ %cycle0 = call i64 @llvm.readcyclecounter()
+ %in.v = load i64, i64 addrspace(2)* %in
+ %r.64 = add i64 %cycle0, %in.v
+ %r.32 = trunc i64 %r.64 to i32
+ ret i32 %r.32
+}
+
attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/ret_jump.ll b/test/CodeGen/AMDGPU/ret_jump.ll
index 748f98a12c59..f2fbacbab82e 100644
--- a/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/test/CodeGen/AMDGPU/ret_jump.ll
@@ -56,7 +56,7 @@ ret.bb: ; preds = %else, %main_body
}
; GCN-LABEL: {{^}}uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable:
-; GCN: s_cbranch_scc1 [[RET_BB:BB[0-9]+_[0-9]+]]
+; GCN: s_cbranch_vccnz [[RET_BB:BB[0-9]+_[0-9]+]]
; GCN: ; BB#{{[0-9]+}}: ; %else
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
diff --git a/test/CodeGen/AMDGPU/sext-in-reg.ll b/test/CodeGen/AMDGPU/sext-in-reg.ll
index b702e1c07200..160fb6a038fe 100644
--- a/test/CodeGen/AMDGPU/sext-in-reg.ll
+++ b/test/CodeGen/AMDGPU/sext-in-reg.ll
@@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FIXME: i16 promotion pass ruins the scalar cases when legal.
diff --git a/test/CodeGen/AMDGPU/shl.v2i16.ll b/test/CodeGen/AMDGPU/shl.v2i16.ll
index eac29bad7cf2..115221c5316d 100644
--- a/test/CodeGen/AMDGPU/shl.v2i16.ll
+++ b/test/CodeGen/AMDGPU/shl.v2i16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s
diff --git a/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/test/CodeGen/AMDGPU/sminmax.v2i16.ll
index 4e093cdece21..16ce86bf8b11 100644
--- a/test/CodeGen/AMDGPU/sminmax.v2i16.ll
+++ b/test/CodeGen/AMDGPU/sminmax.v2i16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=CIVI -check-prefix=GCN %s
diff --git a/test/CodeGen/AMDGPU/spill-m0.ll b/test/CodeGen/AMDGPU/spill-m0.ll
index 0e715c453209..8f1aebfe9ceb 100644
--- a/test/CodeGen/AMDGPU/spill-m0.ll
+++ b/test/CodeGen/AMDGPU/spill-m0.ll
@@ -69,19 +69,20 @@ endif:
; TOSMEM-NOT: s_m0
; TOSMEM: s_add_u32 m0, s7, 0x100
; TOSMEM-NEXT: s_buffer_store_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 4-byte Folded Spill
-; TOSMEM-NOT: m0
+; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it
+; FIXME-TOSMEM-NOT: m0
-; TOSMEM-NOT: m0
+; FIXME-TOSMEM-NOT: m0
; TOSMEM: s_add_u32 m0, s7, 0x200
; TOSMEM: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill
-; TOSMEM-NOT: m0
+; FIXME-TOSMEM-NOT: m0
; TOSMEM: s_mov_b64 exec,
; TOSMEM: s_cbranch_execz
; TOSMEM: s_branch
; TOSMEM: BB{{[0-9]+_[0-9]+}}:
-; TOSMEM-NEXT: s_add_u32 m0, s7, 0x200
+; TOSMEM: s_add_u32 m0, s7, 0x200
; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload
@@ -130,7 +131,7 @@ endif: ; preds = %else, %if
; TOSMEM: s_branch
; TOSMEM: BB{{[0-9]+_[0-9]+}}:
-; TOSMEM-NEXT: s_add_u32 m0, s3, 0x100
+; TOSMEM: s_add_u32 m0, s3, 0x100
; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload
; GCN-NOT: v_readlane_b32 m0
@@ -159,13 +160,14 @@ endif:
; GCN-LABEL: {{^}}restore_m0_lds:
; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]]
; TOSMEM: s_cmp_eq_u32
-; TOSMEM-NOT: m0
+; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it
+; FIXME-TOSMEM-NOT: m0
; TOSMEM: s_add_u32 m0, s3, 0x100
; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill
-; TOSMEM-NOT: m0
+; FIXME-TOSMEM-NOT: m0
; TOSMEM: s_add_u32 m0, s3, 0x300
; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill
-; TOSMEM-NOT: m0
+; FIXME-TOSMEM-NOT: m0
; TOSMEM: s_cbranch_scc1
; TOSMEM: s_mov_b32 m0, -1
@@ -178,10 +180,10 @@ endif:
; TOSMEM: ds_write_b64
-; TOSMEM-NOT: m0
+; FIXME-TOSMEM-NOT: m0
; TOSMEM: s_add_u32 m0, s3, 0x300
; TOSMEM: s_buffer_load_dword s0, s[88:91], m0 ; 4-byte Folded Reload
-; TOSMEM-NOT: m0
+; FIXME-TOSMEM-NOT: m0
; TOSMEM: s_waitcnt lgkmcnt(0)
; TOSMEM-NOT: m0
; TOSMEM: s_mov_b32 m0, s0
diff --git a/test/CodeGen/AMDGPU/sub.v2i16.ll b/test/CodeGen/AMDGPU/sub.v2i16.ll
index 69f0accef628..431344670ffb 100644
--- a/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
; FIXME: Need to handle non-uniform case for function below (load without gep).
diff --git a/test/CodeGen/AMDGPU/trap.ll b/test/CodeGen/AMDGPU/trap.ll
index 77ad895d0e86..51771c9723e0 100644
--- a/test/CodeGen/AMDGPU/trap.ll
+++ b/test/CodeGen/AMDGPU/trap.ll
@@ -80,4 +80,25 @@ define amdgpu_kernel void @trap() {
ret void
}
+; GCN-LABEL: {{^}}non_entry_trap:
+; TRAP-BIT: enable_trap_handler = 1
+; NO-TRAP-BIT: enable_trap_handler = 0
+
+; HSA: BB{{[0-9]_[0-9]+}}: ; %trap
+; HSA-TRAP: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-NEXT: s_trap 2
+define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr #1 {
+entry:
+ %tmp29 = load volatile i32, i32 addrspace(1)* %arg0
+ %cmp = icmp eq i32 %tmp29, -1
+ br i1 %cmp, label %ret, label %trap
+
+trap:
+ call void @llvm.trap()
+ unreachable
+
+ret:
+ ret void
+}
+
attributes #0 = { nounwind noreturn }