diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-04-26 19:45:00 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-26 19:45:00 +0000 |
commit | 12f3ca4cdb95b193af905a00e722a4dcb40b3de3 (patch) | |
tree | ae1a7fcfc24a8d4b23206c57121c3f361d4b7f84 /test/CodeGen/AMDGPU | |
parent | d99dafe2e4a385dd2a6c76da6d8258deb100657b (diff) | |
download | src-12f3ca4cdb95b193af905a00e722a4dcb40b3de3.tar.gz src-12f3ca4cdb95b193af905a00e722a4dcb40b3de3.zip |
Vendor import of llvm trunk r301441:vendor/llvm/llvm-trunk-r301441
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=317461
svn path=/vendor/llvm/llvm-trunk-r301441/; revision=317462; tag=vendor/llvm/llvm-trunk-r301441
Diffstat (limited to 'test/CodeGen/AMDGPU')
28 files changed, 586 insertions, 150 deletions
diff --git a/test/CodeGen/AMDGPU/add.v2i16.ll b/test/CodeGen/AMDGPU/add.v2i16.ll index e137ef4bc236..73e80d523f1e 100644 --- a/test/CodeGen/AMDGPU/add.v2i16.ll +++ b/test/CodeGen/AMDGPU/add.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; FIXME: Need to handle non-uniform case for function below (load without gep). diff --git a/test/CodeGen/AMDGPU/addrspacecast.ll b/test/CodeGen/AMDGPU/addrspacecast.ll index 6ec93c72ec52..b1e71722d80c 100644 --- a/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/test/CodeGen/AMDGPU/addrspacecast.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=CI %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s ; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast: ; HSA: enable_sgpr_private_segment_buffer = 1 @@ -223,9 +223,8 @@ define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 { } ; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast: -; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], 0{{$}} -; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} -; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen +; HSA: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} +; HSA: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}} define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 { %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(0)* store volatile i32 7, i32* %cast diff --git a/test/CodeGen/AMDGPU/ashr.v2i16.ll b/test/CodeGen/AMDGPU/ashr.v2i16.ll index 96a5e3b23758..7f424ef2a147 100644 --- a/test/CodeGen/AMDGPU/ashr.v2i16.ll +++ b/test/CodeGen/AMDGPU/ashr.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s diff --git a/test/CodeGen/AMDGPU/code-object-metadata-images.ll b/test/CodeGen/AMDGPU/code-object-metadata-images.ll new file mode 100644 index 000000000000..918560469852 --- /dev/null +++ b/test/CodeGen/AMDGPU/code-object-metadata-images.ll @@ -0,0 +1,80 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s + +%opencl.image1d_t = type opaque +%opencl.image1d_array_t = type opaque +%opencl.image1d_buffer_t = type opaque +%opencl.image2d_t = type opaque +%opencl.image2d_array_t = type opaque +%opencl.image2d_array_depth_t = type opaque +%opencl.image2d_array_msaa_t = type opaque +%opencl.image2d_array_msaa_depth_t = type opaque +%opencl.image2d_depth_t = type opaque +%opencl.image2d_msaa_t = type opaque +%opencl.image2d_msaa_depth_t = type opaque +%opencl.image3d_t = type opaque + +; CHECK: --- +; CHECK: Version: [ 1, 0 ] + +; CHECK: Kernels: +; CHECK: - Name: test +; CHECK: Args: +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image1d_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image1d_array_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image1d_buffer_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_array_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_array_depth_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_array_msaa_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_array_msaa_depth_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_depth_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_msaa_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_msaa_depth_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image3d_t +define amdgpu_kernel void @test(%opencl.image1d_t addrspace(1)* %a, + %opencl.image1d_array_t addrspace(1)* %b, + %opencl.image1d_buffer_t addrspace(1)* %c, + %opencl.image2d_t addrspace(1)* %d, + %opencl.image2d_array_t addrspace(1)* %e, + %opencl.image2d_array_depth_t addrspace(1)* %f, + %opencl.image2d_array_msaa_t addrspace(1)* %g, + %opencl.image2d_array_msaa_depth_t addrspace(1)* %h, + %opencl.image2d_depth_t addrspace(1)* %i, + %opencl.image2d_msaa_t addrspace(1)* %j, + %opencl.image2d_msaa_depth_t addrspace(1)* %k, + %opencl.image3d_t addrspace(1)* %l) + !kernel_arg_type !1 !kernel_arg_base_type !1 { + ret void +} + +!1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t", + !"image2d_t", !"image2d_array_t", !"image2d_array_depth_t", + !"image2d_array_msaa_t", !"image2d_array_msaa_depth_t", + !"image2d_depth_t", !"image2d_msaa_t", !"image2d_msaa_depth_t", + !"image3d_t"} diff --git a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll index f2686a5582dc..c9787bb478ef 100644 --- a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s declare half @llvm.fabs.f16(half) #0 declare half @llvm.canonicalize.f16(half) #0 diff --git a/test/CodeGen/AMDGPU/fdiv.ll b/test/CodeGen/AMDGPU/fdiv.ll index b3a2b6643720..738a5adba14f 100644 --- a/test/CodeGen/AMDGPU/fdiv.ll +++ b/test/CodeGen/AMDGPU/fdiv.ll @@ -85,10 +85,20 @@ entry: } ; FUNC-LABEL: {{^}}fdiv_fast_denormals_f32: -; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}} -; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]] -; GCN-NOT: [[RESULT]] -; GCN: buffer_store_dword [[RESULT]] +; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]] +; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]] +; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]] + +; GCN-NOT: s_setreg +; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0 +; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]] +; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[B]], [[DEN_SCALE]] +; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]] +; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]] +; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]] +; GCN-NOT: s_setreg +; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]] +; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]], define amdgpu_kernel void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 { entry: %fdiv = fdiv fast float %a, %b diff --git a/test/CodeGen/AMDGPU/fence-amdgiz.ll b/test/CodeGen/AMDGPU/fence-amdgiz.ll new file mode 100644 index 000000000000..df675c9a8692 --- /dev/null +++ b/test/CodeGen/AMDGPU/fence-amdgiz.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" +target triple = "amdgcn-amd-amdhsa-amdgizcl" + +; CHECK_LABEL: atomic_fence +; CHECK: BB#0: +; CHECK: ATOMIC_FENCE 4, 1 +; CHECK: s_endpgm + +define amdgpu_kernel void @atomic_fence() { + fence acquire + ret void +} + diff --git a/test/CodeGen/AMDGPU/fmuladd.v2f16.ll b/test/CodeGen/AMDGPU/fmuladd.v2f16.ll index bdd3c04fd318..624610096cbc 100644 --- a/test/CodeGen/AMDGPU/fmuladd.v2f16.ll +++ b/test/CodeGen/AMDGPU/fmuladd.v2f16.ll @@ -1,12 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s - -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s + +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s declare i32 @llvm.amdgcn.workitem.id.x() #1 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #1 diff --git a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll index 555764c15519..506b2a02f828 100644 --- a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll +++ b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=CIVI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GFX89 -check-prefix=GCN -check-prefix=CIVI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=GCN %s ; GCN-LABEL: {{^}}fneg_fabs_fadd_f16: ; CI: v_cvt_f32_f16_e32 diff --git a/test/CodeGen/AMDGPU/immv216.ll b/test/CodeGen/AMDGPU/immv216.ll index 85ad365d02a8..c15a30e3c540 100644 --- a/test/CodeGen/AMDGPU/immv216.ll +++ b/test/CodeGen/AMDGPU/immv216.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s ; FIXME: Merge into imm.ll diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll index a3f82b8a0117..89adcff1a278 100644 --- a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -216,7 +216,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_0(<2 x i16> addrspace(1)* %out, ; CIVI-DAG: v_and_b32_e32 [[ELT1:v[0-9]+]], 0xffff0000, [[VEC]] ; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], [[ELT0_SHIFT]], [[ELT1]] -; GFX9-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff{{$}} +; GFX9-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0000{{$}} ; GFX9-DAG: v_lshrrev_b32_e64 [[ELT0_SHIFT:v[0-9]+]], 16, [[ELT0]] ; GFX9: v_and_or_b32 [[RES:v[0-9]+]], [[VEC]], [[MASK]], [[ELT0_SHIFT]] diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll index 5e892fad3741..cbd8f0a9c23a 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll @@ -19,6 +19,20 @@ define amdgpu_kernel void @test_readlane_imm_sreg(i32 addrspace(1)* %out, i32 %s ret void } +; CHECK-LABEL: {{^}}test_readlane_vregs: +; CHECK: v_readfirstlane_b32 [[LANE:s[0-9]+]], v{{[0-9]+}} +; CHECK: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, [[LANE]] +define amdgpu_kernel void @test_readlane_vregs(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #1 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep.in = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid + %args = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in + %value = extractelement <2 x i32> %args, i32 0 + %lane = extractelement <2 x i32> %args, i32 1 + %readlane = call i32 @llvm.amdgcn.readlane(i32 %value, i32 %lane) + store i32 %readlane, i32 addrspace(1)* %out, align 4 + ret void +} + ; TODO: m0 should be folded. ; CHECK-LABEL: {{^}}test_readlane_m0_sreg: ; CHECK: s_mov_b32 m0, -1 @@ -40,5 +54,8 @@ define amdgpu_kernel void @test_readlane_imm(i32 addrspace(1)* %out, i32 %src0) ret void } +declare i32 @llvm.amdgcn.workitem.id.x() #2 + attributes #0 = { nounwind readnone convergent } attributes #1 = { nounwind } +attributes #2 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.unreachable.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.unreachable.ll new file mode 100644 index 000000000000..bafafa33016f --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.unreachable.ll @@ -0,0 +1,9 @@ +; RUN: llc -march amdgcn %s -filetype=obj -o /dev/null +; RUN: llc -march amdgcn <%s | FileCheck %s +define amdgpu_kernel void @f() { + ; CHECK: ; divergent unreachable + call void @llvm.amdgcn.unreachable() + ret void +} + +declare void @llvm.amdgcn.unreachable() diff --git a/test/CodeGen/AMDGPU/loop_break.ll b/test/CodeGen/AMDGPU/loop_break.ll index 84c42e8bd1e0..b9df2cb779ad 100644 --- a/test/CodeGen/AMDGPU/loop_break.ll +++ b/test/CodeGen/AMDGPU/loop_break.ll @@ -10,7 +10,7 @@ ; OPT: bb4: ; OPT: load volatile -; OPT: %cmp1 = icmp sge i32 %tmp, %load +; OPT: xor i1 %cmp1 ; OPT: call i64 @llvm.amdgcn.if.break( ; OPT: br label %Flow diff --git a/test/CodeGen/AMDGPU/lshr.v2i16.ll b/test/CodeGen/AMDGPU/lshr.v2i16.ll index e21d0d09bb41..6a90a7a9f2eb 100644 --- a/test/CodeGen/AMDGPU/lshr.v2i16.ll +++ b/test/CodeGen/AMDGPU/lshr.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s diff --git a/test/CodeGen/AMDGPU/merge-m0.mir b/test/CodeGen/AMDGPU/merge-m0.mir new file mode 100644 index 000000000000..064db49924e1 --- /dev/null +++ b/test/CodeGen/AMDGPU/merge-m0.mir @@ -0,0 +1,132 @@ +# RUN: llc -march=amdgcn -amdgpu-enable-merge-m0 -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck -check-prefix=GCN %s + +# GCN: bb.0.entry: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 65536 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 65536 +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.1: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.2: +# GCN: SI_INIT_M0 65536 +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.3: +# GCN: SI_INIT_M0 3 + +# GCN: bb.4: +# GCN-NOT: SI_INIT_M0 +# GCN: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 4 +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.5: +# GCN-NOT: SI_INIT_M0 +# GCN: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 4 +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.6: +# GCN: SI_INIT_M0 -1, +# GCN-NEXT: DS_WRITE_B32 +# GCN: SI_INIT_M0 %2 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 %2 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 + +--- +name: test +alignment: 0 +exposesReturnsTwice: false +noVRegs: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: sreg_32_xm0 } +body: | + bb.0.entry: + successors: %bb.1, %bb.2 + + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + SI_INIT_M0 -1, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 -1, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 65536, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 65536, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 -1, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 65536, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + S_CBRANCH_VCCZ %bb.1, implicit undef %vcc + S_BRANCH %bb.2 + + bb.1: + successors: %bb.2 + SI_INIT_M0 -1, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 -1, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + S_BRANCH %bb.2 + + bb.2: + successors: %bb.3 + SI_INIT_M0 65536, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + S_BRANCH %bb.3 + + bb.3: + successors: %bb.4, %bb.5 + S_CBRANCH_VCCZ %bb.4, implicit undef %vcc + S_BRANCH %bb.5 + + bb.4: + successors: %bb.6 + SI_INIT_M0 3, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 4, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + S_BRANCH %bb.6 + + bb.5: + successors: %bb.6 + SI_INIT_M0 3, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 4, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + S_BRANCH %bb.6 + + bb.6: + successors: %bb.0.entry, %bb.6 + SI_INIT_M0 -1, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + %2 = IMPLICIT_DEF + SI_INIT_M0 %2, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 %2, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 -1, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + S_CBRANCH_VCCZ %bb.6, implicit undef %vcc + S_BRANCH %bb.0.entry + +... diff --git a/test/CodeGen/AMDGPU/mubuf-offset-private.ll b/test/CodeGen/AMDGPU/mubuf-offset-private.ll new file mode 100644 index 000000000000..3a0605fa182a --- /dev/null +++ b/test/CodeGen/AMDGPU/mubuf-offset-private.ll @@ -0,0 +1,136 @@ +; RUN: llc -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s + +; Test addressing modes when the scratch base is not a frame index. + +; GCN-LABEL: {{^}}store_private_offset_i8: +; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @store_private_offset_i8() #0 { + store volatile i8 5, i8* inttoptr (i32 8 to i8*) + ret void +} + +; GCN-LABEL: {{^}}store_private_offset_i16: +; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @store_private_offset_i16() #0 { + store volatile i16 5, i16* inttoptr (i32 8 to i16*) + ret void +} + +; GCN-LABEL: {{^}}store_private_offset_i32: +; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @store_private_offset_i32() #0 { + store volatile i32 5, i32* inttoptr (i32 8 to i32*) + ret void +} + +; GCN-LABEL: {{^}}store_private_offset_v2i32: +; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @store_private_offset_v2i32() #0 { + store volatile <2 x i32> <i32 5, i32 10>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*) + ret void +} + +; GCN-LABEL: {{^}}store_private_offset_v4i32: +; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @store_private_offset_v4i32() #0 { + store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*) + ret void +} + +; GCN-LABEL: {{^}}load_private_offset_i8: +; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @load_private_offset_i8() #0 { + %load = load volatile i8, i8* inttoptr (i32 8 to i8*) + ret void +} + +; GCN-LABEL: {{^}}sextload_private_offset_i8: +; GCN: buffer_load_sbyte v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @sextload_private_offset_i8(i32 addrspace(1)* %out) #0 { + %load = load volatile i8, i8* inttoptr (i32 8 to i8*) + %sextload = sext i8 %load to i32 + store i32 %sextload, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}zextload_private_offset_i8: +; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @zextload_private_offset_i8(i32 addrspace(1)* %out) #0 { + %load = load volatile i8, i8* inttoptr (i32 8 to i8*) + %zextload = zext i8 %load to i32 + store i32 %zextload, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}load_private_offset_i16: +; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @load_private_offset_i16() #0 { + %load = load volatile i16, i16* inttoptr (i32 8 to i16*) + ret void +} + +; GCN-LABEL: {{^}}sextload_private_offset_i16: +; GCN: buffer_load_sshort v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @sextload_private_offset_i16(i32 addrspace(1)* %out) #0 { + %load = load volatile i16, i16* inttoptr (i32 8 to i16*) + %sextload = sext i16 %load to i32 + store i32 %sextload, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}zextload_private_offset_i16: +; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @zextload_private_offset_i16(i32 addrspace(1)* %out) #0 { + %load = load volatile i16, i16* inttoptr (i32 8 to i16*) + %zextload = zext i16 %load to i32 + store i32 %zextload, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}load_private_offset_i32: +; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @load_private_offset_i32() #0 { + %load = load volatile i32, i32* inttoptr (i32 8 to i32*) + ret void +} + +; GCN-LABEL: {{^}}load_private_offset_v2i32: +; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @load_private_offset_v2i32() #0 { + %load = load volatile <2 x i32>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*) + ret void +} + +; GCN-LABEL: {{^}}load_private_offset_v4i32: +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @load_private_offset_v4i32() #0 { + %load = load volatile <4 x i32>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*) + ret void +} + +; GCN-LABEL: {{^}}store_private_offset_i8_max_offset: +; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s8 offset:4095 +define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 { + store volatile i8 5, i8* inttoptr (i32 4095 to i8*) + ret void +} + +; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus1: +; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000 +; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s8 offen{{$}} +define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 { + store volatile i8 5, i8* inttoptr (i32 4096 to i8*) + ret void +} + +; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus2: +; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000 +; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s8 offen offset:1{{$}} +define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 { + store volatile i8 5, i8* inttoptr (i32 4097 to i8*) + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll index 4bd8bff4809a..9d0b6b395996 100644 --- a/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll +++ b/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll @@ -9,19 +9,18 @@ ; StructurizeCFG. ; IR-LABEL: @multi_divergent_region_exit_ret_ret( -; IR: %Pivot = icmp sge i32 %tmp16, 2 -; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot) -; IR: %1 = extractvalue { i1, i64 } %0, 0 -; IR: %2 = extractvalue { i1, i64 } %0, 1 -; IR: br i1 %1, label %LeafBlock1, label %Flow +; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0) +; IR: %2 = extractvalue { i1, i64 } %1, 0 +; IR: %3 = extractvalue { i1, i64 } %1, 1 +; IR: br i1 %2, label %LeafBlock1, label %Flow ; IR: Flow: -; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] -; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ] -; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2) -; IR: %6 = extractvalue { i1, i64 } %5, 0 -; IR: %7 = extractvalue { i1, i64 } %5, 1 -; IR: br i1 %6, label %LeafBlock, label %Flow1 +; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] +; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ] +; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) +; IR: %7 = extractvalue { i1, i64 } %6, 0 +; IR: %8 = extractvalue { i1, i64 } %6, 1 +; IR: br i1 %7, label %LeafBlock, label %Flow1 ; IR: LeafBlock: ; IR: br label %Flow1 @@ -30,32 +29,32 @@ ; IR: br label %Flow{{$}} ; IR: Flow2: -; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %16) -; IR: [[IF:%[0-9]+]] = call { i1, i64 } @llvm.amdgcn.if(i1 %8) -; IR: %10 = extractvalue { i1, i64 } [[IF]], 0 -; IR: %11 = extractvalue { i1, i64 } [[IF]], 1 -; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock +; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %19) +; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) +; IR: %13 = extractvalue { i1, i64 } %12, 0 +; IR: %14 = extractvalue { i1, i64 } %12, 1 +; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock ; IR: exit0: ; IR: store volatile i32 9, i32 addrspace(1)* undef ; IR: br label %UnifiedReturnBlock ; IR: Flow1: -; IR: %12 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %3, %Flow ] -; IR: %13 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ] -; IR: call void @llvm.amdgcn.end.cf(i64 %7) -; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13) -; IR: %15 = extractvalue { i1, i64 } %14, 0 -; IR: %16 = extractvalue { i1, i64 } %14, 1 -; IR: br i1 %15, label %exit1, label %Flow2 +; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ] +; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ] +; IR: call void @llvm.amdgcn.end.cf(i64 %8) +; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16) +; IR: %18 = extractvalue { i1, i64 } %17, 0 +; IR: %19 = extractvalue { i1, i64 } %17, 1 +; IR: br i1 %18, label %exit1, label %Flow2 ; IR: exit1: ; IR: store volatile i32 17, i32 addrspace(3)* undef ; IR: br label %Flow2 ; IR: UnifiedReturnBlock: -; IR: call void @llvm.amdgcn.end.cf(i64 %11) +; IR: call void @llvm.amdgcn.end.cf(i64 %14) ; IR: ret void @@ -65,9 +64,11 @@ ; GCN: s_xor_b64 -; GCN: ; %LeafBlock -; GCN: v_cmp_ne_u32_e32 vcc, 1, [[REG:v[0-9]+]] +; FIXME: Why is this compare essentially repeated? +; GCN: v_cmp_eq_u32_e32 vcc, 1, [[REG:v[0-9]+]] +; GCN-NEXT: v_cmp_ne_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1, [[REG]] ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, vcc +; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1 ; GCN: ; %Flow1 ; GCN-NEXT: s_or_b64 exec, exec @@ -125,15 +126,14 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 } ; IR-LABEL: @multi_divergent_region_exit_unreachable_unreachable( -; IR: %Pivot = icmp sge i32 %tmp16, 2 -; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot) +; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0) -; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2) +; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) -; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %16) -; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8) -; IR: br i1 %10, label %exit0, label %UnifiedUnreachableBlock +; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %19) +; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) +; IR: br i1 %13, label %exit0, label %UnifiedUnreachableBlock ; IR: UnifiedUnreachableBlock: @@ -181,49 +181,51 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 } ; IR-LABEL: @multi_exit_region_divergent_ret_uniform_ret( -; IR: %divergent.cond0 = icmp sge i32 %tmp16, 2 +; IR: %divergent.cond0 = icmp slt i32 %tmp16, 2 ; IR: llvm.amdgcn.if ; IR: br i1 ; IR: {{^}}Flow: -; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] -; IR: %4 = phi i1 [ %uniform.cond0, %LeafBlock1 ], [ false, %entry ] -; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2) -; IR: br i1 %6, label %LeafBlock, label %Flow1 +; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] +; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ] +; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) +; IR: br i1 %7, label %LeafBlock, label %Flow1 ; IR: {{^}}LeafBlock: -; IR: %divergent.cond1 = icmp ne i32 %tmp16, 1 +; IR: %divergent.cond1 = icmp eq i32 %tmp16, 1 +; IR: %9 = xor i1 %divergent.cond1, true ; IR: br label %Flow1 ; IR: LeafBlock1: -; IR: %uniform.cond0 = icmp ne i32 %arg3, 2 +; IR: %uniform.cond0 = icmp eq i32 %arg3, 2 +; IR: %10 = xor i1 %uniform.cond0, true ; IR: br label %Flow ; IR: Flow2: -; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %16) -; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8) -; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock +; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %19) +; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) +; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock ; IR: exit0: ; IR: store volatile i32 9, i32 addrspace(1)* undef ; IR: br label %UnifiedReturnBlock ; IR: {{^}}Flow1: -; IR: %12 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %3, %Flow ] -; IR: %13 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ] -; IR: call void @llvm.amdgcn.end.cf(i64 %7) -; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13) -; IR: %15 = extractvalue { i1, i64 } %14, 0 -; IR: %16 = extractvalue { i1, i64 } %14, 1 -; IR: br i1 %15, label %exit1, label %Flow2 +; IR: %15 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ] +; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ] +; IR: call void @llvm.amdgcn.end.cf(i64 %8) +; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16) +; IR: %18 = extractvalue { i1, i64 } %17, 0 +; IR: %19 = extractvalue { i1, i64 } %17, 1 +; IR: br i1 %18, label %exit1, label %Flow2 ; IR: exit1: ; IR: store volatile i32 17, i32 addrspace(3)* undef ; IR: br label %Flow2 ; IR: UnifiedReturnBlock: -; IR: call void @llvm.amdgcn.end.cf(i64 %11) +; IR: call void @llvm.amdgcn.end.cf(i64 %14) ; IR: ret void define amdgpu_kernel void @multi_exit_region_divergent_ret_uniform_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 { entry: @@ -262,18 +264,17 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 } ; IR-LABEL: @multi_exit_region_uniform_ret_divergent_ret( -; IR: %Pivot = icmp sge i32 %tmp16, 2 -; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot) -; IR: br i1 %1, label %LeafBlock1, label %Flow +; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0) +; IR: br i1 %2, label %LeafBlock1, label %Flow ; IR: Flow: -; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] -; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ] -; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2) +; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] +; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ] +; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) -; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %16) -; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8) +; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %19) +; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) define amdgpu_kernel void @multi_exit_region_uniform_ret_divergent_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 { entry: @@ -313,13 +314,13 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 ; IR-LABEL: @multi_divergent_region_exit_ret_ret_return_value( ; IR: Flow2: -; IR: %8 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ] -; IR: %9 = phi i1 [ false, %exit1 ], [ %13, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %17) +; IR: %11 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ] +; IR: %12 = phi i1 [ false, %exit1 ], [ %16, %Flow1 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %20) ; IR: UnifiedReturnBlock: -; IR: %UnifiedRetVal = phi float [ %8, %Flow2 ], [ 1.000000e+00, %exit0 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %12) +; IR: %UnifiedRetVal = phi float [ %11, %Flow2 ], [ 1.000000e+00, %exit0 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %15) ; IR: ret float %UnifiedRetVal define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 { entry: @@ -386,32 +387,31 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 } ; IR-LABEL: @multi_divergent_region_exit_ret_unreachable( -; IR: %Pivot = icmp sge i32 %tmp16, 2 -; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot) +; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0) ; IR: Flow: -; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] -; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ] -; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2) +; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] +; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ] +; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) ; IR: Flow2: -; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %16) -; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8) -; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock +; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %19) +; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) +; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock ; IR: exit0: ; IR-NEXT: store volatile i32 17, i32 addrspace(3)* undef ; IR-NEXT: br label %UnifiedReturnBlock ; IR: Flow1: -; IR: %12 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %3, %Flow ] -; IR: %13 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ] -; IR: call void @llvm.amdgcn.end.cf(i64 %7) -; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13) -; IR: %15 = extractvalue { i1, i64 } %14, 0 -; IR: %16 = extractvalue { i1, i64 } %14, 1 -; IR: br i1 %15, label %exit1, label %Flow2 +; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ] +; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ] +; IR: call void @llvm.amdgcn.end.cf(i64 %8) +; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16) +; IR: %18 = extractvalue { i1, i64 } %17, 0 +; IR: %19 = extractvalue { i1, i64 } %17, 1 +; IR: br i1 %18, label %exit1, label %Flow2 ; IR: exit1: ; IR-NEXT: store volatile i32 9, i32 addrspace(1)* undef @@ -419,7 +419,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 ; IR-NEXT: br label %Flow2 ; IR: UnifiedReturnBlock: -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11) +; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14) ; IR-NEXT: ret void define amdgpu_kernel void @multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 { entry: @@ -475,7 +475,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 ; IR-NEXT: br label %Flow2 ; IR: UnifiedReturnBlock: ; preds = %exit0, %Flow2 -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11) +; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14) ; IR-NEXT: ret void define amdgpu_kernel void @indirect_multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 { entry: @@ -622,15 +622,15 @@ uniform.ret: ; IR-LABEL: @uniform_complex_multi_ret_nest_in_divergent_triangle( ; IR: Flow1: ; preds = %uniform.ret1, %uniform.multi.exit.region -; IR: %6 = phi i1 [ false, %uniform.ret1 ], [ true, %uniform.multi.exit.region ] -; IR: br i1 %6, label %uniform.if, label %Flow2 +; IR: %8 = phi i1 [ false, %uniform.ret1 ], [ true, %uniform.multi.exit.region ] +; IR: br i1 %8, label %uniform.if, label %Flow2 ; IR: Flow: ; preds = %uniform.then, %uniform.if -; IR: %7 = phi i1 [ %uniform.cond2, %uniform.then ], [ %uniform.cond1, %uniform.if ] -; IR: br i1 %7, label %uniform.endif, label %uniform.ret0 +; IR: %11 = phi i1 [ %10, %uniform.then ], [ %9, %uniform.if ] +; IR: br i1 %11, label %uniform.endif, label %uniform.ret0 ; IR: UnifiedReturnBlock: ; preds = %Flow3, %Flow2 -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %5) +; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %6) ; IR-NEXT: ret void define amdgpu_kernel void @uniform_complex_multi_ret_nest_in_divergent_triangle(i32 %arg0) #0 { entry: diff --git a/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/test/CodeGen/AMDGPU/nested-loop-conditions.ll index c0b4eaff60aa..672549c8ea63 100644 --- a/test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ b/test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -133,9 +133,9 @@ bb23: ; preds = %bb10 ; IR: Flow1: ; IR-NEXT: %loop.phi = phi i64 [ %loop.phi9, %Flow6 ], [ %phi.broken, %bb14 ] -; IR-NEXT: %13 = phi <4 x i32> [ %28, %Flow6 ], [ undef, %bb14 ] -; IR-NEXT: %14 = phi i32 [ %29, %Flow6 ], [ undef, %bb14 ] -; IR-NEXT: %15 = phi i1 [ %30, %Flow6 ], [ false, %bb14 ] +; IR-NEXT: %13 = phi <4 x i32> [ %29, %Flow6 ], [ undef, %bb14 ] +; IR-NEXT: %14 = phi i32 [ %30, %Flow6 ], [ undef, %bb14 ] +; IR-NEXT: %15 = phi i1 [ %31, %Flow6 ], [ false, %bb14 ] ; IR-NEXT: %16 = phi i1 [ false, %Flow6 ], [ %8, %bb14 ] ; IR-NEXT: %17 = call i64 @llvm.amdgcn.else.break(i64 %11, i64 %loop.phi) ; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11) @@ -144,9 +144,9 @@ bb23: ; preds = %bb10 ; IR: Flow2: ; IR-NEXT: %loop.phi10 = phi i64 [ %loop.phi11, %Flow5 ], [ %12, %bb16 ] -; IR-NEXT: %19 = phi <4 x i32> [ %28, %Flow5 ], [ undef, %bb16 ] -; IR-NEXT: %20 = phi i32 [ %29, %Flow5 ], [ undef, %bb16 ] -; IR-NEXT: %21 = phi i1 [ %30, %Flow5 ], [ false, %bb16 ] +; IR-NEXT: %19 = phi <4 x i32> [ %29, %Flow5 ], [ undef, %bb16 ] +; IR-NEXT: %20 = phi i32 [ %30, %Flow5 ], [ undef, %bb16 ] +; IR-NEXT: %21 = phi i1 [ %31, %Flow5 ], [ false, %bb16 ] ; IR-NEXT: %22 = phi i1 [ false, %Flow5 ], [ false, %bb16 ] ; IR-NEXT: %23 = phi i1 [ false, %Flow5 ], [ %8, %bb16 ] ; IR-NEXT: %24 = call { i1, i64 } @llvm.amdgcn.if(i1 %23) @@ -156,15 +156,16 @@ bb23: ; preds = %bb10 ; IR: bb21: ; IR: %tmp12 = icmp slt i32 %tmp11, 9 -; IR-NEXT: %27 = call i64 @llvm.amdgcn.if.break(i1 %tmp12, i64 %phi.broken) +; IR-NEXT: %27 = xor i1 %tmp12, true +; IR-NEXT: %28 = call i64 @llvm.amdgcn.if.break(i1 %27, i64 %phi.broken) ; IR-NEXT: br label %Flow3 ; IR: Flow3: ; IR-NEXT: %loop.phi11 = phi i64 [ %phi.broken, %bb21 ], [ %phi.broken, %Flow2 ] -; IR-NEXT: %loop.phi9 = phi i64 [ %27, %bb21 ], [ %loop.phi10, %Flow2 ] -; IR-NEXT: %28 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ] -; IR-NEXT: %29 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ] -; IR-NEXT: %30 = phi i1 [ %tmp12, %bb21 ], [ %21, %Flow2 ] +; IR-NEXT: %loop.phi9 = phi i64 [ %28, %bb21 ], [ %loop.phi10, %Flow2 ] +; IR-NEXT: %29 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ] +; IR-NEXT: %30 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ] +; IR-NEXT: %31 = phi i1 [ %27, %bb21 ], [ %21, %Flow2 ] ; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %26) ; IR-NEXT: br i1 %22, label %bb31.loopexit, label %Flow4 diff --git a/test/CodeGen/AMDGPU/private-access-no-objects.ll b/test/CodeGen/AMDGPU/private-access-no-objects.ll index af2683510293..dcb089010e99 100644 --- a/test/CodeGen/AMDGPU/private-access-no-objects.ll +++ b/test/CodeGen/AMDGPU/private-access-no-objects.ll @@ -1,7 +1,7 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=OPT %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=iceland -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=OPTNONE %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI -check-prefix=OPT %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=iceland -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=OPTNONE %s ; There are no stack objects, but still a private memory access. The ; private access regiters need to be correctly initialized anyway, and @@ -27,9 +27,9 @@ define amdgpu_kernel void @store_to_undef() #0 { ; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1] ; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3] ; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}} -; OPT: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}} +; OPT: buffer_store_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}} define amdgpu_kernel void @store_to_inttoptr() #0 { - store volatile i32 0, i32* inttoptr (i32 123 to i32*) + store volatile i32 0, i32* inttoptr (i32 124 to i32*) ret void } @@ -47,9 +47,9 @@ define amdgpu_kernel void @load_from_undef() #0 { ; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1] ; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3] ; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}} -; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}} +; OPT: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}} define amdgpu_kernel void @load_from_inttoptr() #0 { - %ld = load volatile i32, i32* inttoptr (i32 123 to i32*) + %ld = load volatile i32, i32* inttoptr (i32 124 to i32*) ret void } diff --git a/test/CodeGen/AMDGPU/readcyclecounter.ll b/test/CodeGen/AMDGPU/readcyclecounter.ll index 5c698c839fa6..d7b353cd25d3 100644 --- a/test/CodeGen/AMDGPU/readcyclecounter.ll +++ b/test/CodeGen/AMDGPU/readcyclecounter.ll @@ -22,4 +22,18 @@ define amdgpu_kernel void @test_readcyclecounter(i64 addrspace(1)* %out) #0 { ret void } +; This test used to crash in ScheduleDAG. +; +; GCN-LABEL: {{^}}test_readcyclecounter_smem: +; SI-DAG: s_memtime +; VI-DAG: s_memrealtime +; GCN-DAG: s_load_dword +define amdgpu_cs i32 @test_readcyclecounter_smem(i64 addrspace(2)* inreg %in) #0 { + %cycle0 = call i64 @llvm.readcyclecounter() + %in.v = load i64, i64 addrspace(2)* %in + %r.64 = add i64 %cycle0, %in.v + %r.32 = trunc i64 %r.64 to i32 + ret i32 %r.32 +} + attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/ret_jump.ll b/test/CodeGen/AMDGPU/ret_jump.ll index 748f98a12c59..f2fbacbab82e 100644 --- a/test/CodeGen/AMDGPU/ret_jump.ll +++ b/test/CodeGen/AMDGPU/ret_jump.ll @@ -56,7 +56,7 @@ ret.bb: ; preds = %else, %main_body } ; GCN-LABEL: {{^}}uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable: -; GCN: s_cbranch_scc1 [[RET_BB:BB[0-9]+_[0-9]+]] +; GCN: s_cbranch_vccnz [[RET_BB:BB[0-9]+_[0-9]+]] ; GCN: ; BB#{{[0-9]+}}: ; %else ; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc diff --git a/test/CodeGen/AMDGPU/sext-in-reg.ll b/test/CodeGen/AMDGPU/sext-in-reg.ll index b702e1c07200..160fb6a038fe 100644 --- a/test/CodeGen/AMDGPU/sext-in-reg.ll +++ b/test/CodeGen/AMDGPU/sext-in-reg.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FIXME: i16 promotion pass ruins the scalar cases when legal. diff --git a/test/CodeGen/AMDGPU/shl.v2i16.ll b/test/CodeGen/AMDGPU/shl.v2i16.ll index eac29bad7cf2..115221c5316d 100644 --- a/test/CodeGen/AMDGPU/shl.v2i16.ll +++ b/test/CodeGen/AMDGPU/shl.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s diff --git a/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/test/CodeGen/AMDGPU/sminmax.v2i16.ll index 4e093cdece21..16ce86bf8b11 100644 --- a/test/CodeGen/AMDGPU/sminmax.v2i16.ll +++ b/test/CodeGen/AMDGPU/sminmax.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=CIVI -check-prefix=GCN %s diff --git a/test/CodeGen/AMDGPU/spill-m0.ll b/test/CodeGen/AMDGPU/spill-m0.ll index 0e715c453209..8f1aebfe9ceb 100644 --- a/test/CodeGen/AMDGPU/spill-m0.ll +++ b/test/CodeGen/AMDGPU/spill-m0.ll @@ -69,19 +69,20 @@ endif: ; TOSMEM-NOT: s_m0 ; TOSMEM: s_add_u32 m0, s7, 0x100 ; TOSMEM-NEXT: s_buffer_store_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 4-byte Folded Spill -; TOSMEM-NOT: m0 +; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it +; FIXME-TOSMEM-NOT: m0 -; TOSMEM-NOT: m0 +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_add_u32 m0, s7, 0x200 ; TOSMEM: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill -; TOSMEM-NOT: m0 +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_mov_b64 exec, ; TOSMEM: s_cbranch_execz ; TOSMEM: s_branch ; TOSMEM: BB{{[0-9]+_[0-9]+}}: -; TOSMEM-NEXT: s_add_u32 m0, s7, 0x200 +; TOSMEM: s_add_u32 m0, s7, 0x200 ; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload @@ -130,7 +131,7 @@ endif: ; preds = %else, %if ; TOSMEM: s_branch ; TOSMEM: BB{{[0-9]+_[0-9]+}}: -; TOSMEM-NEXT: s_add_u32 m0, s3, 0x100 +; TOSMEM: s_add_u32 m0, s3, 0x100 ; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload ; GCN-NOT: v_readlane_b32 m0 @@ -159,13 +160,14 @@ endif: ; GCN-LABEL: {{^}}restore_m0_lds: ; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]] ; TOSMEM: s_cmp_eq_u32 -; TOSMEM-NOT: m0 +; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_add_u32 m0, s3, 0x100 ; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill -; TOSMEM-NOT: m0 +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_add_u32 m0, s3, 0x300 ; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill -; TOSMEM-NOT: m0 +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_cbranch_scc1 ; TOSMEM: s_mov_b32 m0, -1 @@ -178,10 +180,10 @@ endif: ; TOSMEM: ds_write_b64 -; TOSMEM-NOT: m0 +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_add_u32 m0, s3, 0x300 ; TOSMEM: s_buffer_load_dword s0, s[88:91], m0 ; 4-byte Folded Reload -; TOSMEM-NOT: m0 +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_waitcnt lgkmcnt(0) ; TOSMEM-NOT: m0 ; TOSMEM: s_mov_b32 m0, s0 diff --git a/test/CodeGen/AMDGPU/sub.v2i16.ll b/test/CodeGen/AMDGPU/sub.v2i16.ll index 69f0accef628..431344670ffb 100644 --- a/test/CodeGen/AMDGPU/sub.v2i16.ll +++ b/test/CodeGen/AMDGPU/sub.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s ; FIXME: Need to handle non-uniform case for function below (load without gep). diff --git a/test/CodeGen/AMDGPU/trap.ll b/test/CodeGen/AMDGPU/trap.ll index 77ad895d0e86..51771c9723e0 100644 --- a/test/CodeGen/AMDGPU/trap.ll +++ b/test/CodeGen/AMDGPU/trap.ll @@ -80,4 +80,25 @@ define amdgpu_kernel void @trap() { ret void } +; GCN-LABEL: {{^}}non_entry_trap: +; TRAP-BIT: enable_trap_handler = 1 +; NO-TRAP-BIT: enable_trap_handler = 0 + +; HSA: BB{{[0-9]_[0-9]+]]: ; %trap +; HSA-TRAP: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-NEXT: s_trap 2 +define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr #1 { +entry: + %tmp29 = load volatile i32, i32 addrspace(1)* %arg0 + %cmp = icmp eq i32 %tmp29, -1 + br i1 %cmp, label %ret, label %trap + +trap: + call void @llvm.trap() + unreachable + +ret: + ret void +} + attributes #0 = { nounwind noreturn } |