diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-05-08 17:12:57 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-05-08 17:12:57 +0000 |
commit | c46e6a5940c50058e00c0c5f9123fd82e338d29a (patch) | |
tree | 89a719d723035c54a190b1f81d329834f1f93336 /test/CodeGen/AMDGPU | |
parent | 148779df305667b6942fee7e758fdf81a6498f38 (diff) | |
download | src-c46e6a5940c50058e00c0c5f9123fd82e338d29a.tar.gz src-c46e6a5940c50058e00c0c5f9123fd82e338d29a.zip |
Vendor import of llvm trunk r302418:vendor/llvm/llvm-trunk-r302418
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=317948
svn path=/vendor/llvm/llvm-trunk-r302418/; revision=317950; tag=vendor/llvm/llvm-trunk-r302418
Diffstat (limited to 'test/CodeGen/AMDGPU')
-rw-r--r-- | test/CodeGen/AMDGPU/detect-dead-lanes.mir | 6 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/fmuladd.f32.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/inserted-wait-states.mir | 20 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/invert-br-undef-vcc.mir | 3 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/lds-size.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/liveness.mir | 2 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/local-stack-slot-bug.ll | 26 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/optimize-if-exec-masking.mir | 20 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/rename-independent-subregs.mir | 1 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/scratch-simple.ll | 103 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir | 3 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/subreg-intervals.mir | 3 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir | 6 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/waitcnt-looptest.ll | 146 |
14 files changed, 257 insertions, 94 deletions
diff --git a/test/CodeGen/AMDGPU/detect-dead-lanes.mir b/test/CodeGen/AMDGPU/detect-dead-lanes.mir index 32e6f7cc0cdc..3148b9b8ff9d 100644 --- a/test/CodeGen/AMDGPU/detect-dead-lanes.mir +++ b/test/CodeGen/AMDGPU/detect-dead-lanes.mir @@ -294,7 +294,6 @@ registers: - { id: 5, class: sreg_128 } body: | bb.0: - successors: %bb.1 S_NOP 0, implicit-def %0 S_NOP 0, implicit-def %1 S_NOP 0, implicit-def %2 @@ -302,7 +301,6 @@ body: | S_BRANCH %bb.1 bb.1: - successors: %bb.1, %bb.2 %4 = PHI %3, %bb.0, %5, %bb.1 ; let's swiffle some lanes around for fun... @@ -348,7 +346,6 @@ registers: - { id: 6, class: sreg_128 } body: | bb.0: - successors: %bb.1 S_NOP 0, implicit-def %0 S_NOP 0, implicit-def %1 S_NOP 0, implicit-def dead %2 @@ -357,7 +354,6 @@ body: | S_BRANCH %bb.1 bb.1: - successors: %bb.1, %bb.2 %5 = PHI %4, %bb.0, %6, %bb.1 ; rotate lanes, but skip sub2 lane... @@ -396,13 +392,11 @@ registers: - { id: 3, class: sreg_128 } body: | bb.0: - successors: %bb.1 S_NOP 0, implicit-def %0 %1 = REG_SEQUENCE %0, %subreg.sub0 S_BRANCH %bb.1 bb.1: - successors: %bb.1, %bb.2 %2 = PHI %1, %bb.0, %3, %bb.1 ; rotate subreg lanes, skipping sub1 diff --git a/test/CodeGen/AMDGPU/fmuladd.f32.ll b/test/CodeGen/AMDGPU/fmuladd.f32.ll index fb605dd2e4bd..e42255026692 100644 --- a/test/CodeGen/AMDGPU/fmuladd.f32.ll +++ b/test/CodeGen/AMDGPU/fmuladd.f32.ll @@ -191,8 +191,8 @@ define amdgpu_kernel void @fadd_b_a_a_f32(float addrspace(1)* %out, ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]] -; GCN-DENORM-SLOWFMA: v_mul_f32_e32 [[TMP:v[0-9]+]], -2.0, [[R1]] -; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]] +; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]] +; GCN-DENORM-SLOWFMA: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]] ; SI-DENORM: buffer_store_dword [[RESULT]] ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] @@ -251,8 +251,8 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], -[[R1]], 2.0, [[R2]] -; GCN-DENORM-SLOWFMA: v_mul_f32_e32 [[TMP:v[0-9]+]], -2.0, [[R1]] -; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]] +; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]] +; GCN-DENORM-SLOWFMA: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]] ; SI-DENORM: buffer_store_dword [[RESULT]] ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] diff --git a/test/CodeGen/AMDGPU/inserted-wait-states.mir b/test/CodeGen/AMDGPU/inserted-wait-states.mir index 1479303712d0..c6fe6debd225 100644 --- a/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ b/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -77,19 +77,16 @@ name: div_fmas body: | bb.0: - successors: %bb.1 %vcc = S_MOV_B64 0 %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec S_BRANCH %bb.1 bb.1: - successors: %bb.2 implicit %vcc = V_CMP_EQ_I32_e32 %vgpr1, %vgpr2, implicit %exec %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec S_BRANCH %bb.2 bb.2: - successors: %bb.3 %vcc = V_CMP_EQ_I32_e64 %vgpr1, %vgpr2, implicit %exec %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec S_BRANCH %bb.3 @@ -130,19 +127,16 @@ name: s_getreg body: | bb.0: - successors: %bb.1 S_SETREG_B32 %sgpr0, 1 %sgpr1 = S_GETREG_B32 1 S_BRANCH %bb.1 bb.1: - successors: %bb.2 S_SETREG_IMM32_B32 0, 1 %sgpr1 = S_GETREG_B32 1 S_BRANCH %bb.2 bb.2: - successors: %bb.3 S_SETREG_B32 %sgpr0, 1 %sgpr1 = S_MOV_B32 0 %sgpr2 = S_GETREG_B32 1 @@ -178,13 +172,11 @@ name: s_setreg body: | bb.0: - successors: %bb.1 S_SETREG_B32 %sgpr0, 1 S_SETREG_B32 %sgpr1, 1 S_BRANCH %bb.1 bb.1: - successors: %bb.2 S_SETREG_B32 %sgpr0, 64 S_SETREG_B32 %sgpr1, 128 S_BRANCH %bb.2 @@ -237,7 +229,6 @@ name: vmem_gt_8dw_store body: | bb.0: - successors: %bb.1 BUFFER_STORE_DWORD_OFFSET %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec %vgpr3 = V_MOV_B32_e32 0, implicit %exec BUFFER_STORE_DWORDX3_OFFSET %vgpr2_vgpr3_vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec @@ -310,19 +301,16 @@ name: readwrite_lane body: | bb.0: - successors: %bb.1 %vgpr0,%sgpr0_sgpr1 = V_ADD_I32_e64 %vgpr1, %vgpr2, implicit %vcc, implicit %exec %sgpr4 = V_READLANE_B32 %vgpr4, %sgpr0 S_BRANCH %bb.1 bb.1: - successors: %bb.2 %vgpr0,%sgpr0_sgpr1 = V_ADD_I32_e64 %vgpr1, %vgpr2, implicit %vcc, implicit %exec %vgpr4 = V_WRITELANE_B32 %sgpr0, %sgpr0 S_BRANCH %bb.2 bb.2: - successors: %bb.3 %vgpr0,implicit %vcc = V_ADD_I32_e32 %vgpr1, %vgpr2, implicit %vcc, implicit %exec %sgpr4 = V_READLANE_B32 %vgpr4, %vcc_lo S_BRANCH %bb.3 @@ -352,7 +340,6 @@ name: rfe body: | bb.0: - successors: %bb.1 S_SETREG_B32 %sgpr0, 3 S_RFE_B64 %sgpr2_sgpr3 S_BRANCH %bb.1 @@ -382,7 +369,6 @@ name: s_mov_fed_b32 body: | bb.0: - successors: %bb.1 %sgpr0 = S_MOV_FED_B32 %sgpr0 %sgpr0 = S_MOV_B32 %sgpr0 S_BRANCH %bb.1 @@ -423,19 +409,16 @@ name: s_movrel body: | bb.0: - successors: %bb.1 %m0 = S_MOV_B32 0 %sgpr0 = S_MOVRELS_B32 %sgpr0, implicit %m0 S_BRANCH %bb.1 bb.1: - successors: %bb.2 %m0 = S_MOV_B32 0 %sgpr0_sgpr1 = S_MOVRELS_B64 %sgpr0_sgpr1, implicit %m0 S_BRANCH %bb.2 bb.2: - successors: %bb.3 %m0 = S_MOV_B32 0 %sgpr0 = S_MOVRELD_B32 %sgpr0, implicit %m0 S_BRANCH %bb.3 @@ -475,19 +458,16 @@ name: v_interp body: | bb.0: - successors: %bb.1 %m0 = S_MOV_B32 0 %vgpr0 = V_INTERP_P1_F32 %vgpr0, 0, 0, implicit %m0, implicit %exec S_BRANCH %bb.1 bb.1: - successors: %bb.2 %m0 = S_MOV_B32 0 %vgpr0 = V_INTERP_P2_F32 %vgpr0, %vgpr1, 0, 0, implicit %m0, implicit %exec S_BRANCH %bb.2 bb.2: - successors: %bb.3 %m0 = S_MOV_B32 0 %vgpr0 = V_INTERP_P1_F32_16bank %vgpr0, 0, 0, implicit %m0, implicit %exec S_BRANCH %bb.3 diff --git a/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir b/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir index bc1dafe0ea1e..67642282f75b 100644 --- a/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir +++ b/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir @@ -53,7 +53,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.entry: - successors: %bb.2.if, %bb.1.else liveins: %sgpr0_sgpr1 %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) @@ -62,7 +61,6 @@ body: | S_CBRANCH_VCCNZ %bb.2.if, implicit undef %vcc bb.1.else: - successors: %bb.3.done liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 %vgpr0 = V_MOV_B32_e32 100, implicit %exec @@ -71,7 +69,6 @@ body: | S_BRANCH %bb.3.done bb.2.if: - successors: %bb.3.done liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 %vgpr0 = V_MOV_B32_e32 9, implicit %exec diff --git a/test/CodeGen/AMDGPU/lds-size.ll b/test/CodeGen/AMDGPU/lds-size.ll index c65817abd489..ff78c3bcb18c 100644 --- a/test/CodeGen/AMDGPU/lds-size.ll +++ b/test/CodeGen/AMDGPU/lds-size.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=ALL -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=ALL -check-prefix=EG %s ; This test makes sure we do not double count global values when they are @@ -11,6 +12,9 @@ ; EG-NEXT: .long 1 ; ALL: {{^}}test: +; HSA: granulated_lds_size = 0 +; HSA: workgroup_group_segment_byte_size = 4 + ; GCN: ; LDSByteSize: 4 bytes/workgroup (compile time only) @lds = internal unnamed_addr addrspace(3) global i32 undef, align 4 diff --git a/test/CodeGen/AMDGPU/liveness.mir b/test/CodeGen/AMDGPU/liveness.mir index 48762e3f2ab4..6fd8466492d0 100644 --- a/test/CodeGen/AMDGPU/liveness.mir +++ b/test/CodeGen/AMDGPU/liveness.mir @@ -16,13 +16,11 @@ registers: - { id: 0, class: sreg_64 } body: | bb.0: - successors: %bb.1, %bb.2 S_NOP 0, implicit-def undef %0.sub0 S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc S_BRANCH %bb.2 bb.1: - successors: %bb.2 S_NOP 0, implicit-def %0.sub1 S_NOP 0, implicit %0.sub1 S_BRANCH %bb.2 diff --git a/test/CodeGen/AMDGPU/local-stack-slot-bug.ll b/test/CodeGen/AMDGPU/local-stack-slot-bug.ll deleted file mode 100644 index d3e0f0be4b5f..000000000000 --- a/test/CodeGen/AMDGPU/local-stack-slot-bug.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck %s - -; This used to fail due to a v_add_i32 instruction with an illegal immediate -; operand that was created during Local Stack Slot Allocation. Test case derived -; from https://bugs.freedesktop.org/show_bug.cgi?id=96602 -; -; CHECK-LABEL: {{^}}main: - -; CHECK-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x200 -; CHECK-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0x400{{$}} -; CHECK-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0 -; CHECK-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]] - -; CHECK-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[CLAMP_IDX]], [[K]] -; CHECK-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[CLAMP_IDX]], [[ZERO]] - -; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen -; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen -define amdgpu_ps float @main(i32 %idx) { -main_body: - %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx - %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx - %r = fadd float %v1, %v2 - ret float %r -} diff --git a/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir b/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir index 2de6b59e59e9..b5dc9d9dac84 100644 --- a/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir +++ b/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir @@ -176,7 +176,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -189,7 +188,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 @@ -236,7 +234,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -248,7 +245,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 @@ -295,7 +291,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -307,7 +302,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 @@ -356,7 +350,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -370,7 +363,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 @@ -418,7 +410,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr6 = S_MOV_B32 -1 @@ -433,7 +424,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 , %sgpr4_sgpr5_sgpr6_sgpr7 %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) @@ -480,7 +470,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -494,7 +483,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 @@ -544,7 +532,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -557,7 +544,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1, %sgpr2_sgpr3 S_SLEEP 0, implicit %sgpr2_sgpr3 %sgpr7 = S_MOV_B32 61440 @@ -606,7 +592,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -618,7 +603,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 @@ -665,7 +649,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -677,7 +660,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 @@ -724,7 +706,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -736,7 +717,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 diff --git a/test/CodeGen/AMDGPU/rename-independent-subregs.mir b/test/CodeGen/AMDGPU/rename-independent-subregs.mir index fc2e4426ba48..31ad26e76979 100644 --- a/test/CodeGen/AMDGPU/rename-independent-subregs.mir +++ b/test/CodeGen/AMDGPU/rename-independent-subregs.mir @@ -49,7 +49,6 @@ registers: - { id: 1, class: sreg_128 } body: | bb.0: - successors: %bb.1, %bb.2 S_NOP 0, implicit-def undef %0.sub2 S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc S_BRANCH %bb.2 diff --git a/test/CodeGen/AMDGPU/scratch-simple.ll b/test/CodeGen/AMDGPU/scratch-simple.ll new file mode 100644 index 000000000000..60b9b56a48d1 --- /dev/null +++ b/test/CodeGen/AMDGPU/scratch-simple.ll @@ -0,0 +1,103 @@ +; RUN: llc -march=amdgcn -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s + +; This used to fail due to a v_add_i32 instruction with an illegal immediate +; operand that was created during Local Stack Slot Allocation. Test case derived +; from https://bugs.freedesktop.org/show_bug.cgi?id=96602 +; +; GCN-LABEL: {{^}}ps_main: + +; GCN-DAG: s_mov_b32 [[SWO:s[0-9]+]], s0 +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x200 +; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0x400{{$}} +; GCN-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0 +; GCN-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]] + +; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[CLAMP_IDX]], [[K]] +; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[CLAMP_IDX]], [[ZERO]] + +; GCN: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +define amdgpu_ps float @ps_main(i32 %idx) { + %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx + %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx + %r = fadd float %v1, %v2 + ret float %r +} + +; GCN-LABEL: {{^}}vs_main: +; GCN: s_mov_b32 [[SWO:s[0-9]+]], s0 +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +define amdgpu_vs float @vs_main(i32 %idx) { + %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx + %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx + %r = fadd float %v1, %v2 + ret float %r +} + +; GCN-LABEL: {{^}}cs_main: +; GCN: s_mov_b32 [[SWO:s[0-9]+]], s0 +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +define amdgpu_cs float @cs_main(i32 %idx) { + %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx + %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx + %r = fadd float %v1, %v2 + ret float %r +} + +; GCN-LABEL: {{^}}hs_main: +; SI: s_mov_b32 [[SWO:s[0-9]+]], s0 +; GFX9: s_mov_b32 [[SWO:s[0-9]+]], s5 +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +define amdgpu_hs float @hs_main(i32 %idx) { + %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx + %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx + %r = fadd float %v1, %v2 + ret float %r +} + +; GCN-LABEL: {{^}}gs_main: +; SI: s_mov_b32 [[SWO:s[0-9]+]], s0 +; GFX9: s_mov_b32 [[SWO:s[0-9]+]], s5 +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +define amdgpu_gs float @gs_main(i32 %idx) { + %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx + %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx + %r = fadd float %v1, %v2 + ret float %r +} + +; GCN-LABEL: {{^}}hs_ir_uses_scratch_offset: +; SI: s_mov_b32 [[SWO:s[0-9]+]], s6 +; GFX9: s_mov_b32 [[SWO:s[0-9]+]], s5 +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: s_mov_b32 s2, s5 +define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) { + %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx + %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx + %f = fadd float %v1, %v2 + %r1 = insertvalue <{i32, i32, i32, float}> undef, i32 %swo, 2 + %r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3 + ret <{i32, i32, i32, float}> %r2 +} + +; GCN-LABEL: {{^}}gs_ir_uses_scratch_offset: +; SI: s_mov_b32 [[SWO:s[0-9]+]], s6 +; GFX9: s_mov_b32 [[SWO:s[0-9]+]], s5 +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: s_mov_b32 s2, s5 +define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) { + %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx + %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx + %f = fadd float %v1, %v2 + %r1 = insertvalue <{i32, i32, i32, float}> undef, i32 %swo, 2 + %r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3 + ret <{i32, i32, i32, float}> %r2 +} diff --git a/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir b/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir index 20052e865a54..18176de53793 100644 --- a/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir +++ b/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir @@ -20,12 +20,10 @@ body: | ; GCN: V_ADD_I32 bb.0: liveins: %vgpr0 - successors: %bb.1 %7 = COPY %vgpr0 %8 = S_MOV_B32 0 bb.1: - successors: %bb.1, %bb.2 %0 = PHI %8, %bb.0, %0, %bb.1, %2, %bb.2 %9 = V_MOV_B32_e32 9, implicit %exec %10 = V_CMP_EQ_U32_e64 %7, %9, implicit %exec @@ -33,7 +31,6 @@ body: | S_BRANCH %bb.1 bb.2: - successors: %bb.1 SI_END_CF %1, implicit-def %exec, implicit-def %scc, implicit %exec %11 = S_MOV_B32 1 %2 = S_ADD_I32 %0, %11, implicit-def %scc diff --git a/test/CodeGen/AMDGPU/subreg-intervals.mir b/test/CodeGen/AMDGPU/subreg-intervals.mir index c477fe9bc6d3..62816da25b2c 100644 --- a/test/CodeGen/AMDGPU/subreg-intervals.mir +++ b/test/CodeGen/AMDGPU/subreg-intervals.mir @@ -31,17 +31,14 @@ registers: - { id: 0, class: sreg_64 } body: | bb.0: - successors: %bb.1, %bb.2 S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc S_BRANCH %bb.2 bb.1: - successors: %bb.3 S_NOP 0, implicit-def undef %0.sub0 S_BRANCH %bb.3 bb.2: - successors: %bb.3 S_NOP 0, implicit-def %0 S_BRANCH %bb.3 diff --git a/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir b/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir index 5e5465800c3a..6eb937e71b1b 100644 --- a/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir +++ b/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir @@ -75,7 +75,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.entry: - successors: %bb.2.if, %bb.1.else liveins: %sgpr0_sgpr1 %sgpr2 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 9, 0 :: (non-temporal dereferenceable invariant load 4 from `float addrspace(2)* undef`) @@ -86,7 +85,6 @@ body: | S_CBRANCH_VCCZ %bb.1.else, implicit killed %vcc bb.2.if: - successors: %bb.3.done liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 %vgpr0 = V_MOV_B32_e32 9, implicit %exec @@ -95,7 +93,6 @@ body: | S_BRANCH %bb.3.done bb.1.else: - successors: %bb.3.done liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 %vgpr0 = V_MOV_B32_e32 100, implicit %exec @@ -141,7 +138,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.entry: - successors: %bb.2.if, %bb.1.else liveins: %sgpr0_sgpr1 %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) @@ -150,7 +146,6 @@ body: | S_CBRANCH_VCCZ %bb.1.else, implicit undef %vcc bb.2.if: - successors: %bb.3.done liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 %vgpr0 = V_MOV_B32_e32 9, implicit %exec @@ -159,7 +154,6 @@ body: | S_BRANCH %bb.3.done bb.1.else: - successors: %bb.3.done liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 %vgpr0 = V_MOV_B32_e32 100, implicit %exec diff --git a/test/CodeGen/AMDGPU/waitcnt-looptest.ll b/test/CodeGen/AMDGPU/waitcnt-looptest.ll new file mode 100644 index 000000000000..2a3ce4dfd191 --- /dev/null +++ b/test/CodeGen/AMDGPU/waitcnt-looptest.ll @@ -0,0 +1,146 @@ +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global | FileCheck --check-prefix=GCN %s + +; Check that the waitcnt insertion algorithm correctly propagates wait counts +; from before a loop to the loop header. + +; GCN-LABEL: {{^}}testKernel +; GCN: BB0_1: +; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: v_cmp_eq_f32_e64 +; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: v_cmp_eq_f32_e32 +; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: v_cmp_eq_f32_e32 + +@data_generic = addrspace(1) global [100 x float] [float 0.000000e+00, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD99999A0000000, float 5.000000e-01, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000, float 0x3FECCCCCC0000000, float 1.000000e+00, float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000, float 1.500000e+00, float 0x3FF99999A0000000, float 0x3FFB333340000000, float 0x3FFCCCCCC0000000, float 0x3FFE666660000000, float 2.000000e+00, float 0x4000CCCCC0000000, float 0x40019999A0000000, float 0x4002666660000000, float 0x4003333340000000, float 2.500000e+00, float 0x4004CCCCC0000000, float 0x40059999A0000000, float 0x4006666660000000, float 0x4007333340000000, float 3.000000e+00, float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, float 0x400B333340000000, float 3.500000e+00, float 0x400CCCCCC0000000, float 0x400D9999A0000000, float 0x400E666660000000, float 0x400F333340000000, float 4.000000e+00, float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000, float 4.500000e+00, float 0x4012666660000000, float 0x4012CCCCC0000000, float 0x4013333340000000, float 0x40139999A0000000, float 5.000000e+00, float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000, float 5.500000e+00, float 0x4016666660000000, float 0x4016CCCCC0000000, float 0x4017333340000000, float 0x40179999A0000000, float 6.000000e+00, float 0x4018666660000000, float 0x4018CCCCC0000000, float 0x4019333340000000, float 0x40199999A0000000, float 6.500000e+00, float 0x401A666660000000, float 0x401ACCCCC0000000, float 0x401B333340000000, float 0x401B9999A0000000, float 7.000000e+00, float 0x401C666660000000, float 0x401CCCCCC0000000, float 0x401D333340000000, float 0x401D9999A0000000, float 7.500000e+00, float 0x401E666660000000, float 0x401ECCCCC0000000, float 0x401F333340000000, float 0x401F9999A0000000, float 8.000000e+00, float 0x4020333340000000, float 0x4020666660000000, float 0x40209999A0000000, float 0x4020CCCCC0000000, float 8.500000e+00, float 0x4021333340000000, float 0x4021666660000000, float 0x40219999A0000000, float 0x4021CCCCC0000000, float 9.000000e+00, float 0x4022333340000000, float 0x4022666660000000, float 0x40229999A0000000, float 0x4022CCCCC0000000, float 9.500000e+00, float 0x4023333340000000, float 0x4023666660000000, float 0x40239999A0000000, float 0x4023CCCCC0000000], align 4 +@data_reference = addrspace(1) global [100 x float] [float 0.000000e+00, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD99999A0000000, float 5.000000e-01, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000, float 0x3FECCCCCC0000000, float 1.000000e+00, float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000, float 1.500000e+00, float 0x3FF99999A0000000, float 0x3FFB333340000000, float 0x3FFCCCCCC0000000, float 0x3FFE666660000000, float 2.000000e+00, float 0x4000CCCCC0000000, float 0x40019999A0000000, float 0x4002666660000000, float 0x4003333340000000, float 2.500000e+00, float 0x4004CCCCC0000000, float 0x40059999A0000000, float 0x4006666660000000, float 0x4007333340000000, float 3.000000e+00, float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, float 0x400B333340000000, float 3.500000e+00, float 0x400CCCCCC0000000, float 0x400D9999A0000000, float 0x400E666660000000, float 0x400F333340000000, float 4.000000e+00, float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000, float 4.500000e+00, float 0x4012666660000000, float 0x4012CCCCC0000000, float 0x4013333340000000, float 0x40139999A0000000, float 5.000000e+00, float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000, float 5.500000e+00, float 0x4016666660000000, float 0x4016CCCCC0000000, float 0x4017333340000000, float 0x40179999A0000000, float 6.000000e+00, float 0x4018666660000000, float 0x4018CCCCC0000000, float 0x4019333340000000, float 0x40199999A0000000, float 6.500000e+00, float 0x401A666660000000, float 0x401ACCCCC0000000, float 0x401B333340000000, float 0x401B9999A0000000, float 7.000000e+00, float 0x401C666660000000, float 0x401CCCCCC0000000, float 0x401D333340000000, float 0x401D9999A0000000, float 7.500000e+00, float 0x401E666660000000, float 0x401ECCCCC0000000, float 0x401F333340000000, float 0x401F9999A0000000, float 8.000000e+00, float 0x4020333340000000, float 0x4020666660000000, float 0x40209999A0000000, float 0x4020CCCCC0000000, float 8.500000e+00, float 0x4021333340000000, float 0x4021666660000000, float 0x40219999A0000000, float 0x4021CCCCC0000000, float 9.000000e+00, float 0x4022333340000000, float 0x4022666660000000, float 0x40229999A0000000, float 0x4022CCCCC0000000, float 9.500000e+00, float 0x4023333340000000, float 0x4023666660000000, float 0x40239999A0000000, float 0x4023CCCCC0000000], align 4 + +define amdgpu_kernel void @testKernel(i32 addrspace(1)* nocapture %arg) local_unnamed_addr #0 { +bb: + store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> addrspace(4)* bitcast (float addrspace(4)* getelementptr ([100 x float], [100 x float] addrspace(4)* addrspacecast ([100 x float] addrspace(1)* @data_generic to [100 x float] addrspace(4)*), i64 0, i64 4) to <2 x float> addrspace(4)*), align 4 + store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> addrspace(4)* bitcast (float addrspace(4)* getelementptr ([100 x float], [100 x float] addrspace(4)* addrspacecast ([100 x float] addrspace(1)* @data_reference to [100 x float] addrspace(4)*), i64 0, i64 4) to <2 x float> addrspace(4)*), align 4 + br label %bb18 + +bb1: ; preds = %bb18 + %tmp = tail call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() + %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x() + %tmp3 = tail call i32 @llvm.amdgcn.workgroup.id.x() + %tmp4 = getelementptr inbounds i8, i8 addrspace(2)* %tmp, i64 4 + %tmp5 = bitcast i8 addrspace(2)* %tmp4 to i16 addrspace(2)* + %tmp6 = load i16, i16 addrspace(2)* %tmp5, align 4 + %tmp7 = zext i16 %tmp6 to i32 + %tmp8 = mul i32 %tmp3, %tmp7 + %tmp9 = add i32 %tmp8, %tmp2 + %tmp10 = tail call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() + %tmp11 = zext i32 %tmp9 to i64 + %tmp12 = bitcast i8 addrspace(2)* %tmp10 to i64 addrspace(2)* + %tmp13 = load i64, i64 addrspace(2)* %tmp12, align 8 + %tmp14 = add i64 %tmp13, %tmp11 + %tmp15 = zext i1 %tmp99 to i32 + %tmp16 = and i64 %tmp14, 4294967295 + %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp16 + store i32 %tmp15, i32 addrspace(1)* %tmp17, align 4 + ret void + +bb18: ; preds = %bb18, %bb + %tmp19 = phi i64 [ 0, %bb ], [ %tmp102, %bb18 ] + %tmp20 = phi i32 [ 0, %bb ], [ %tmp100, %bb18 ] + %tmp21 = phi i1 [ true, %bb ], [ %tmp99, %bb18 ] + %tmp22 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp19 + %tmp23 = load float, float addrspace(1)* %tmp22, align 4 + %tmp24 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp19 + %tmp25 = load float, float addrspace(1)* %tmp24, align 4 + %tmp26 = fcmp oeq float %tmp23, %tmp25 + %tmp27 = and i1 %tmp21, %tmp26 + %tmp28 = or i32 %tmp20, 1 + %tmp29 = sext i32 %tmp28 to i64 + %tmp30 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp29 + %tmp31 = load float, float addrspace(1)* %tmp30, align 4 + %tmp32 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp29 + %tmp33 = load float, float addrspace(1)* %tmp32, align 4 + %tmp34 = fcmp oeq float %tmp31, %tmp33 + %tmp35 = and i1 %tmp27, %tmp34 + %tmp36 = add nuw nsw i32 %tmp20, 2 + %tmp37 = sext i32 %tmp36 to i64 + %tmp38 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp37 + %tmp39 = load float, float addrspace(1)* %tmp38, align 4 + %tmp40 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp37 + %tmp41 = load float, float addrspace(1)* %tmp40, align 4 + %tmp42 = fcmp oeq float %tmp39, %tmp41 + %tmp43 = and i1 %tmp35, %tmp42 + %tmp44 = add nuw nsw i32 %tmp20, 3 + %tmp45 = sext i32 %tmp44 to i64 + %tmp46 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp45 + %tmp47 = load float, float addrspace(1)* %tmp46, align 4 + %tmp48 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp45 + %tmp49 = load float, float addrspace(1)* %tmp48, align 4 + %tmp50 = fcmp oeq float %tmp47, %tmp49 + %tmp51 = and i1 %tmp43, %tmp50 + %tmp52 = add nuw nsw i32 %tmp20, 4 + %tmp53 = sext i32 %tmp52 to i64 + %tmp54 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp53 + %tmp55 = load float, float addrspace(1)* %tmp54, align 4 + %tmp56 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp53 + %tmp57 = load float, float addrspace(1)* %tmp56, align 4 + %tmp58 = fcmp oeq float %tmp55, %tmp57 + %tmp59 = and i1 %tmp51, %tmp58 + %tmp60 = add nuw nsw i32 %tmp20, 5 + %tmp61 = sext i32 %tmp60 to i64 + %tmp62 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp61 + %tmp63 = load float, float addrspace(1)* %tmp62, align 4 + %tmp64 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp61 + %tmp65 = load float, float addrspace(1)* %tmp64, align 4 + %tmp66 = fcmp oeq float %tmp63, %tmp65 + %tmp67 = and i1 %tmp59, %tmp66 + %tmp68 = add nuw nsw i32 %tmp20, 6 + %tmp69 = sext i32 %tmp68 to i64 + %tmp70 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp69 + %tmp71 = load float, float addrspace(1)* %tmp70, align 4 + %tmp72 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp69 + %tmp73 = load float, float addrspace(1)* %tmp72, align 4 + %tmp74 = fcmp oeq float %tmp71, %tmp73 + %tmp75 = and i1 %tmp67, %tmp74 + %tmp76 = add nuw nsw i32 %tmp20, 7 + %tmp77 = sext i32 %tmp76 to i64 + %tmp78 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp77 + %tmp79 = load float, float addrspace(1)* %tmp78, align 4 + %tmp80 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp77 + %tmp81 = load float, float addrspace(1)* %tmp80, align 4 + %tmp82 = fcmp oeq float %tmp79, %tmp81 + %tmp83 = and i1 %tmp75, %tmp82 + %tmp84 = add nuw nsw i32 %tmp20, 8 + %tmp85 = sext i32 %tmp84 to i64 + %tmp86 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp85 + %tmp87 = load float, float addrspace(1)* %tmp86, align 4 + %tmp88 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp85 + %tmp89 = load float, float addrspace(1)* %tmp88, align 4 + %tmp90 = fcmp oeq float %tmp87, %tmp89 + %tmp91 = and i1 %tmp83, %tmp90 + %tmp92 = add nuw nsw i32 %tmp20, 9 + %tmp93 = sext i32 %tmp92 to i64 + %tmp94 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp93 + %tmp95 = load float, float addrspace(1)* %tmp94, align 4 + %tmp96 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp93 + %tmp97 = load float, float addrspace(1)* %tmp96, align 4 + %tmp98 = fcmp oeq float %tmp95, %tmp97 + %tmp99 = and i1 %tmp91, %tmp98 + %tmp100 = add nuw nsw i32 %tmp20, 10 + %tmp101 = icmp eq i32 %tmp100, 100 + %tmp102 = sext i32 %tmp100 to i64 + br i1 %tmp101, label %bb1, label %bb18 +} + +; Function Attrs: nounwind readnone speculatable +declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1 + +; Function Attrs: nounwind readnone speculatable +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +; Function Attrs: nounwind readnone speculatable +declare i32 @llvm.amdgcn.workgroup.id.x() #1 + +; Function Attrs: nounwind readnone speculatable +declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #1 + +attributes #0 = { "target-cpu"="fiji" "target-features"="-flat-for-global" } +attributes #1 = { nounwind readnone speculatable } |