diff options
Diffstat (limited to 'test/CodeGen/AMDGPU/schedule-global-loads.ll')
-rw-r--r-- | test/CodeGen/AMDGPU/schedule-global-loads.ll | 10 |
1 file changed, 4 insertions, 6 deletions
diff --git a/test/CodeGen/AMDGPU/schedule-global-loads.ll b/test/CodeGen/AMDGPU/schedule-global-loads.ll index 3f728fd873b3..1bf109dec032 100644 --- a/test/CodeGen/AMDGPU/schedule-global-loads.ll +++ b/test/CodeGen/AMDGPU/schedule-global-loads.ll @@ -1,21 +1,19 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s -declare i32 @llvm.r600.read.tidig.x() #1 - ; FIXME: This currently doesn't do a great job of clustering the ; loads, which end up with extra moves between them. Right now, it ; seems the only things areLoadsFromSameBasePtr is accomplishing is ; ordering the loads so that the lower address loads come first. ; FUNC-LABEL: {{^}}cluster_global_arg_loads: -; SI-DAG: buffer_load_dword [[REG0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 +; SI-DAG: buffer_load_dword [[REG0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 ; SI: buffer_store_dword [[REG0]] ; SI: buffer_store_dword [[REG1]] define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 { %load0 = load i32, i32 addrspace(1)* %ptr, align 4 - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 1 + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 2 %load1 = load i32, i32 addrspace(1)* %gep, align 4 store i32 %load0, i32 addrspace(1)* %out0, align 4 store i32 %load1, i32 addrspace(1)* %out1, align 4 |