aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGen/R600/private-memory.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/R600/private-memory.ll')
-rw-r--r--test/CodeGen/R600/private-memory.ll70
1 files changed, 44 insertions, 26 deletions
diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll
index 124d9fa64505..15153c69a48d 100644
--- a/test/CodeGen/R600/private-memory.ll
+++ b/test/CodeGen/R600/private-memory.ll
@@ -1,23 +1,23 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
-; RUN: llc -mattr=+promote-alloca -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
-; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-; FUNC-LABEL: @mova_same_clause
+; FUNC-LABEL: {{^}}mova_same_clause:
; R600: LDS_WRITE
; R600: LDS_WRITE
; R600: LDS_READ
; R600: LDS_READ
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
-; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
-; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
+; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
+; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
%stack = alloca [5 x i32], align 4
@@ -45,10 +45,10 @@ entry:
; XXX: This generated code has unnecessary MOVs, we should be able to optimize
; this.
-; FUNC-LABEL: @multiple_structs
+; FUNC-LABEL: {{^}}multiple_structs:
; R600-NOT: MOVA_INT
-; SI-NOT: V_MOVREL
-; SI-NOT: V_MOVREL
+; SI-NOT: v_movrel
+; SI-NOT: v_movrel
%struct.point = type { i32, i32 }
define void @multiple_structs(i32 addrspace(1)* %out) {
@@ -76,9 +76,9 @@ entry:
; loads and stores should be lowered to copies, so there shouldn't be any
; MOVA instructions.
-; FUNC-LABEL: @direct_loop
+; FUNC-LABEL: {{^}}direct_loop:
; R600-NOT: MOVA_INT
-; SI-NOT: V_MOVREL
+; SI-NOT: v_movrel
define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
@@ -112,14 +112,13 @@ for.end:
ret void
}
-; FUNC-LABEL: @short_array
+; FUNC-LABEL: {{^}}short_array:
; R600: MOVA_INT
-; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
-; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
-; SI-PROMOTE-NOT: MOVREL
-; SI-PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] + v{{[0-9]+}} + s{{[0-9]+}}
+; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0
+; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:2 ; encoding: [0x02,0x10,0x68,0xe0
+; SI-PROMOTE: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
define void @short_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [2 x i16]
@@ -134,12 +133,12 @@ entry:
ret void
}
-; FUNC-LABEL: @char_array
+; FUNC-LABEL: {{^}}char_array:
; R600: MOVA_INT
-; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}, 0x0
-; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}, 0x1
+; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0
+; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:1 ; encoding: [0x01,0x10,0x60,0xe0
define void @char_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [2 x i8]
@@ -157,12 +156,12 @@ entry:
; Make sure we don't overwrite workitem information with private memory
-; FUNC-LABEL: @work_item_info
+; FUNC-LABEL: {{^}}work_item_info:
; R600-NOT: MOV T0.X
; Additional check in case the move ends up in the last slot
; R600-NOT: MOV * TO.X
-; SI-NOT: V_MOV_B32_e{{(32|64)}} v0
+; SI-NOT: v_mov_b32_e{{(32|64)}} v0
define void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = alloca [2 x i32]
@@ -180,11 +179,11 @@ entry:
; Test that two stack objects are not stored in the same register
; The second stack object should be in T3.X
-; FUNC-LABEL: @no_overlap
+; FUNC-LABEL: {{^}}no_overlap:
; R600_CHECK: MOV
; R600_CHECK: [[CHAN:[XYZW]]]+
; R600-NOT: [[CHAN]]+
-; SI: V_MOV_B32_e32 v3
+; SI: v_mov_b32_e32 v3
define void @no_overlap(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = alloca [3 x i8], align 1
@@ -291,3 +290,22 @@ entry:
ret void
}
+; AMDGPUPromoteAlloca does not know how to handle ptrtoint. When it
+; finds one, it should stop trying to promote.
+
+; FUNC-LABEL: ptrtoint:
+; SI-NOT: ds_write
+; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
+; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:5
+define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+ %alloca = alloca [16 x i32]
+ %tmp0 = getelementptr [16 x i32]* %alloca, i32 0, i32 %a
+ store i32 5, i32* %tmp0
+ %tmp1 = ptrtoint [16 x i32]* %alloca to i32
+ %tmp2 = add i32 %tmp1, 5
+ %tmp3 = inttoptr i32 %tmp2 to i32*
+ %tmp4 = getelementptr i32* %tmp3, i32 %b
+ %tmp5 = load i32* %tmp4
+ store i32 %tmp5, i32 addrspace(1)* %out
+ ret void
+}