author    Dimitry Andric <dim@FreeBSD.org>  2017-07-19 07:02:10 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2017-07-19 07:02:10 +0000
commit    93c91e39b29142dec1d03a30df9f6e757f56c193 (patch)
tree      33a9b014a327e64450b3c9ed46d8c5bdb78ad345 /test/CodeGen/AMDGPU
parent    ca089b24d48ef6fa8da2d0bb8c25bb802c4a95c0 (diff)
Vendor import of llvm trunk r308421 (vendor/llvm/llvm-trunk-r308421)
Notes:
  svn path=/vendor/llvm/dist/; revision=321184
  svn path=/vendor/llvm/llvm-trunk-r308421/; revision=321185; tag=vendor/llvm/llvm-trunk-r308421
Diffstat (limited to 'test/CodeGen/AMDGPU')
-rw-r--r--  test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll      312
-rw-r--r--  test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll            11
-rw-r--r--  test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll         2
-rw-r--r--  test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll                14
-rw-r--r--  test/CodeGen/AMDGPU/fcanonicalize-elimination.ll               62
-rw-r--r--  test/CodeGen/AMDGPU/function-args.ll                           16
-rw-r--r--  test/CodeGen/AMDGPU/hsa.ll                                     10
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll         12
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll                      2
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll                    4
-rw-r--r--  test/CodeGen/AMDGPU/move-to-valu-worklist.ll                   29
-rw-r--r--  test/CodeGen/AMDGPU/mubuf-offset-private.ll                    26
-rw-r--r--  test/CodeGen/AMDGPU/parallelandifcollapse.ll                    2
-rw-r--r--  test/CodeGen/AMDGPU/parallelorifcollapse.ll                     2
-rw-r--r--  test/CodeGen/AMDGPU/private-access-no-objects.ll               10
-rw-r--r--  test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir 2
-rw-r--r--  test/CodeGen/AMDGPU/scratch-simple.ll                          72
-rw-r--r--  test/CodeGen/AMDGPU/sdwa-peephole-instr.mir                    35
-rw-r--r--  test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir                         4
-rw-r--r--  test/CodeGen/AMDGPU/trap.ll                                     8
-rw-r--r--  test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir             2
-rw-r--r--  test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll 36
-rw-r--r--  test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll          6
23 files changed, 583 insertions, 96 deletions
diff --git a/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
new file mode 100644
index 000000000000..e9797eff712b
--- /dev/null
+++ b/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -0,0 +1,312 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=HSA %s
+
+declare i32 @llvm.amdgcn.workgroup.id.x() #0
+declare i32 @llvm.amdgcn.workgroup.id.y() #0
+declare i32 @llvm.amdgcn.workgroup.id.z() #0
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.y() #0
+declare i32 @llvm.amdgcn.workitem.id.z() #0
+
+declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
+declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
+declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #0
+declare i64 @llvm.amdgcn.dispatch.id() #0
+
+; HSA: define void @use_workitem_id_x() #1 {
+define void @use_workitem_id_x() #1 {
+ %val = call i32 @llvm.amdgcn.workitem.id.x()
+ store volatile i32 %val, i32 addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @use_workitem_id_y() #2 {
+define void @use_workitem_id_y() #1 {
+ %val = call i32 @llvm.amdgcn.workitem.id.y()
+ store volatile i32 %val, i32 addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @use_workitem_id_z() #3 {
+define void @use_workitem_id_z() #1 {
+ %val = call i32 @llvm.amdgcn.workitem.id.z()
+ store volatile i32 %val, i32 addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @use_workgroup_id_x() #4 {
+define void @use_workgroup_id_x() #1 {
+ %val = call i32 @llvm.amdgcn.workgroup.id.x()
+ store volatile i32 %val, i32 addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @use_workgroup_id_y() #5 {
+define void @use_workgroup_id_y() #1 {
+ %val = call i32 @llvm.amdgcn.workgroup.id.y()
+ store volatile i32 %val, i32 addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @use_workgroup_id_z() #6 {
+define void @use_workgroup_id_z() #1 {
+ %val = call i32 @llvm.amdgcn.workgroup.id.z()
+ store volatile i32 %val, i32 addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @use_dispatch_ptr() #7 {
+define void @use_dispatch_ptr() #1 {
+ %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+ store volatile i8 addrspace(2)* %dispatch.ptr, i8 addrspace(2)* addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @use_queue_ptr() #8 {
+define void @use_queue_ptr() #1 {
+ %queue.ptr = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
+ store volatile i8 addrspace(2)* %queue.ptr, i8 addrspace(2)* addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @use_dispatch_id() #9 {
+define void @use_dispatch_id() #1 {
+ %val = call i64 @llvm.amdgcn.dispatch.id()
+ store volatile i64 %val, i64 addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @use_workgroup_id_y_workgroup_id_z() #10 {
+define void @use_workgroup_id_y_workgroup_id_z() #1 {
+ %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
+ %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
+ store volatile i32 %val0, i32 addrspace(1)* undef
+ store volatile i32 %val1, i32 addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @func_indirect_use_workitem_id_x() #1 {
+define void @func_indirect_use_workitem_id_x() #1 {
+ call void @use_workitem_id_x()
+ ret void
+}
+
+; HSA: define void @kernel_indirect_use_workitem_id_x() #1 {
+define void @kernel_indirect_use_workitem_id_x() #1 {
+ call void @use_workitem_id_x()
+ ret void
+}
+
+; HSA: define void @func_indirect_use_workitem_id_y() #2 {
+define void @func_indirect_use_workitem_id_y() #1 {
+ call void @use_workitem_id_y()
+ ret void
+}
+
+; HSA: define void @func_indirect_use_workitem_id_z() #3 {
+define void @func_indirect_use_workitem_id_z() #1 {
+ call void @use_workitem_id_z()
+ ret void
+}
+
+; HSA: define void @func_indirect_use_workgroup_id_x() #4 {
+define void @func_indirect_use_workgroup_id_x() #1 {
+ call void @use_workgroup_id_x()
+ ret void
+}
+
+; HSA: define void @kernel_indirect_use_workgroup_id_x() #4 {
+define void @kernel_indirect_use_workgroup_id_x() #1 {
+ call void @use_workgroup_id_x()
+ ret void
+}
+
+; HSA: define void @func_indirect_use_workgroup_id_y() #5 {
+define void @func_indirect_use_workgroup_id_y() #1 {
+ call void @use_workgroup_id_y()
+ ret void
+}
+
+; HSA: define void @func_indirect_use_workgroup_id_z() #6 {
+define void @func_indirect_use_workgroup_id_z() #1 {
+ call void @use_workgroup_id_z()
+ ret void
+}
+
+; HSA: define void @func_indirect_indirect_use_workgroup_id_y() #5 {
+define void @func_indirect_indirect_use_workgroup_id_y() #1 {
+ call void @func_indirect_use_workgroup_id_y()
+ ret void
+}
+
+; HSA: define void @indirect_x2_use_workgroup_id_y() #5 {
+define void @indirect_x2_use_workgroup_id_y() #1 {
+ call void @func_indirect_indirect_use_workgroup_id_y()
+ ret void
+}
+
+; HSA: define void @func_indirect_use_dispatch_ptr() #7 {
+define void @func_indirect_use_dispatch_ptr() #1 {
+ call void @use_dispatch_ptr()
+ ret void
+}
+
+; HSA: define void @func_indirect_use_queue_ptr() #8 {
+define void @func_indirect_use_queue_ptr() #1 {
+ call void @use_queue_ptr()
+ ret void
+}
+
+; HSA: define void @func_indirect_use_dispatch_id() #9 {
+define void @func_indirect_use_dispatch_id() #1 {
+ call void @use_dispatch_id()
+ ret void
+}
+
+; HSA: define void @func_indirect_use_workgroup_id_y_workgroup_id_z() #11 {
+define void @func_indirect_use_workgroup_id_y_workgroup_id_z() #1 {
+ call void @func_indirect_use_workgroup_id_y_workgroup_id_z()
+ ret void
+}
+
+; HSA: define void @recursive_use_workitem_id_y() #2 {
+define void @recursive_use_workitem_id_y() #1 {
+ %val = call i32 @llvm.amdgcn.workitem.id.y()
+ store volatile i32 %val, i32 addrspace(1)* undef
+ call void @recursive_use_workitem_id_y()
+ ret void
+}
+
+; HSA: define void @call_recursive_use_workitem_id_y() #2 {
+define void @call_recursive_use_workitem_id_y() #1 {
+ call void @recursive_use_workitem_id_y()
+ ret void
+}
+
+; HSA: define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #8 {
+define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
+ %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
+ store volatile i32 0, i32 addrspace(4)* %stof
+ ret void
+}
+
+; HSA: define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #12 {
+define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 {
+ %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
+ store volatile i32 0, i32 addrspace(4)* %stof
+ ret void
+}
+
+; HSA: define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #13 {
+define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 {
+ %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
+ store volatile i32 0, i32 addrspace(4)* %stof
+ call void @func_indirect_use_queue_ptr()
+ ret void
+}
+
+; HSA: define void @indirect_use_group_to_flat_addrspacecast() #8 {
+define void @indirect_use_group_to_flat_addrspacecast() #1 {
+ call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null)
+ ret void
+}
+
+; HSA: define void @indirect_use_group_to_flat_addrspacecast_gfx9() #11 {
+define void @indirect_use_group_to_flat_addrspacecast_gfx9() #1 {
+ call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null)
+ ret void
+}
+
+; HSA: define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #8 {
+define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #1 {
+ call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null)
+ ret void
+}
+
+; HSA: define void @use_kernarg_segment_ptr() #14 {
+define void @use_kernarg_segment_ptr() #1 {
+ %kernarg.segment.ptr = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+ store volatile i8 addrspace(2)* %kernarg.segment.ptr, i8 addrspace(2)* addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @func_indirect_use_kernarg_segment_ptr() #14 {
+define void @func_indirect_use_kernarg_segment_ptr() #1 {
+ call void @use_kernarg_segment_ptr()
+ ret void
+}
+
+; HSA: define void @use_implicitarg_ptr() #14 {
+define void @use_implicitarg_ptr() #1 {
+ %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ store volatile i8 addrspace(2)* %implicitarg.ptr, i8 addrspace(2)* addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @func_indirect_use_implicitarg_ptr() #14 {
+define void @func_indirect_use_implicitarg_ptr() #1 {
+ call void @use_implicitarg_ptr()
+ ret void
+}
+
+; HSA: declare void @external.func() #15
+declare void @external.func() #3
+
+; HSA: define internal void @defined.func() #15 {
+define internal void @defined.func() #3 {
+ ret void
+}
+
+; HSA: define void @func_call_external() #15 {
+define void @func_call_external() #3 {
+ call void @external.func()
+ ret void
+}
+
+; HSA: define void @func_call_defined() #15 {
+define void @func_call_defined() #3 {
+ call void @defined.func()
+ ret void
+}
+
+; HSA: define void @func_call_asm() #15 {
+define void @func_call_asm() #3 {
+ call void asm sideeffect "", ""() #3
+ ret void
+}
+
+; HSA: define amdgpu_kernel void @kern_call_external() #16 {
+define amdgpu_kernel void @kern_call_external() #3 {
+ call void @external.func()
+ ret void
+}
+
+; HSA: define amdgpu_kernel void @func_kern_defined() #16 {
+define amdgpu_kernel void @func_kern_defined() #3 {
+ call void @defined.func()
+ ret void
+}
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { nounwind "target-cpu"="fiji" }
+attributes #2 = { nounwind "target-cpu"="gfx900" }
+attributes #3 = { nounwind }
+
+; HSA: attributes #0 = { nounwind readnone speculatable }
+; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" }
+; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" }
+; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" }
+; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" }
+; HSA: attributes #5 = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" }
+; HSA: attributes #6 = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" }
+; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" }
+; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" }
+; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" }
+; HSA: attributes #10 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" }
+; HSA: attributes #11 = { nounwind "target-cpu"="fiji" }
+; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" }
+; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" }
+; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" }
+; HSA: attributes #15 = { nounwind }
+; HSA: attributes #16 = { nounwind "amdgpu-flat-scratch" }
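The checks above pin down the pass's bottom-up propagation rule: a function inherits the attribute implied by any intrinsic one of its callees uses, transitively, so indirect users end up in the same attribute group as direct users. A minimal sketch of that pattern as a standalone test (hypothetical file, not part of this commit; same opt invocation as the RUN line above, expected attribute sets inferred from the checks in this file rather than re-verified):

; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck %s
declare i32 @llvm.amdgcn.workitem.id.y() #0

; The leaf calls the intrinsic directly, so it should gain
; "amdgpu-work-item-id-y".
; CHECK: define void @leaf() #1 {
define void @leaf() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}

; The wrapper never touches the intrinsic; calling @leaf is enough for it
; to be annotated with the same attribute set.
; CHECK: define void @wrapper() #1 {
define void @wrapper() #1 {
  call void @leaf()
  ret void
}

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }
; CHECK: attributes #1 = { nounwind "amdgpu-work-item-id-y" }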
diff --git a/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index f7461b925ca1..3059a95a5098 100644
--- a/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -10,6 +10,7 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
+declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
@@ -164,6 +165,15 @@ define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
ret void
}
+; HSA: define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #12 {
+define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
+ %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+ %bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
+ %val = load i32, i32 addrspace(2)* %bc
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
@@ -236,3 +246,4 @@ attributes #1 = { nounwind }
; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }
+; HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" }
diff --git a/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll b/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
index 63a6f6a8d32c..a0694fb1e3c9 100644
--- a/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
+++ b/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
@@ -36,7 +36,7 @@ attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
; CHECK-LABEL: {{^}}min_1024_max_2048
; CHECK: SGPRBlocks: 1
; CHECK: VGPRBlocks: 7
-; CHECK: NumSGPRsForWavesPerEU: 13
+; CHECK: NumSGPRsForWavesPerEU: 12
; CHECK: NumVGPRsForWavesPerEU: 32
@var = addrspace(1) global float 0.0
define amdgpu_kernel void @min_1024_max_2048() #3 {
diff --git a/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll b/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
index 3dda73bc336e..a5e97205de21 100644
--- a/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
+++ b/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
@@ -118,7 +118,7 @@ attributes #8 = {"amdgpu-waves-per-eu"="5,10"}
; CHECK-LABEL: {{^}}exactly_10:
; CHECK: SGPRBlocks: 1
; CHECK: VGPRBlocks: 5
-; CHECK: NumSGPRsForWavesPerEU: 13
+; CHECK: NumSGPRsForWavesPerEU: 12
; CHECK: NumVGPRsForWavesPerEU: 24
define amdgpu_kernel void @exactly_10() #9 {
%val0 = load volatile float, float addrspace(1)* @var
@@ -188,3 +188,15 @@ define amdgpu_kernel void @exactly_10() #9 {
ret void
}
attributes #9 = {"amdgpu-waves-per-eu"="10,10"}
+
+; Exactly 256 workitems and exactly 2 waves.
+; CHECK-LABEL: {{^}}empty_workitems_exactly_256_waves_exactly_2:
+; CHECK: SGPRBlocks: 12
+; CHECK: VGPRBlocks: 21
+; CHECK: NumSGPRsForWavesPerEU: 102
+; CHECK: NumVGPRsForWavesPerEU: 85
+define amdgpu_kernel void @empty_workitems_exactly_256_waves_exactly_2() #10 {
+entry:
+ ret void
+}
+attributes #10 = {"amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2,2"}
diff --git a/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index 5383bbe71ae3..5ffa45595e70 100644
--- a/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -347,7 +347,9 @@ define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(float addrspace
}
; GCN-LABEL: test_fold_canonicalize_minnum_value_from_load_f32:
-; GCN: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
+; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
+; GFX9: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
+; GFX9: flat_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32(float addrspace(1)* %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
@@ -388,9 +390,11 @@ define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace
}
; GCN-LABEL: test_fold_canonicalize_denorm_value_f32:
-; GCN: v_min_f32_e32 [[V0:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
-; GCN: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]]
-; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
+; GFX9: v_min_f32_e32 [[V:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
+; VI: v_min_f32_e32 [[V0:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
+; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]]
+; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
+; GFX9-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspace(1)* %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
@@ -402,9 +406,11 @@ define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspa
}
; GCN-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32:
-; GCN: v_max_f32_e32 [[V0:v[0-9]+]], 0, v{{[0-9]+}}
-; GCN: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]]
+; GFX9: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
+; VI: v_max_f32_e32 [[V0:v[0-9]+]], 0, v{{[0-9]+}}
+; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
+; GFX9-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32(float addrspace(1)* %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
@@ -465,6 +471,49 @@ entry:
ret float %canonicalized
}
+; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f32
+; GFX9-DENORM: flat_load_dword [[V:v[0-9]+]],
+; GFX9-DENORM: flat_store_dword v[{{[0-9:]+}}], [[V]]
+; GFX9-DENORM-NOT: 1.0
+; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
+define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(float addrspace(1)* %arg, float addrspace(1)* %out) #1 {
+ %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
+ %v = load float, float addrspace(1)* %gep, align 4
+ %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
+ %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
+ store float %canonicalized, float addrspace(1)* %gep2, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f64
+; GCN: flat_load_dwordx2 [[V:v\[[0-9:]+\]]],
+; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], [[V]]
+; GCN-NOT: 1.0
+define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(double addrspace(1)* %arg, double addrspace(1)* %out) #1 {
+ %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
+ %v = load double, double addrspace(1)* %gep, align 8
+ %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
+ %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
+ store double %canonicalized, double addrspace(1)* %gep2, align 8
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f16
+; GCN: flat_load_ushort [[V:v[0-9]+]],
+; GCN: flat_store_short v[{{[0-9:]+}}], [[V]]
+; GCN-NOT: 1.0
+define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(half addrspace(1)* %arg, half addrspace(1)* %out) #1 {
+ %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
+ %v = load half, half addrspace(1)* %gep, align 2
+ %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
+ %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
+ store half %canonicalized, half addrspace(1)* %gep2, align 2
+ ret void
+}
+
declare float @llvm.canonicalize.f32(float) #0
declare double @llvm.canonicalize.f64(double) #0
declare half @llvm.canonicalize.f16(half) #0
@@ -485,3 +534,4 @@ declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.maxnum.f64(double, double) #0
attributes #0 = { nounwind readnone }
+attributes #1 = { "no-nans-fp-math"="true" }
diff --git a/test/CodeGen/AMDGPU/function-args.ll b/test/CodeGen/AMDGPU/function-args.ll
index 9b1368493ba5..6b22cb0b7e28 100644
--- a/test/CodeGen/AMDGPU/function-args.ll
+++ b/test/CodeGen/AMDGPU/function-args.ll
@@ -34,6 +34,22 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 {
ret void
}
+; GCN-LABEL: {{^}}i1_arg_i1_use:
+; GCN: v_and_b32_e32 v0, 1, v0
+; GCN: v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, -1
+define void @i1_arg_i1_use(i1 %arg) #0 {
+bb:
+ br i1 %arg, label %bb2, label %bb1
+
+bb1:
+ store volatile i32 0, i32 addrspace(1)* undef
+ br label %bb2
+
+bb2:
+ ret void
+}
+
; GCN-LABEL: {{^}}void_func_i8:
; GCN-NOT: v0
; GCN: buffer_store_byte v0, off
diff --git a/test/CodeGen/AMDGPU/hsa.ll b/test/CodeGen/AMDGPU/hsa.ll
index 972fbd66ef37..0b19fbe7d70c 100644
--- a/test/CodeGen/AMDGPU/hsa.ll
+++ b/test/CodeGen/AMDGPU/hsa.ll
@@ -40,7 +40,7 @@
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
-; HSA: .amdgpu_hsa_kernel simple
+; HSA-LABEL: .amdgpu_hsa_kernel simple
; HSA: {{^}}simple:
; HSA: .amd_kernel_code_t
; HSA: enable_sgpr_private_segment_buffer = 1
@@ -65,3 +65,11 @@ entry:
store i32 0, i32 addrspace(1)* %out
ret void
}
+
+; HSA-LABEL: .amdgpu_hsa_kernel simple_no_kernargs
+; HSA: enable_sgpr_kernarg_segment_ptr = 0
+define amdgpu_kernel void @simple_no_kernargs() {
+entry:
+ store volatile i32 0, i32 addrspace(1)* undef
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
index 9a27809f37bb..70e6b408ca29 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -49,6 +49,18 @@ define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x
ret void
}
+; ALL-LABEL: {{^}}test_no_kernargs:
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: s_load_dword s{{[0-9]+}}, s[4:5]
+define amdgpu_kernel void @test_no_kernargs() #1 {
+ %kernarg.segment.ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+ %header.ptr = bitcast i8 addrspace(2)* %kernarg.segment.ptr to i32 addrspace(2)*
+ %gep = getelementptr i32, i32 addrspace(2)* %header.ptr, i64 10
+ %value = load i32, i32 addrspace(2)* %gep
+ store volatile i32 %value, i32 addrspace(1)* undef
+ ret void
+}
+
declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #0
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
index f0af876567b4..1c3cba8d3e4f 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: {{^}}test1:
; CHECK: v_cndmask_b32_e64 v0, 0, 1, exec
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
index ee58d359a935..a466671d8c55 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: {{^}}test1:
; CHECK: image_store
diff --git a/test/CodeGen/AMDGPU/move-to-valu-worklist.ll b/test/CodeGen/AMDGPU/move-to-valu-worklist.ll
new file mode 100644
index 000000000000..539eed92d540
--- /dev/null
+++ b/test/CodeGen/AMDGPU/move-to-valu-worklist.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+
+; When moveToVALU() moves an instruction to the vector ALU, every
+; instruction in its use chain is visited. We do not want the same node
+; to be pushed onto the visit worklist more than once.
+
+; GCN-LABEL: {{^}}in_worklist_once:
+; GCN: buffer_load_dword
+; GCN: BB0_1:
+; GCN: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-NEXT: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define amdgpu_kernel void @in_worklist_once() #0 {
+bb:
+ %tmp = load i64, i64* undef
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %tmp2 = phi i64 [ undef, %bb ], [ %tmp16, %bb1 ]
+ %tmp3 = phi i64 [ %tmp, %bb ], [ undef, %bb1 ]
+ %tmp11 = shl i64 %tmp2, 14
+ %tmp13 = xor i64 %tmp11, %tmp2
+ %tmp15 = and i64 %tmp3, %tmp13
+ %tmp16 = xor i64 %tmp15, %tmp3
+ br label %bb1
+}
+
+attributes #0 = { nounwind }
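The hazard this test guards against comes from fan-in in the use chain: the final xor above is a user of two instructions that both get moved, so without deduplication it would be enqueued once per moved operand. A reduced sketch of the shape (hypothetical and for illustration only; the committed test uses i64 values in a loop so the SALU-to-VALU move actually triggers):

define amdgpu_kernel void @diamond() {
  %a = load volatile i32, i32* undef             ; moved to the VALU first
  %b = xor i32 %a, 14                            ; first user of %a
  %c = and i32 %a, 63                            ; second user of %a
  %d = xor i32 %b, %c                            ; user of both %b and %c: a
                                                 ; naive worklist would push
                                                 ; %d twice
  store volatile i32 %d, i32 addrspace(1)* undef
  ret void
}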
diff --git a/test/CodeGen/AMDGPU/mubuf-offset-private.ll b/test/CodeGen/AMDGPU/mubuf-offset-private.ll
index 3a0605fa182a..742c4f8af85d 100644
--- a/test/CodeGen/AMDGPU/mubuf-offset-private.ll
+++ b/test/CodeGen/AMDGPU/mubuf-offset-private.ll
@@ -5,42 +5,42 @@
; Test addressing modes when the scratch base is not a frame index.
; GCN-LABEL: {{^}}store_private_offset_i8:
-; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s8 offset:8
+; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i8() #0 {
store volatile i8 5, i8* inttoptr (i32 8 to i8*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i16:
-; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], s8 offset:8
+; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i16() #0 {
store volatile i16 5, i16* inttoptr (i32 8 to i16*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i32:
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s8 offset:8
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i32() #0 {
store volatile i32 5, i32* inttoptr (i32 8 to i32*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_v2i32:
-; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
+; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_v2i32() #0 {
store volatile <2 x i32> <i32 5, i32 10>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_v4i32:
-; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
+; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_v4i32() #0 {
store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*)
ret void
}
; GCN-LABEL: {{^}}load_private_offset_i8:
-; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
+; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i8() #0 {
%load = load volatile i8, i8* inttoptr (i32 8 to i8*)
ret void
@@ -65,7 +65,7 @@ define amdgpu_kernel void @zextload_private_offset_i8(i32 addrspace(1)* %out) #0
}
; GCN-LABEL: {{^}}load_private_offset_i16:
-; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8
+; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i16() #0 {
%load = load volatile i16, i16* inttoptr (i32 8 to i16*)
ret void
@@ -90,28 +90,28 @@ define amdgpu_kernel void @zextload_private_offset_i16(i32 addrspace(1)* %out) #
}
; GCN-LABEL: {{^}}load_private_offset_i32:
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], s8 offset:8
+; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i32() #0 {
%load = load volatile i32, i32* inttoptr (i32 8 to i32*)
ret void
}
; GCN-LABEL: {{^}}load_private_offset_v2i32:
-; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
+; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_v2i32() #0 {
%load = load volatile <2 x i32>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*)
ret void
}
; GCN-LABEL: {{^}}load_private_offset_v4i32:
-; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
+; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_v4i32() #0 {
%load = load volatile <4 x i32>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*)
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset:
-; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s8 offset:4095
+; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s2 offset:4095
define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 {
store volatile i8 5, i8* inttoptr (i32 4095 to i8*)
ret void
@@ -119,7 +119,7 @@ define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 {
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus1:
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
-; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s8 offen{{$}}
+; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s2 offen{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 {
store volatile i8 5, i8* inttoptr (i32 4096 to i8*)
ret void
@@ -127,7 +127,7 @@ define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 {
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus2:
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
-; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s8 offen offset:1{{$}}
+; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s2 offen offset:1{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 {
store volatile i8 5, i8* inttoptr (i32 4097 to i8*)
ret void
diff --git a/test/CodeGen/AMDGPU/parallelandifcollapse.ll b/test/CodeGen/AMDGPU/parallelandifcollapse.ll
index 190d2b72ebaf..87f37144244e 100644
--- a/test/CodeGen/AMDGPU/parallelandifcollapse.ll
+++ b/test/CodeGen/AMDGPU/parallelandifcollapse.ll
@@ -8,7 +8,7 @@
; CHECK-NEXT: OR_INT
; FIXME: For some reason having the allocas here allowed the flatten cfg pass
-; to do its transfomation, however now that we are using local memory for
+; to do its transformation, however now that we are using local memory for
; allocas, the transformation isn't happening.
define amdgpu_kernel void @_Z9chk1D_512v() #0 {
diff --git a/test/CodeGen/AMDGPU/parallelorifcollapse.ll b/test/CodeGen/AMDGPU/parallelorifcollapse.ll
index 91116b0f65ea..e199d5b5df25 100644
--- a/test/CodeGen/AMDGPU/parallelorifcollapse.ll
+++ b/test/CodeGen/AMDGPU/parallelorifcollapse.ll
@@ -5,7 +5,7 @@
; then merge if-regions with the same bodies.
; FIXME: For some reason having the allocas here allowed the flatten cfg pass
-; to do its transfomation, however now that we are using local memory for
+; to do its transformation, however now that we are using local memory for
; allocas, the transformation isn't happening.
; XFAIL: *
;
diff --git a/test/CodeGen/AMDGPU/private-access-no-objects.ll b/test/CodeGen/AMDGPU/private-access-no-objects.ll
index dcb089010e99..cf0c7944d4cd 100644
--- a/test/CodeGen/AMDGPU/private-access-no-objects.ll
+++ b/test/CodeGen/AMDGPU/private-access-no-objects.ll
@@ -10,14 +10,14 @@
; GCN-LABEL: {{^}}store_to_undef:
; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
-; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
+; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}}
; OPT: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}}
; -O0 should assume spilling, so the input scratch resource descriptor
; should be used directly without any copies.
; OPTNONE-NOT: s_mov_b32
-; OPTNONE: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s7 offen{{$}}
+; OPTNONE: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s5 offen{{$}}
define amdgpu_kernel void @store_to_undef() #0 {
store volatile i32 0, i32* undef
ret void
@@ -26,7 +26,7 @@ define amdgpu_kernel void @store_to_undef() #0 {
; GCN-LABEL: {{^}}store_to_inttoptr:
; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
-; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
+; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}}
; OPT: buffer_store_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}}
define amdgpu_kernel void @store_to_inttoptr() #0 {
store volatile i32 0, i32* inttoptr (i32 124 to i32*)
@@ -36,7 +36,7 @@ define amdgpu_kernel void @store_to_inttoptr() #0 {
; GCN-LABEL: {{^}}load_from_undef:
; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
-; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
+; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}}
; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}}
define amdgpu_kernel void @load_from_undef() #0 {
%ld = load volatile i32, i32* undef
@@ -46,7 +46,7 @@ define amdgpu_kernel void @load_from_undef() #0 {
; GCN-LABEL: {{^}}load_from_inttoptr:
; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
-; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
+; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}}
; OPT: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}}
define amdgpu_kernel void @load_from_inttoptr() #0 {
%ld = load volatile i32, i32* inttoptr (i32 124 to i32*)
diff --git a/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir b/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
index 770bfaddb23e..a52b80ba86e5 100644
--- a/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
+++ b/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
@@ -34,7 +34,7 @@ body: |
bb.0:
successors: %bb.2, %bb.1
- %7 = V_CMP_NEQ_F32_e64 0, 0, 0, undef %3, 0, 0, implicit %exec
+ %7 = V_CMP_NEQ_F32_e64 0, 0, 0, undef %3, 0, implicit %exec
%vcc = COPY killed %7
S_CBRANCH_VCCZ %bb.2, implicit killed %vcc
diff --git a/test/CodeGen/AMDGPU/scratch-simple.ll b/test/CodeGen/AMDGPU/scratch-simple.ll
index 6ed730ad60f4..5e0178072e5e 100644
--- a/test/CodeGen/AMDGPU/scratch-simple.ll
+++ b/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=gfx804 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s
; This used to fail due to a v_add_i32 instruction with an illegal immediate
@@ -8,15 +8,16 @@
;
; GCN-LABEL: {{^}}ps_main:
-; GCN-DAG: s_mov_b32 [[SWO:s[0-9]+]], s0
+; GCN-DAG: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; GCN-NOT: s_mov_b32 s0
; GCN-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
; GCN-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], 0x200, [[CLAMP_IDX]]
; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], 0x400, [[CLAMP_IDX]]
-; GCN: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
-; GCN: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
+; GCN: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, s0 offen
+; GCN: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, s0 offen
define amdgpu_ps float @ps_main(i32 %idx) {
%v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
@@ -25,9 +26,10 @@ define amdgpu_ps float @ps_main(i32 %idx) {
}
; GCN-LABEL: {{^}}vs_main:
-; GCN: s_mov_b32 [[SWO:s[0-9]+]], s0
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
+; GCN-DAG: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; GCN-NOT: s_mov_b32 s0
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen
define amdgpu_vs float @vs_main(i32 %idx) {
%v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
@@ -36,9 +38,9 @@ define amdgpu_vs float @vs_main(i32 %idx) {
}
; GCN-LABEL: {{^}}cs_main:
-; GCN: s_mov_b32 [[SWO:s[0-9]+]], s0
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
+; GCN-DAG: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen
define amdgpu_cs float @cs_main(i32 %idx) {
%v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
@@ -47,10 +49,15 @@ define amdgpu_cs float @cs_main(i32 %idx) {
}
; GCN-LABEL: {{^}}hs_main:
-; SI: s_mov_b32 [[SWO:s[0-9]+]], s0
-; GFX9: s_mov_b32 [[SWO:s[0-9]+]], s5
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
+; SI: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; SI-NOT: s_mov_b32 s0
+; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen
+; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen
+
+; GFX9: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX9-NOT: s_mov_b32 s5
+; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen
+; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen
define amdgpu_hs float @hs_main(i32 %idx) {
%v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
@@ -59,10 +66,13 @@ define amdgpu_hs float @hs_main(i32 %idx) {
}
; GCN-LABEL: {{^}}gs_main:
-; SI: s_mov_b32 [[SWO:s[0-9]+]], s0
-; GFX9: s_mov_b32 [[SWO:s[0-9]+]], s5
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
+; SI: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen
+; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen
+
+; GFX9: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen
+; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen
define amdgpu_gs float @gs_main(i32 %idx) {
%v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
@@ -71,10 +81,16 @@ define amdgpu_gs float @gs_main(i32 %idx) {
}
; GCN-LABEL: {{^}}hs_ir_uses_scratch_offset:
-; SI: s_mov_b32 [[SWO:s[0-9]+]], s6
-; GFX9: s_mov_b32 [[SWO:s[0-9]+]], s5
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
+; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+
+; SI-NOT: s_mov_b32 s6
+; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s6 offen
+; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s6 offen
+
+; GFX9-NOT: s_mov_b32 s5
+; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen
+; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen
+
; GCN: s_mov_b32 s2, s5
define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) {
%v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
@@ -86,10 +102,14 @@ define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg,
}
; GCN-LABEL: {{^}}gs_ir_uses_scratch_offset:
-; SI: s_mov_b32 [[SWO:s[0-9]+]], s6
-; GFX9: s_mov_b32 [[SWO:s[0-9]+]], s5
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
+; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+
+; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s6 offen
+; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s6 offen
+
+; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen
+; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen
+
; GCN: s_mov_b32 s2, s5
define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) {
%v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
diff --git a/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir b/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
index 4f5c582f8b58..ff1b2ad73ef0 100644
--- a/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
+++ b/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
@@ -332,7 +332,7 @@ body: |
# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
-# VI: %{{[0-9]+}} = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 0, implicit-def %exec, implicit %exec
+# VI: %{{[0-9]+}} = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %3, 0, 6, 4, implicit-def %vcc, implicit %exec
# VI: %{{[0-9]+}} = V_CMPX_EQ_I32_e64 23, killed %{{[0-9]+}}, implicit-def %exec, implicit %exec
@@ -345,20 +345,21 @@ body: |
# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 1, 6, 4, implicit-def %vcc, implicit %exec
-# VI: %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 2, implicit-def %exec, implicit %exec
-# VI: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 2, implicit %exec
+# VI: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 1, 6, 4, implicit-def %vcc, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
-# VI: %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, 2, implicit-def %exec, implicit %exec
+# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
-# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 0, implicit %exec
-# GFX9: %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 2, implicit-def %exec, implicit %exec
-# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 2, implicit %exec
+# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, implicit %exec
+# GFX9: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
# GFX9: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
-# GFX9: %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, 2, implicit-def %exec, implicit %exec
+# GFX9: %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, implicit-def %exec, implicit %exec
+
name: vopc_instructions
@@ -415,28 +416,28 @@ body: |
V_CMPX_EQ_I32_e32 123, killed %13, implicit-def %vcc, implicit-def %exec, implicit %exec
%14 = V_AND_B32_e64 %5, %3, implicit %exec
- %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %14, 0, 0, implicit %exec
+ %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %14, 0, implicit %exec
%15 = V_AND_B32_e64 %5, %3, implicit %exec
- %18 = V_CMPX_GT_F32_e64 0, 23, 0, killed %15, 0, 0, implicit-def %exec, implicit %exec
+ %18 = V_CMPX_GT_F32_e64 0, 23, 0, killed %15, 0, implicit-def %exec, implicit %exec
%16 = V_AND_B32_e64 %5, %3, implicit %exec
%vcc = V_CMP_LT_I32_e64 %6, killed %16, implicit %exec
%17 = V_AND_B32_e64 %5, %3, implicit %exec
%19 = V_CMPX_EQ_I32_e64 23, killed %17, implicit-def %exec, implicit %exec
%20 = V_AND_B32_e64 %5, %3, implicit %exec
- %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %20, 1, 0, implicit %exec
+ %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %20, 1, implicit %exec
%21 = V_AND_B32_e64 %5, %3, implicit %exec
- %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %21, 0, 2, implicit-def %exec, implicit %exec
+ %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %21, 0, implicit-def %exec, implicit %exec
%23 = V_AND_B32_e64 %5, %3, implicit %exec
- %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %23, 1, 2, implicit %exec
+ %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %23, 1, implicit %exec
%24 = V_AND_B32_e64 %5, %3, implicit %exec
- %vcc = V_CMPX_GT_F32_e64 1, 23, 0, killed %24, 0, 0, implicit-def %exec, implicit %exec
+ %vcc = V_CMPX_GT_F32_e64 1, 23, 0, killed %24, 0, implicit-def %exec, implicit %exec
%25 = V_AND_B32_e64 %5, %3, implicit %exec
- %vcc = V_CMPX_GT_F32_e64 0, 23, 1, killed %25, 0, 0, implicit-def %exec, implicit %exec
+ %vcc = V_CMPX_GT_F32_e64 0, 23, 1, killed %25, 0, implicit-def %exec, implicit %exec
%26 = V_AND_B32_e64 %5, %3, implicit %exec
- %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %26, 0, 0, implicit-def %exec, implicit %exec
+ %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %26, 0, implicit-def %exec, implicit %exec
%27 = V_AND_B32_e64 %5, %3, implicit %exec
- %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %27, 1, 2, implicit-def %exec, implicit %exec
+ %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %27, 1, implicit-def %exec, implicit %exec
%100 = V_MOV_B32_e32 %vcc_lo, implicit %exec
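
Note: every V_CMP*/V_CMPX* _e64 change in this file drops a single trailing 0 immediate. Judging from the surrounding MIR, the _e64 operand order appears to be src0_modifiers, src0, src1_modifiers, src1, clamp, omod, and it is the final omod slot that is removed (the operand names are an assumption; the patch itself does not spell them out). One before/after pair from the hunk above, restated:

# old form: two trailing immediates (clamp, then the assumed omod)
%vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %14, 0, 0, implicit %exec
# new form: only the clamp immediate remains
%vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %14, 0, implicit %exec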
diff --git a/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir b/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
index 913b54332119..bd222adf6a68 100644
--- a/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
+++ b/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
@@ -8,7 +8,7 @@
# GCN: %{{[0-9]+}} = V_BCNT_U32_B32_e64 %{{[0-9]+}}, killed %{{[0-9]+}}, implicit-def %vcc, implicit %exec
# GCN: %{{[0-9]+}} = V_BFM_B32_e64 %{{[0-9]+}}, killed %{{[0-9]+}}, implicit-def %vcc, implicit %exec
-# GCN: %{{[0-9]+}} = V_CVT_PKNORM_I16_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 0, 0, implicit-def %vcc, implicit %exec
+# GCN: %{{[0-9]+}} = V_CVT_PKNORM_I16_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 0, implicit-def %vcc, implicit %exec
# GCN: %{{[0-9]+}} = V_READLANE_B32 killed %{{[0-9]+}}, 0, implicit-def %vcc, implicit %exec
---
@@ -50,7 +50,7 @@ body: |
%15 = V_BFM_B32_e64 %13, killed %14, implicit-def %vcc, implicit %exec
%16 = V_LSHRREV_B32_e64 16, %15, implicit %exec
- %17 = V_CVT_PKNORM_I16_F32_e64 0, %15, 0, killed %16, 0, 0, implicit-def %vcc, implicit %exec
+ %17 = V_CVT_PKNORM_I16_F32_e64 0, %15, 0, killed %16, 0, implicit-def %vcc, implicit %exec
%18 = V_LSHRREV_B32_e64 16, %17, implicit %exec
%19 = V_READLANE_B32 killed %18, 0, implicit-def %vcc, implicit %exec
diff --git a/test/CodeGen/AMDGPU/trap.ll b/test/CodeGen/AMDGPU/trap.ll
index 51771c9723e0..04ff4c87ea77 100644
--- a/test/CodeGen/AMDGPU/trap.ll
+++ b/test/CodeGen/AMDGPU/trap.ll
@@ -19,11 +19,11 @@ declare void @llvm.debugtrap() #0
; MESA-TRAP: .section .AMDGPU.config
; MESA-TRAP: .long 47180
-; MESA-TRAP-NEXT: .long 208
+; MESA-TRAP-NEXT: .long 204
; NOMESA-TRAP: .section .AMDGPU.config
; NOMESA-TRAP: .long 47180
-; NOMESA-TRAP-NEXT: .long 144
+; NOMESA-TRAP-NEXT: .long 140
; GCN-LABEL: {{^}}hsa_trap:
; HSA-TRAP: enable_trap_handler = 1
@@ -45,11 +45,11 @@ define amdgpu_kernel void @hsa_trap() {
; MESA-TRAP: .section .AMDGPU.config
; MESA-TRAP: .long 47180
-; MESA-TRAP-NEXT: .long 208
+; MESA-TRAP-NEXT: .long 204
; NOMESA-TRAP: .section .AMDGPU.config
; NOMESA-TRAP: .long 47180
-; NOMESA-TRAP-NEXT: .long 144
+; NOMESA-TRAP-NEXT: .long 140
; GCN-WARNING: warning: <unknown>:0:0: in function hsa_debugtrap void (): debugtrap handler not supported
; GCN-LABEL: {{^}}hsa_debugtrap:
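
Note: 47180 is 0xB84C, the offset of COMPUTE_PGM_RSRC2, so each .long pair here is most likely a (register, value) entry in the .AMDGPU.config section. Under that reading (an inference, not stated in the patch), 208 = 0xD0 and 204 = 0xCC differ only in the USER_SGPR field in bits [5:1]: (0xD0 >> 1) & 0x1F = 8 versus (0xCC >> 1) & 0x1F = 6, i.e. two fewer user SGPRs. The NOMESA values 144 = 0x90 and 140 = 0x8C decode the same way (8 down to 6), differing from the MESA pair only in bit 6, consistent with the trap handler being disabled there.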
diff --git a/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir b/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
index 6eb937e71b1b..54991d3d953c 100644
--- a/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
+++ b/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
@@ -81,7 +81,7 @@ body: |
%sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
%sgpr7 = S_MOV_B32 61440
%sgpr6 = S_MOV_B32 -1
- %vcc = V_CMP_EQ_F32_e64 0, 0, 0, %sgpr2, 0, 0, implicit %exec
+ %vcc = V_CMP_EQ_F32_e64 0, 0, 0, %sgpr2, 0, implicit %exec
S_CBRANCH_VCCZ %bb.1.else, implicit killed %vcc
bb.2.if:
diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
index 135f02ac205a..feae5e9f3792 100644
--- a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
+++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -19,8 +19,9 @@
; HSA: workitem_private_segment_byte_size = 1536
; GCN-NOT: flat_scr
+; MESA-NOT: s_mov_b32 s3
+; HSA-NOT: s_mov_b32 s7
-; GCNMESA-DAG: s_mov_b32 s16, s3
; GCNMESA-DAG: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GCNMESA-DAG: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GCNMESA-DAG: s_mov_b32 s14, -1
@@ -29,17 +30,32 @@
; GFX9MESA-DAG: s_mov_b32 s15, 0xe00000
-; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
+; GCNMESA: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s3 offset:{{[0-9]+}} ; 4-byte Folded Spill
-; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}
-; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}
-; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}
-; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}
+; GCNMESA: buffer_store_dword {{v[0-9]}}, off, s[12:15], s3 offset:{{[0-9]+}}
+; GCNMESA: buffer_store_dword {{v[0-9]}}, off, s[12:15], s3 offset:{{[0-9]+}}
+; GCNMESA: buffer_store_dword {{v[0-9]}}, off, s[12:15], s3 offset:{{[0-9]+}}
+; GCNMESA: buffer_store_dword {{v[0-9]}}, off, s[12:15], s3 offset:{{[0-9]+}}
+
+; GCNMESA: buffer_load_dword {{v[0-9]+}}, off, s[12:15], s3 offset:{{[0-9]+}}
+; GCNMESA: buffer_load_dword {{v[0-9]+}}, off, s[12:15], s3 offset:{{[0-9]+}}
+; GCNMESA: buffer_load_dword {{v[0-9]+}}, off, s[12:15], s3 offset:{{[0-9]+}}
+; GCNMESA: buffer_load_dword {{v[0-9]+}}, off, s[12:15], s3 offset:{{[0-9]+}}
+
+
+
+; HSA: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s7 offset:{{[0-9]+}} ; 4-byte Folded Spill
+
+; HSA: buffer_store_dword {{v[0-9]}}, off, s[0:3], s7 offset:{{[0-9]+}}
+; HSA: buffer_store_dword {{v[0-9]}}, off, s[0:3], s7 offset:{{[0-9]+}}
+; HSA: buffer_store_dword {{v[0-9]}}, off, s[0:3], s7 offset:{{[0-9]+}}
+; HSA: buffer_store_dword {{v[0-9]}}, off, s[0:3], s7 offset:{{[0-9]+}}
+
+; HSA: buffer_load_dword {{v[0-9]+}}, off, s[0:3], s7 offset:{{[0-9]+}}
+; HSA: buffer_load_dword {{v[0-9]+}}, off, s[0:3], s7 offset:{{[0-9]+}}
+; HSA: buffer_load_dword {{v[0-9]+}}, off, s[0:3], s7 offset:{{[0-9]+}}
+; HSA: buffer_load_dword {{v[0-9]+}}, off, s[0:3], s7 offset:{{[0-9]+}}
-; GCN: buffer_load_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}}
-; GCN: buffer_load_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}}
-; GCN: buffer_load_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}}
-; GCN: buffer_load_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}}
; GCN: NumVgprs: 256
; GCN: ScratchSize: 1536
diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
index ca2366a361fb..afbd06a00fae 100644
--- a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -13,7 +13,7 @@
; GCN-LABEL: {{^}}main:
-; GCN-DAG: s_mov_b32 s[[OFFREG:[0-9]+]], s12
+; GCN-NOT: s_mov_b32 s12
; GCN-DAG: s_mov_b32 s[[DESC0:[0-9]+]], SCRATCH_RSRC_DWORD0
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -1
@@ -22,8 +22,8 @@
; GFX9-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe00000
-; OFFREG is offset system SGPR
+; s12 is the offset system SGPR
-; GCN: buffer_store_dword {{v[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], s[[OFFREG]] offset:{{[0-9]+}} ; 4-byte Folded Spill
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], s[[OFFREG]] offset:{{[0-9]+}} ; 4-byte Folded Reload
+; GCN: buffer_store_dword {{v[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], s12 offset:{{[0-9]+}} ; 4-byte Folded Spill
+; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], s12 offset:{{[0-9]+}} ; 4-byte Folded Reload
; GCN: NumVgprs: 256
; GCN: ScratchSize: 1536
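
Note: the last two files replace FileCheck pattern variables with literal registers plus -NOT guards. For reference, [[NAME:regex]] captures whatever the regex matched and a later [[NAME]] must match that exact captured text, whereas a literal such as s12 must appear verbatim on every line that names it. A minimal illustration of the captured-variable idiom being removed (hypothetical checks, not taken from any test in this patch):

; CHECK: s_mov_b32 [[OFF:s[0-9]+]], 0
; CHECK: buffer_store_dword v0, off, s[0:3], [[OFF]] offset:4

Dropping the s_mov_b32 copy means there is no longer a single definition site to capture from, which is why the updated checks pin the physical offset register directly.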