diff options
Diffstat (limited to 'test/CodeGen/AMDGPU/frem.ll')
-rw-r--r-- | test/CodeGen/AMDGPU/frem.ll | 25 |
1 files changed, 12 insertions, 13 deletions
diff --git a/test/CodeGen/AMDGPU/frem.ll b/test/CodeGen/AMDGPU/frem.ll index 039623c02194..9778069d0477 100644 --- a/test/CodeGen/AMDGPU/frem.ll +++ b/test/CodeGen/AMDGPU/frem.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}frem_f32: ; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}} @@ -12,10 +12,10 @@ ; GCN: v_mul_f32_e32 ; GCN: v_div_fmas_f32 ; GCN: v_div_fixup_f32 -; GCN: v_trunc_f32_e32 -; GCN: v_mad_f32 +; GCN: v_trunc_f32_e32 v{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: s_endpgm -define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, +define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) #0 { %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4 %r0 = load float, float addrspace(1)* %in1, align 4 @@ -33,8 +33,7 @@ define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, ; GCN: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]] ; GCN: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]] ; GCN: buffer_store_dword [[RESULT]] -; GCN: s_endpgm -define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, +define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) #1 { %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4 %r0 = load float, float addrspace(1)* %in1, align 4 @@ -55,7 +54,7 @@ define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, ; GCN: v_add_f64 ; GCN: buffer_store_dwordx2 ; GCN: s_endpgm -define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, +define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) #0 { %r0 = load double, double addrspace(1)* %in1, align 8 %r1 = load double, double addrspace(1)* %in2, align 8 @@ -71,7 +70,7 @@ define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, ; CI: v_trunc_f64_e32 ; GCN: v_fma_f64 ; GCN: s_endpgm -define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, +define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) #1 { %r0 = load double, double addrspace(1)* %in1, align 8 %r1 = load double, double addrspace(1)* %in2, align 8 @@ -80,7 +79,7 @@ define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in ret void } -define void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1, +define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1, <2 x float> addrspace(1)* %in2) #0 { %gep2 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in2, i32 4 %r0 = load <2 x float>, <2 x float> addrspace(1)* %in1, align 8 @@ -90,7 +89,7 @@ define void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1) ret void } -define void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1, +define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1, <4 x float> addrspace(1)* %in2) #0 { %gep2 = getelementptr <4 x float>, <4 x float> addrspace(1)* %in2, i32 4 %r0 = load <4 x float>, <4 x float> addrspace(1)* %in1, align 16 @@ -100,7 +99,7 @@ define void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1) ret void } -define void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1, +define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1, <2 x double> addrspace(1)* %in2) #0 { %gep2 = getelementptr <2 x double>, <2 x double> addrspace(1)* %in2, i32 4 %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1, align 16 |