diff options
Diffstat (limited to 'test/CodeGen/NVPTX')
29 files changed, 256 insertions, 68 deletions
diff --git a/test/CodeGen/NVPTX/annotations.ll b/test/CodeGen/NVPTX/annotations.ll index 39d52d382663..2341377d75a6 100644 --- a/test/CodeGen/NVPTX/annotations.ll +++ b/test/CodeGen/NVPTX/annotations.ll @@ -33,21 +33,14 @@ define void @kernel_func_minctasm(float* %a) { !nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8} -!1 = metadata !{void (float*)* @kernel_func_maxntid, metadata !"kernel", i32 1} -!2 = metadata !{void (float*)* @kernel_func_maxntid, - metadata !"maxntidx", i32 10, - metadata !"maxntidy", i32 20, - metadata !"maxntidz", i32 30} - -!3 = metadata !{void (float*)* @kernel_func_reqntid, metadata !"kernel", i32 1} -!4 = metadata !{void (float*)* @kernel_func_reqntid, - metadata !"reqntidx", i32 11, - metadata !"reqntidy", i32 22, - metadata !"reqntidz", i32 33} - -!5 = metadata !{void (float*)* @kernel_func_minctasm, metadata !"kernel", i32 1} -!6 = metadata !{void (float*)* @kernel_func_minctasm, - metadata !"minctasm", i32 42} - -!7 = metadata !{i64 addrspace(1)* @texture, metadata !"texture", i32 1} -!8 = metadata !{i64 addrspace(1)* @surface, metadata !"surface", i32 1} +!1 = !{void (float*)* @kernel_func_maxntid, !"kernel", i32 1} +!2 = !{void (float*)* @kernel_func_maxntid, !"maxntidx", i32 10, !"maxntidy", i32 20, !"maxntidz", i32 30} + +!3 = !{void (float*)* @kernel_func_reqntid, !"kernel", i32 1} +!4 = !{void (float*)* @kernel_func_reqntid, !"reqntidx", i32 11, !"reqntidy", i32 22, !"reqntidz", i32 33} + +!5 = !{void (float*)* @kernel_func_minctasm, !"kernel", i32 1} +!6 = !{void (float*)* @kernel_func_minctasm, !"minctasm", i32 42} + +!7 = !{i64 addrspace(1)* @texture, !"texture", i32 1} +!8 = !{i64 addrspace(1)* @surface, !"surface", i32 1} diff --git a/test/CodeGen/NVPTX/bug21465.ll b/test/CodeGen/NVPTX/bug21465.ll new file mode 100644 index 000000000000..cacffceac517 --- /dev/null +++ b/test/CodeGen/NVPTX/bug21465.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -nvptx-lower-struct-args -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" +target triple = "nvptx64-unknown-unknown" + +%struct.S = type { i32, i32 } + +; Function Attrs: nounwind +define void @_Z11TakesStruct1SPi(%struct.S* byval nocapture readonly %input, i32* nocapture %output) #0 { +entry: +; CHECK-LABEL @_Z22TakesStruct1SPi +; CHECK: bitcast %struct.S* %input to i8* +; CHECK: call i8 addrspace(101)* @llvm.nvvm.ptr.gen.to.param.p101i8.p0i8 + %b = getelementptr inbounds %struct.S* %input, i64 0, i32 1 + %0 = load i32* %b, align 4 + store i32 %0, i32* %output, align 4 + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!nvvm.annotations = !{!0} + +!0 = !{void (%struct.S*, i32*)* @_Z11TakesStruct1SPi, !"kernel", i32 1} diff --git a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll index 83d491637041..8483112381f1 100644 --- a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll +++ b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll @@ -63,4 +63,4 @@ declare void @callee(float*, i8*) !nvvm.annotations = !{!0} -!0 = metadata !{void (float*)* @kernel_func, metadata !"kernel", i32 1} +!0 = !{void (float*)* @kernel_func, !"kernel", i32 1} diff --git a/test/CodeGen/NVPTX/calling-conv.ll b/test/CodeGen/NVPTX/calling-conv.ll index 190a1462adbc..3b03442ad8bd 100644 --- a/test/CodeGen/NVPTX/calling-conv.ll +++ b/test/CodeGen/NVPTX/calling-conv.ll @@ -27,4 +27,4 @@ define void @metadata_kernel(float* %a) { !nvvm.annotations = !{!1} -!1 = metadata !{void (float*)* @metadata_kernel, metadata !"kernel", i32 1} +!1 = !{void (float*)* @metadata_kernel, !"kernel", i32 1} diff --git a/test/CodeGen/NVPTX/fma-assoc.ll b/test/CodeGen/NVPTX/fma-assoc.ll new file mode 100644 index 000000000000..fc04c61dd691 --- /dev/null +++ b/test/CodeGen/NVPTX/fma-assoc.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s + +define ptx_device float @t1_f32(float %x, float %y, float %z, + float %u, float %v) { +; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; + %a = fmul float %x, %y + %b = fmul float %u, %v + %c = fadd float %a, %b + %d = fadd float %c, %z + ret float %d +} + +define ptx_device double @t1_f64(double %x, double %y, double %z, + double %u, double %v) { +; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %a = fmul double %x, %y + %b = fmul double %u, %v + %c = fadd double %a, %b + %d = fadd double %c, %z + ret double %d +} diff --git a/test/CodeGen/NVPTX/fma.ll b/test/CodeGen/NVPTX/fma.ll index 14b5c45b87d8..6785a01827e2 100644 --- a/test/CodeGen/NVPTX/fma.ll +++ b/test/CodeGen/NVPTX/fma.ll @@ -1,5 +1,8 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s +declare float @dummy_f32(float, float) #0 +declare double @dummy_f64(double, double) #0 + define ptx_device float @t1_f32(float %x, float %y, float %z) { ; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; ; CHECK: ret; @@ -8,6 +11,17 @@ define ptx_device float @t1_f32(float %x, float %y, float %z) { ret float %b } +define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) { +; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; +; CHECK: ret; + %a = fmul float %x, %y + %b = fadd float %a, %z + %c = fadd float %a, %w + %d = call float @dummy_f32(float %b, float %c) + ret float %d +} + define ptx_device double @t1_f64(double %x, double %y, double %z) { ; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; ; CHECK: ret; @@ -15,3 +29,14 @@ define ptx_device double @t1_f64(double %x, double %y, double %z) { %b = fadd double %a, %z ret double %b } + +define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) { +; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; +; CHECK: ret; + %a = fmul double %x, %y + %b = fadd double %a, %z + %c = fadd double %a, %w + %d = call double @dummy_f64(double %b, double %c) + ret double %d +} diff --git a/test/CodeGen/NVPTX/generic-to-nvvm.ll b/test/CodeGen/NVPTX/generic-to-nvvm.ll index 2a527989e410..fb63d6ed575f 100644 --- a/test/CodeGen/NVPTX/generic-to-nvvm.ll +++ b/test/CodeGen/NVPTX/generic-to-nvvm.ll @@ -23,4 +23,4 @@ define void @foo(i32* %a, i32* %b) { !nvvm.annotations = !{!0} -!0 = metadata !{void (i32*, i32*)* @foo, metadata !"kernel", i32 1} +!0 = !{void (i32*, i32*)* @foo, !"kernel", i32 1} diff --git a/test/CodeGen/NVPTX/i1-global.ll b/test/CodeGen/NVPTX/i1-global.ll index 1dd8ae40db4f..e3fe08e5f874 100644 --- a/test/CodeGen/NVPTX/i1-global.ll +++ b/test/CodeGen/NVPTX/i1-global.ll @@ -16,4 +16,4 @@ define void @foo(i1 %p, i32* %out) { !nvvm.annotations = !{!0} -!0 = metadata !{void (i1, i32*)* @foo, metadata !"kernel", i32 1} +!0 = !{void (i1, i32*)* @foo, !"kernel", i32 1} diff --git a/test/CodeGen/NVPTX/i1-param.ll b/test/CodeGen/NVPTX/i1-param.ll index f4df87439322..aac71960551f 100644 --- a/test/CodeGen/NVPTX/i1-param.ll +++ b/test/CodeGen/NVPTX/i1-param.ll @@ -16,4 +16,4 @@ define void @foo(i1 %p, i32* %out) { !nvvm.annotations = !{!0} -!0 = metadata !{void (i1, i32*)* @foo, metadata !"kernel", i32 1} +!0 = !{void (i1, i32*)* @foo, !"kernel", i32 1} diff --git a/test/CodeGen/NVPTX/ldu-i8.ll b/test/CodeGen/NVPTX/ldu-i8.ll index 9cc667557906..36c99b30425d 100644 --- a/test/CodeGen/NVPTX/ldu-i8.ll +++ b/test/CodeGen/NVPTX/ldu-i8.ll @@ -2,15 +2,13 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" -declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*) +declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*, i32) define i8 @foo(i8* %a) { ; Ensure we properly truncate off the high-order 24 bits ; CHECK: ldu.global.u8 ; CHECK: cvt.u32.u16 ; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 255 - %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a), !align !0 + %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a, i32 4) ret i8 %val } - -!0 = metadata !{i32 4} diff --git a/test/CodeGen/NVPTX/ldu-ldg.ll b/test/CodeGen/NVPTX/ldu-ldg.ll index 3b0619ff5175..4bfd68c22428 100644 --- a/test/CodeGen/NVPTX/ldu-ldg.ll +++ b/test/CodeGen/NVPTX/ldu-ldg.ll @@ -1,40 +1,36 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr) -declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr) -declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr) -declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr) +declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align) +declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align) +declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align) +declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align) ; CHECK: func0 define i8 @func0(i8 addrspace(1)* %ptr) { ; ldu.global.u8 - %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0 + %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4) ret i8 %val } ; CHECK: func1 define i32 @func1(i32 addrspace(1)* %ptr) { ; ldu.global.u32 - %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0 + %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4) ret i32 %val } ; CHECK: func2 define i8 @func2(i8 addrspace(1)* %ptr) { ; ld.global.nc.u8 - %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0 + %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4) ret i8 %val } ; CHECK: func3 define i32 @func3(i32 addrspace(1)* %ptr) { ; ld.global.nc.u32 - %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0 + %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4) ret i32 %val } - - - -!0 = metadata !{i32 4} diff --git a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll index 55707ea85106..fd35a7503901 100644 --- a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll +++ b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll @@ -7,15 +7,13 @@ define void @reg_plus_offset(i32* %a) { ; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32]; ; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36]; %p2 = getelementptr i32* %a, i32 8 - %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2), !align !1 + %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2, i32 4) %p3 = getelementptr i32* %a, i32 9 - %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3), !align !1 + %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3, i32 4) %t3 = mul i32 %t1, %t2 store i32 %t3, i32* %a ret void } -!1 = metadata !{ i32 4 } - -declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*) +declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*, i32) declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() diff --git a/test/CodeGen/NVPTX/machine-sink.ll b/test/CodeGen/NVPTX/machine-sink.ll new file mode 100644 index 000000000000..3614bea16534 --- /dev/null +++ b/test/CodeGen/NVPTX/machine-sink.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +@scalar1 = internal addrspace(3) global float 0.000000e+00, align 4 +@scalar2 = internal addrspace(3) global float 0.000000e+00, align 4 + +; We shouldn't sink mul.rn.f32 to BB %merge because BB %merge post-dominates +; BB %entry. Over-sinking created more register pressure on this example. The +; backend would sink the fmuls to BB %merge, but not the loads for being +; conservative on sinking memory accesses. As a result, the loads and +; the two fmuls would be separated to two basic blocks, causing two +; cross-BB live ranges. +define float @post_dominate(float %x, i1 %cond) { +; CHECK-LABEL: post_dominate( +entry: + %0 = load float* addrspacecast (float addrspace(3)* @scalar1 to float*), align 4 + %1 = load float* addrspacecast (float addrspace(3)* @scalar2 to float*), align 4 +; CHECK: ld.shared.f32 +; CHECK: ld.shared.f32 + %2 = fmul float %0, %0 + %3 = fmul float %1, %2 +; CHECK-NOT: bra +; CHECK: mul.rn.f32 +; CHECK: mul.rn.f32 + br i1 %cond, label %then, label %merge + +then: + %z = fadd float %x, %x + br label %then2 + +then2: + %z2 = fadd float %z, %z + br label %merge + +merge: + %y = phi float [ 0.0, %entry ], [ %z2, %then2 ] + %w = fadd float %y, %3 + ret float %w +} diff --git a/test/CodeGen/NVPTX/managed.ll b/test/CodeGen/NVPTX/managed.ll index 4d7e7817f77b..d3f1604dbd36 100644 --- a/test/CodeGen/NVPTX/managed.ll +++ b/test/CodeGen/NVPTX/managed.ll @@ -8,4 +8,4 @@ !nvvm.annotations = !{!0} -!0 = metadata !{i32 addrspace(1)* @managed_g, metadata !"managed", i32 1} +!0 = !{i32 addrspace(1)* @managed_g, !"managed", i32 1} diff --git a/test/CodeGen/NVPTX/mulwide.ll b/test/CodeGen/NVPTX/mulwide.ll index 43bb63098f67..1ddf9739e202 100644 --- a/test/CodeGen/NVPTX/mulwide.ll +++ b/test/CodeGen/NVPTX/mulwide.ll @@ -23,6 +23,28 @@ define i32 @mulwideu16(i16 %a, i16 %b) { ret i32 %val2 } +; OPT-LABEL: @mulwide8 +; NOOPT-LABEL: @mulwide8 +define i32 @mulwide8(i8 %a, i8 %b) { +; OPT: mul.wide.s16 +; NOOPT: mul.lo.s32 + %val0 = sext i8 %a to i32 + %val1 = sext i8 %b to i32 + %val2 = mul i32 %val0, %val1 + ret i32 %val2 +} + +; OPT-LABEL: @mulwideu8 +; NOOPT-LABEL: @mulwideu8 +define i32 @mulwideu8(i8 %a, i8 %b) { +; OPT: mul.wide.u16 +; NOOPT: mul.lo.s32 + %val0 = zext i8 %a to i32 + %val1 = zext i8 %b to i32 + %val2 = mul i32 %val0, %val1 + ret i32 %val2 +} + ; OPT-LABEL: @mulwide32 ; NOOPT-LABEL: @mulwide32 define i64 @mulwide32(i32 %a, i32 %b) { @@ -44,3 +66,25 @@ define i64 @mulwideu32(i32 %a, i32 %b) { %val2 = mul i64 %val0, %val1 ret i64 %val2 } + +; OPT-LABEL: @mulwideu7 +; NOOPT-LABEL: @mulwideu7 +define i64 @mulwideu7(i7 %a, i7 %b) { +; OPT: mul.wide.u32 +; NOOPT: mul.lo.s64 + %val0 = zext i7 %a to i64 + %val1 = zext i7 %b to i64 + %val2 = mul i64 %val0, %val1 + ret i64 %val2 +} + +; OPT-LABEL: @mulwides7 +; NOOPT-LABEL: @mulwides7 +define i64 @mulwides7(i7 %a, i7 %b) { +; OPT: mul.wide.s32 +; NOOPT: mul.lo.s64 + %val0 = sext i7 %a to i64 + %val1 = sext i7 %b to i64 + %val2 = mul i64 %val0, %val1 + ret i64 %val2 +} diff --git a/test/CodeGen/NVPTX/noduplicate-syncthreads.ll b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll index 64745fcba3ba..841bbc3a517c 100644 --- a/test/CodeGen/NVPTX/noduplicate-syncthreads.ll +++ b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll @@ -70,5 +70,5 @@ if.end17: ; preds = %if.else13, %if.then ; Function Attrs: noduplicate nounwind declare void @llvm.cuda.syncthreads() #2 -!0 = metadata !{void (float*)* @foo, metadata !"kernel", i32 1} -!1 = metadata !{null, metadata !"align", i32 8} +!0 = !{void (float*)* @foo, !"kernel", i32 1} +!1 = !{null, !"align", i32 8} diff --git a/test/CodeGen/NVPTX/nvcl-param-align.ll b/test/CodeGen/NVPTX/nvcl-param-align.ll new file mode 100644 index 000000000000..c1a489f1fc42 --- /dev/null +++ b/test/CodeGen/NVPTX/nvcl-param-align.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +target triple = "nvptx-unknown-nvcl" + +; CHECK-LABEL: .entry foo( +define void @foo(i64 %img, i64 %sampler, <5 x float>* %v) { +; The parameter alignment should be the next power of 2 of 5xsizeof(float), +; which is 32. +; CHECK: .param .u32 .ptr .align 32 foo_param_2 + ret void +} + +!nvvm.annotations = !{!1, !2, !3} +!1 = !{void (i64, i64, <5 x float>*)* @foo, !"kernel", i32 1} +!2 = !{void (i64, i64, <5 x float>*)* @foo, !"rdoimage", i32 0} +!3 = !{void (i64, i64, <5 x float>*)* @foo, !"sampler", i32 1} diff --git a/test/CodeGen/NVPTX/refl1.ll b/test/CodeGen/NVPTX/refl1.ll index 4aeff0924955..e8782ea3aa27 100644 --- a/test/CodeGen/NVPTX/refl1.ll +++ b/test/CodeGen/NVPTX/refl1.ll @@ -36,4 +36,4 @@ attributes #2 = { alwaysinline inlinehint nounwind readnone } !nvvm.annotations = !{!0} -!0 = metadata !{void (float*)* @foo, metadata !"kernel", i32 1} +!0 = !{void (float*)* @foo, !"kernel", i32 1} diff --git a/test/CodeGen/NVPTX/simple-call.ll b/test/CodeGen/NVPTX/simple-call.ll index ab6f423cd80a..1b41361cf7ed 100644 --- a/test/CodeGen/NVPTX/simple-call.ll +++ b/test/CodeGen/NVPTX/simple-call.ll @@ -23,4 +23,4 @@ define void @kernel_func(float* %a) { !nvvm.annotations = !{!1} -!1 = metadata !{void (float*)* @kernel_func, metadata !"kernel", i32 1} +!1 = !{void (float*)* @kernel_func, !"kernel", i32 1} diff --git a/test/CodeGen/NVPTX/surf-read-cuda.ll b/test/CodeGen/NVPTX/surf-read-cuda.ll index 10a1ecc4c473..ed021346c0f9 100644 --- a/test/CodeGen/NVPTX/surf-read-cuda.ll +++ b/test/CodeGen/NVPTX/surf-read-cuda.ll @@ -47,7 +47,7 @@ define void @bar(float* %red, i32 %idx) { !nvvm.annotations = !{!1, !2, !3} -!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1} -!2 = metadata !{void (float*, i32)* @bar, metadata !"kernel", i32 1} -!3 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1} +!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1} +!2 = !{void (float*, i32)* @bar, !"kernel", i32 1} +!3 = !{i64 addrspace(1)* @surf0, !"surface", i32 1} diff --git a/test/CodeGen/NVPTX/surf-read.ll b/test/CodeGen/NVPTX/surf-read.ll index a69d03efe0d2..7383722a3596 100644 --- a/test/CodeGen/NVPTX/surf-read.ll +++ b/test/CodeGen/NVPTX/surf-read.ll @@ -16,5 +16,5 @@ define void @foo(i64 %img, float* %red, i32 %idx) { } !nvvm.annotations = !{!1, !2} -!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1} -!2 = metadata !{void (i64, float*, i32)* @foo, metadata !"rdwrimage", i32 0} +!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1} +!2 = !{void (i64, float*, i32)* @foo, !"rdwrimage", i32 0} diff --git a/test/CodeGen/NVPTX/surf-write-cuda.ll b/test/CodeGen/NVPTX/surf-write-cuda.ll index 654c47f46957..da55a242bba6 100644 --- a/test/CodeGen/NVPTX/surf-write-cuda.ll +++ b/test/CodeGen/NVPTX/surf-write-cuda.ll @@ -36,7 +36,7 @@ define void @bar(i32 %val, i32 %idx) { !nvvm.annotations = !{!1, !2, !3} -!1 = metadata !{void (i64, i32, i32)* @foo, metadata !"kernel", i32 1} -!2 = metadata !{void (i32, i32)* @bar, metadata !"kernel", i32 1} -!3 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1} +!1 = !{void (i64, i32, i32)* @foo, !"kernel", i32 1} +!2 = !{void (i32, i32)* @bar, !"kernel", i32 1} +!3 = !{i64 addrspace(1)* @surf0, !"surface", i32 1} diff --git a/test/CodeGen/NVPTX/surf-write.ll b/test/CodeGen/NVPTX/surf-write.ll index 880231f96599..5098d2ae9e1c 100644 --- a/test/CodeGen/NVPTX/surf-write.ll +++ b/test/CodeGen/NVPTX/surf-write.ll @@ -12,5 +12,5 @@ define void @foo(i64 %img, i32 %val, i32 %idx) { } !nvvm.annotations = !{!1, !2} -!1 = metadata !{void (i64, i32, i32)* @foo, metadata !"kernel", i32 1} -!2 = metadata !{void (i64, i32, i32)* @foo, metadata !"wroimage", i32 0} +!1 = !{void (i64, i32, i32)* @foo, !"kernel", i32 1} +!2 = !{void (i64, i32, i32)* @foo, !"wroimage", i32 0} diff --git a/test/CodeGen/NVPTX/tex-read-cuda.ll b/test/CodeGen/NVPTX/tex-read-cuda.ll index ee0cefa919b1..c5b5600de874 100644 --- a/test/CodeGen/NVPTX/tex-read-cuda.ll +++ b/test/CodeGen/NVPTX/tex-read-cuda.ll @@ -41,6 +41,6 @@ define void @bar(float* %red, i32 %idx) { } !nvvm.annotations = !{!1, !2, !3} -!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1} -!2 = metadata !{void (float*, i32)* @bar, metadata !"kernel", i32 1} -!3 = metadata !{i64 addrspace(1)* @tex0, metadata !"texture", i32 1} +!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1} +!2 = !{void (float*, i32)* @bar, !"kernel", i32 1} +!3 = !{i64 addrspace(1)* @tex0, !"texture", i32 1} diff --git a/test/CodeGen/NVPTX/tex-read.ll b/test/CodeGen/NVPTX/tex-read.ll index 55e4bfc9e453..6e0fda69e4f5 100644 --- a/test/CodeGen/NVPTX/tex-read.ll +++ b/test/CodeGen/NVPTX/tex-read.ll @@ -15,6 +15,6 @@ define void @foo(i64 %img, i64 %sampler, float* %red, i32 %idx) { } !nvvm.annotations = !{!1, !2, !3} -!1 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"kernel", i32 1} -!2 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"rdoimage", i32 0} -!3 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"sampler", i32 1} +!1 = !{void (i64, i64, float*, i32)* @foo, !"kernel", i32 1} +!2 = !{void (i64, i64, float*, i32)* @foo, !"rdoimage", i32 0} +!3 = !{void (i64, i64, float*, i32)* @foo, !"sampler", i32 1} diff --git a/test/CodeGen/NVPTX/texsurf-queries.ll b/test/CodeGen/NVPTX/texsurf-queries.ll index c7637ccff77a..e56eb5dea18f 100644 --- a/test/CodeGen/NVPTX/texsurf-queries.ll +++ b/test/CodeGen/NVPTX/texsurf-queries.ll @@ -99,5 +99,5 @@ define i32 @s3() { !nvvm.annotations = !{!1, !2} -!1 = metadata !{i64 addrspace(1)* @tex0, metadata !"texture", i32 1} -!2 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1} +!1 = !{i64 addrspace(1)* @tex0, !"texture", i32 1} +!2 = !{i64 addrspace(1)* @surf0, !"surface", i32 1} diff --git a/test/CodeGen/NVPTX/vector-global.ll b/test/CodeGen/NVPTX/vector-global.ll new file mode 100644 index 000000000000..a463bee3a479 --- /dev/null +++ b/test/CodeGen/NVPTX/vector-global.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +@g1 = external global <4 x i32> ; external global variable +; CHECK: .extern .global .align 16 .b8 g1[16]; +@g2 = global <4 x i32> zeroinitializer ; module-level global variable +; CHECK: .visible .global .align 16 .b8 g2[16]; diff --git a/test/CodeGen/NVPTX/vector-return.ll b/test/CodeGen/NVPTX/vector-return.ll new file mode 100644 index 000000000000..15e50f8e1443 --- /dev/null +++ b/test/CodeGen/NVPTX/vector-return.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s + +declare <2 x float> @bar(<2 x float> %input) + +define void @foo(<2 x float> %input, <2 x float>* %output) { +; CHECK-LABEL: @foo +entry: + %call = tail call <2 x float> @bar(<2 x float> %input) +; CHECK: .param .align 8 .b8 retval0[8]; +; CHECK: ld.param.v2.f32 {[[ELEM1:%f[0-9]+]], [[ELEM2:%f[0-9]+]]}, [retval0+0]; + store <2 x float> %call, <2 x float>* %output, align 8 +; CHECK: st.v2.f32 [{{%rd[0-9]+}}], {[[ELEM1]], [[ELEM2]]} + ret void +} diff --git a/test/CodeGen/NVPTX/weak-linkage.ll b/test/CodeGen/NVPTX/weak-linkage.ll index 7a1335783642..5df57b29249e 100644 --- a/test/CodeGen/NVPTX/weak-linkage.ll +++ b/test/CodeGen/NVPTX/weak-linkage.ll @@ -1,11 +1,17 @@ ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s - +; CHECK: // .weak foo ; CHECK: .weak .func foo define weak void @foo() { ret void } +; CHECK: // .weak baz +; CHECK: .weak .func baz +define weak_odr void @baz() { + ret void +} + ; CHECK: .visible .func bar define void @bar() { ret void |