aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGen/builtins-nvptx.c
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/builtins-nvptx.c')
-rw-r--r--test/CodeGen/builtins-nvptx.c74
1 files changed, 72 insertions, 2 deletions
diff --git a/test/CodeGen/builtins-nvptx.c b/test/CodeGen/builtins-nvptx.c
index fa6b14c1ca7d..2c7e0c136769 100644
--- a/test/CodeGen/builtins-nvptx.c
+++ b/test/CodeGen/builtins-nvptx.c
@@ -1,8 +1,15 @@
-// RUN: %clang_cc1 -triple nvptx-unknown-unknown -emit-llvm -o %t %s
-// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -emit-llvm -o %t %s
+// REQUIRES: nvptx-registered-target
+// REQUIRES: nvptx64-registered-target
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
int read_tid() {
+// CHECK: call i32 @llvm.ptx.read.tid.x()
+// CHECK: call i32 @llvm.ptx.read.tid.y()
+// CHECK: call i32 @llvm.ptx.read.tid.z()
+// CHECK: call i32 @llvm.ptx.read.tid.w()
+
int x = __builtin_ptx_read_tid_x();
int y = __builtin_ptx_read_tid_y();
int z = __builtin_ptx_read_tid_z();
@@ -14,6 +21,11 @@ int read_tid() {
int read_ntid() {
+// CHECK: call i32 @llvm.ptx.read.ntid.x()
+// CHECK: call i32 @llvm.ptx.read.ntid.y()
+// CHECK: call i32 @llvm.ptx.read.ntid.z()
+// CHECK: call i32 @llvm.ptx.read.ntid.w()
+
int x = __builtin_ptx_read_ntid_x();
int y = __builtin_ptx_read_ntid_y();
int z = __builtin_ptx_read_ntid_z();
@@ -25,6 +37,11 @@ int read_ntid() {
int read_ctaid() {
+// CHECK: call i32 @llvm.ptx.read.ctaid.x()
+// CHECK: call i32 @llvm.ptx.read.ctaid.y()
+// CHECK: call i32 @llvm.ptx.read.ctaid.z()
+// CHECK: call i32 @llvm.ptx.read.ctaid.w()
+
int x = __builtin_ptx_read_ctaid_x();
int y = __builtin_ptx_read_ctaid_y();
int z = __builtin_ptx_read_ctaid_z();
@@ -36,6 +53,11 @@ int read_ctaid() {
int read_nctaid() {
+// CHECK: call i32 @llvm.ptx.read.nctaid.x()
+// CHECK: call i32 @llvm.ptx.read.nctaid.y()
+// CHECK: call i32 @llvm.ptx.read.nctaid.z()
+// CHECK: call i32 @llvm.ptx.read.nctaid.w()
+
int x = __builtin_ptx_read_nctaid_x();
int y = __builtin_ptx_read_nctaid_y();
int z = __builtin_ptx_read_nctaid_z();
@@ -47,6 +69,13 @@ int read_nctaid() {
int read_ids() {
+// CHECK: call i32 @llvm.ptx.read.laneid()
+// CHECK: call i32 @llvm.ptx.read.warpid()
+// CHECK: call i32 @llvm.ptx.read.nwarpid()
+// CHECK: call i32 @llvm.ptx.read.smid()
+// CHECK: call i32 @llvm.ptx.read.nsmid()
+// CHECK: call i32 @llvm.ptx.read.gridid()
+
int a = __builtin_ptx_read_laneid();
int b = __builtin_ptx_read_warpid();
int c = __builtin_ptx_read_nwarpid();
@@ -60,6 +89,12 @@ int read_ids() {
int read_lanemasks() {
+// CHECK: call i32 @llvm.ptx.read.lanemask.eq()
+// CHECK: call i32 @llvm.ptx.read.lanemask.le()
+// CHECK: call i32 @llvm.ptx.read.lanemask.lt()
+// CHECK: call i32 @llvm.ptx.read.lanemask.ge()
+// CHECK: call i32 @llvm.ptx.read.lanemask.gt()
+
int a = __builtin_ptx_read_lanemask_eq();
int b = __builtin_ptx_read_lanemask_le();
int c = __builtin_ptx_read_lanemask_lt();
@@ -73,6 +108,9 @@ int read_lanemasks() {
long read_clocks() {
+// CHECK: call i32 @llvm.ptx.read.clock()
+// CHECK: call i64 @llvm.ptx.read.clock64()
+
int a = __builtin_ptx_read_clock();
long b = __builtin_ptx_read_clock64();
@@ -82,6 +120,11 @@ long read_clocks() {
int read_pms() {
+// CHECK: call i32 @llvm.ptx.read.pm0()
+// CHECK: call i32 @llvm.ptx.read.pm1()
+// CHECK: call i32 @llvm.ptx.read.pm2()
+// CHECK: call i32 @llvm.ptx.read.pm3()
+
int a = __builtin_ptx_read_pm0();
int b = __builtin_ptx_read_pm1();
int c = __builtin_ptx_read_pm2();
@@ -93,6 +136,33 @@ int read_pms() {
void sync() {
+// CHECK: call void @llvm.ptx.bar.sync(i32 0)
+// sync() exercises __builtin_ptx_bar_sync with the constant 0; the directive above expects the matching llvm.ptx.bar.sync call in the emitted IR.
__builtin_ptx_bar_sync(0);
}
+
+
+// NVVM intrinsics
+
+// The idea is not to test all intrinsics, just that Clang recognizes the
+// builtins defined in BuiltinsNVPTX.def.
+void nvvm_math(float f1, float f2, double d1, double d2) { // Calls a sample of __nvvm_* math builtins; each result lands in a local that is never read.
+// CHECK: call float @llvm.nvvm.fmax.f
+ float t1 = __nvvm_fmax_f(f1, f2); // each call is preceded by the directive naming the intrinsic expected in the IR
+// CHECK: call float @llvm.nvvm.fmin.f
+ float t2 = __nvvm_fmin_f(f1, f2);
+// CHECK: call float @llvm.nvvm.sqrt.rn.f
+ float t3 = __nvvm_sqrt_rn_f(f1);
+// CHECK: call float @llvm.nvvm.rcp.rn.f
+ float t4 = __nvvm_rcp_rn_f(f2);
+// Double-precision variants of the same four operations (fmax, fmin, sqrt.rn, rcp.rn) follow.
+// CHECK: call double @llvm.nvvm.fmax.d
+ double td1 = __nvvm_fmax_d(d1, d2);
+// CHECK: call double @llvm.nvvm.fmin.d
+ double td2 = __nvvm_fmin_d(d1, d2);
+// CHECK: call double @llvm.nvvm.sqrt.rn.d
+ double td3 = __nvvm_sqrt_rn_d(d1);
+// CHECK: call double @llvm.nvvm.rcp.rn.d
+ double td4 = __nvvm_rcp_rn_d(d2);
+}