about | summary | refs | log | tree | commit | diff
path: root/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/aarch64-v8.2a-neon-intrinsics.c')
-rw-r--r-- test/CodeGen/aarch64-v8.2a-neon-intrinsics.c | 63
1 file changed, 25 insertions, 38 deletions
diff --git a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
index 3f61238b64fb..e1a2e3fb92dd 100644
--- a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -164,6 +164,13 @@ int16x4_t test_vcvta_s16_f16 (float16x4_t a) {
return vcvta_s16_f16(a);
}
+// CHECK-LABEL: test_vcvta_u16_f16
+// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtau.v4i16.v4f16(<4 x half> %a)
+// CHECK: ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvta_u16_f16 (float16x4_t a) {
+ return vcvta_u16_f16(a);
+}
+
// CHECK-LABEL: test_vcvtaq_s16_f16
// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtas.v8i16.v8f16(<8 x half> %a)
// CHECK: ret <8 x i16> [[VCVT]]
@@ -1223,27 +1230,25 @@ float16x8_t test_vmulxq_n_f16(float16x8_t a, float16_t b) {
return vmulxq_n_f16(a, b);
}
-/* TODO: Not implemented yet (needs scalar intrinsic from arm_fp16.h)
-// CCHECK-LABEL: test_vmulxh_lane_f16
-// CCHECK: [[CONV0:%.*]] = fpext half %a to float
-// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
-// CCHECK: [[MUL:%.*]] = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
-// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half
-// CCHECK: ret half [[CONV3:%.*]]
+// CHECK-LABEL: test_vmulxh_lane_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3
+// CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half [[EXTR]])
+// CHECK: ret half [[MULX]]
float16_t test_vmulxh_lane_f16(float16_t a, float16x4_t b) {
return vmulxh_lane_f16(a, b, 3);
}
-// CCHECK-LABEL: test_vmulxh_laneq_f16
-// CCHECK: [[CONV0:%.*]] = fpext half %a to float
-// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
-// CCHECK: [[MUL:%.*]] = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
-// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half
-// CCHECK: ret half [[CONV3:%.*]]
+// CHECK-LABEL: test_vmulxh_laneq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %b to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7
+// CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half [[EXTR]])
+// CHECK: ret half [[MULX]]
float16_t test_vmulxh_laneq_f16(float16_t a, float16x8_t b) {
return vmulxh_laneq_f16(a, b, 7);
}
-*/
// CHECK-LABEL: test_vmaxv_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
@@ -1350,16 +1355,13 @@ float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) {
// CHECK-LABEL: test_vzip_f16
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
// CHECK: [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]]
-// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
-// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false)
float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) {
return vzip_f16(a, b);
}
@@ -1367,16 +1369,13 @@ float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: test_vzipq_f16
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
// CHECK: [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]]
-// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
-// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false)
float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) {
return vzipq_f16(a, b);
}
@@ -1384,16 +1383,13 @@ float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: test_vuzp_f16
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
// CHECK: [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]]
-// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
-// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false)
float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) {
return vuzp_f16(a, b);
}
@@ -1401,16 +1397,13 @@ float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: test_vuzpq_f16
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
// CHECK: [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]]
-// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
-// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false)
float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) {
return vuzpq_f16(a, b);
}
@@ -1418,16 +1411,13 @@ float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: test_vtrn_f16
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>*
// CHECK: [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]]
-// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
-// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false)
float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) {
return vtrn_f16(a, b);
}
@@ -1435,16 +1425,13 @@ float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: test_vtrnq_f16
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>*
// CHECK: [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]]
-// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
-// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false)
float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) {
return vtrnq_f16(a, b);
}