diff options
Diffstat (limited to 'test/CodeGen/aarch64-v8.2a-neon-intrinsics.c')
-rw-r--r-- | test/CodeGen/aarch64-v8.2a-neon-intrinsics.c | 63 |
1 files changed, 25 insertions, 38 deletions
diff --git a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c index 3f61238b64fb..e1a2e3fb92dd 100644 --- a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c +++ b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c @@ -164,6 +164,13 @@ int16x4_t test_vcvta_s16_f16 (float16x4_t a) { return vcvta_s16_f16(a); } +// CHECK-LABEL: test_vcvta_u16_f16 +// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtau.v4i16.v4f16(<4 x half> %a) +// CHECK: ret <4 x i16> [[VCVT]] +int16x4_t test_vcvta_u16_f16 (float16x4_t a) { + return vcvta_u16_f16(a); +} + // CHECK-LABEL: test_vcvtaq_s16_f16 // CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtas.v8i16.v8f16(<8 x half> %a) // CHECK: ret <8 x i16> [[VCVT]] @@ -1223,27 +1230,25 @@ float16x8_t test_vmulxq_n_f16(float16x8_t a, float16_t b) { return vmulxq_n_f16(a, b); } -/* TODO: Not implemented yet (needs scalar intrinsic from arm_fp16.h) -// CCHECK-LABEL: test_vmulxh_lane_f16 -// CCHECK: [[CONV0:%.*]] = fpext half %a to float -// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float -// CCHECK: [[MUL:%.*]] = fmul float [[CONV0:%.*]], [[CONV0:%.*]] -// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half -// CCHECK: ret half [[CONV3:%.*]] +// CHECK-LABEL: test_vmulxh_lane_f16 +// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %b to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> +// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3 +// CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half [[EXTR]] +// CHECK: ret half [[MULX]] float16_t test_vmulxh_lane_f16(float16_t a, float16x4_t b) { return vmulxh_lane_f16(a, b, 3); } -// CCHECK-LABEL: test_vmulxh_laneq_f16 -// CCHECK: [[CONV0:%.*]] = fpext half %a to float -// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float -// CCHECK: [[MUL:%.*]] = fmul float [[CONV0:%.*]], [[CONV0:%.*]] -// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half -// CCHECK: ret half [[CONV3:%.*]] +// CHECK-LABEL: test_vmulxh_laneq_f16 +// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %b to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> +// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7 +// CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half [[EXTR]]) +// CHECK: ret half [[MULX]] float16_t test_vmulxh_laneq_f16(float16_t a, float16x8_t b) { return vmulxh_laneq_f16(a, b, 7); } -*/ // CHECK-LABEL: test_vmaxv_f16 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> @@ -1350,16 +1355,13 @@ float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) { // CHECK-LABEL: test_vzip_f16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>* // CHECK: [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> // CHECK: store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> // CHECK: store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) { return vzip_f16(a, b); } @@ -1367,16 +1369,13 @@ float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) { // CHECK-LABEL: test_vzipq_f16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>* // CHECK: [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> // CHECK: store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> // CHECK: store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) { return vzipq_f16(a, b); } @@ -1384,16 +1383,13 @@ float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) { // CHECK-LABEL: test_vuzp_f16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>* // CHECK: [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> // CHECK: store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> // CHECK: store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) { return vuzp_f16(a, b); } @@ -1401,16 +1397,13 @@ float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) { // CHECK-LABEL: test_vuzpq_f16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>* // CHECK: [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> // CHECK: store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> // CHECK: store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) { return vuzpq_f16(a, b); } @@ -1418,16 +1411,13 @@ float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) { // CHECK-LABEL: test_vtrn_f16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>* // CHECK: [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> // CHECK: store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> // CHECK: store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) { return vtrn_f16(a, b); } @@ -1435,16 +1425,13 @@ float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) { // CHECK-LABEL: test_vtrnq_f16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>* // CHECK: [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> // CHECK: store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> // CHECK: store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) { return vtrnq_f16(a, b); } |