path: root/test/CodeGen/arm64_vfma.c
// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -emit-llvm %s | FileCheck %s
// Test ARM64 SIMD fused multiply-add intrinsics

#include <arm_neon.h>

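// The plain vfma/vfmaq forms: vfma_f32(a1, a2, a3) computes a1 + a2 * a3
// element-wise, so each call should lower to the llvm.fma intrinsic with the
// two multiplicands first and the accumulator last.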
float32x2_t test_vfma_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfma_f32
  return vfma_f32(a1, a2, a3);
  // CHECK: llvm.fma.v2f32({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}

float32x4_t test_vfmaq_f32(float32x4_t a1, float32x4_t a2, float32x4_t a3) {
  // CHECK: test_vfmaq_f32
  return vfmaq_f32(a1, a2, a3);
  // CHECK: llvm.fma.v4f32({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}

float64x2_t test_vfmaq_f64(float64x2_t a1, float64x2_t a2, float64x2_t a3) {
  // CHECK: test_vfmaq_f64
  return vfmaq_f64(a1, a2, a3);
  // CHECK: llvm.fma.v2f64({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}

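// The _lane forms multiply by a single element of the last vector argument:
// vfma_lane_f32(a1, a2, a3, n) computes a1 + a2 * a3[n].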
float32x2_t test_vfma_lane_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfma_lane_f32
  return vfma_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: llvm.fma.v2f32(<2 x float> %a2, <2 x float> {{.*}}, <2 x float> %a1)
  // CHECK-NEXT: ret
}

float32x4_t test_vfmaq_lane_f32(float32x4_t a1, float32x4_t a2, float32x2_t a3) {
  // CHECK: test_vfmaq_lane_f32
  return vfmaq_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: llvm.fma.v4f32(<4 x float> %a2, <4 x float> {{.*}}, <4 x float> %a1)
  // CHECK-NEXT: ret
}

float64x2_t test_vfmaq_lane_f64(float64x2_t a1, float64x2_t a2, float64x1_t a3) {
  // CHECK: test_vfmaq_lane_f64
  return vfmaq_lane_f64(a1, a2, a3, 0);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually a shufflevector)
  // CHECK: llvm.fma.v2f64(<2 x double> %a2, <2 x double> {{.*}}, <2 x double> %a1)
  // CHECK-NEXT: ret
}

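// The _n forms multiply by a scalar that is splatted across every lane:
// vfma_n_f32(a1, a2, n) computes a1 + a2 * n.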
float32x2_t test_vfma_n_f32(float32x2_t a1, float32x2_t a2, float32_t a3) {
  // CHECK: test_vfma_n_f32
  return vfma_n_f32(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually two insertelements)
  // CHECK: llvm.fma.v2f32
  // CHECK-NEXT: ret
}

float32x4_t test_vfmaq_n_f32(float32x4_t a1, float32x4_t a2, float32_t a3) {
  // CHECK: test_vfmaq_n_f32
  return vfmaq_n_f32(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually four insertelements)
  // CHECK: llvm.fma.v4f32
  // CHECK-NEXT: ret
}

float64x2_t test_vfmaq_n_f64(float64x2_t a1, float64x2_t a2, float64_t a3) {
  // CHECK: test_vfmaq_n_f64
  return vfmaq_n_f64(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually two insertelements)
  // CHECK: llvm.fma.v2f64
  // CHECK-NEXT: ret
}

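// The vfms/vfmsq forms compute a1 - a2 * a3; they are lowered by negating one
// multiplicand (the fsub matched below) and reusing llvm.fma.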
float32x2_t test_vfms_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfms_f32
  return vfms_f32(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a2
  // CHECK: llvm.fma.v2f32(<2 x float> %a3, <2 x float> [[NEG]], <2 x float> %a1)
  // CHECK-NEXT: ret
}

float32x4_t test_vfmsq_f32(float32x4_t a1, float32x4_t a2, float32x4_t a3) {
  // CHECK: test_vfmsq_f32
  return vfmsq_f32(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <4 x float> {{.*}}, %a2
  // CHECK: llvm.fma.v4f32(<4 x float> %a3, <4 x float> [[NEG]], <4 x float> %a1)
  // CHECK-NEXT: ret
}

float64x2_t test_vfmsq_f64(float64x2_t a1, float64x2_t a2, float64x2_t a3) {
  // CHECK: test_vfmsq_f64
  return vfmsq_f64(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <2 x double> {{.*}}, %a2
  // CHECK: llvm.fma.v2f64(<2 x double> %a3, <2 x double> [[NEG]], <2 x double> %a1)
  // CHECK-NEXT: ret
}

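// The vfms _lane forms negate the lane vector before the lane is selected,
// hence the fsub followed by the shufflevector in the checks below.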
float32x2_t test_vfms_lane_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfms_lane_f32
  return vfms_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[NEG]]
  // CHECK: llvm.fma.v2f32(<2 x float> {{.*}}, <2 x float> [[LANE]], <2 x float> %a1)
  // CHECK-NEXT: ret
}

float32x4_t test_vfmsq_lane_f32(float32x4_t a1, float32x4_t a2, float32x2_t a3) {
  // CHECK: test_vfmsq_lane_f32
  return vfmsq_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[NEG]]
  // CHECK: llvm.fma.v4f32(<4 x float> {{.*}}, <4 x float> [[LANE]], <4 x float> %a1)
  // CHECK-NEXT: ret
}

float64x2_t test_vfmsq_lane_f64(float64x2_t a1, float64x2_t a2, float64x1_t a3) {
  // CHECK: test_vfmsq_lane_f64
  return vfmsq_lane_f64(a1, a2, a3, 0);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <1 x double> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[NEG]]
  // CHECK: llvm.fma.v2f64(<2 x double> {{.*}}, <2 x double> [[LANE]], <2 x double> %a1)
  // CHECK-NEXT: ret
}