// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -emit-llvm %s | FileCheck %s
// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - %s | FileCheck -check-prefix=CHECK-CODEGEN %s
// REQUIRES: aarch64-registered-target
// Test ARM64 SIMD max/min intrinsics

#include <arm_neon.h>

// Test a representative sample of 8- and 16-bit, signed and unsigned, 64- and 128-bit reductions
int8_t test_vmaxv_s8(int8x8_t a1) {
  // CHECK-LABEL: define i8 @test_vmaxv_s8(
  return vmaxv_s8(a1);
  // CHECK: call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(
}

uint16_t test_vminvq_u16(uint16x8_t a1) {
  // CHECK-LABEL: define i16 @test_vminvq_u16(
  return vminvq_u16(a1);
  // CHECK: call i32 @llvm.aarch64.neon.uminv.i32.v8i16(
}
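
// For reference, the across-vector reductions above (vmaxv/vminv) collapse all
// lanes of a single operand into one scalar. A hypothetical scalar-equivalent
// sketch of vmaxv_s8 (illustrative only; the _ref name is not part of the
// original test and no CHECK lines are tied to it):
int8_t vmaxv_s8_ref(int8x8_t a1) {
  int8_t max = a1[0];
  // Walk the remaining seven lanes, keeping the largest value seen.
  for (int i = 1; i < 8; ++i)
    if (a1[i] > max)
      max = a1[i];
  return max;
}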

// Test a representative sample of 8- and 16-bit, signed and unsigned, 64- and 128-bit two-operand (element-wise) min/max
uint8x8_t test_vmin_u8(uint8x8_t a1, uint8x8_t a2) {
  // CHECK-LABEL: define <8 x i8> @test_vmin_u8(
  return vmin_u8(a1, a2);
  // CHECK: call <8 x i8> @llvm.aarch64.neon.umin.v8i8(
}

uint8x16_t test_vminq_u8(uint8x16_t a1, uint8x16_t a2) {
  // CHECK-LABEL: define <16 x i8> @test_vminq_u8(
  return vminq_u8(a1, a2);
  // CHECK: call <16 x i8> @llvm.aarch64.neon.umin.v16i8(
}

int16x8_t test_vmaxq_s16(int16x8_t a1, int16x8_t a2) {
  // CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(
  return vmaxq_s16(a1, a2);
  // CHECK: call <8 x i16> @llvm.aarch64.neon.smax.v8i16(
}
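
// For reference, the two-operand forms above operate lane by lane on a pair of
// vectors. A hypothetical scalar-equivalent sketch of vmin_u8 (illustrative
// only; the _ref name is not part of the original test and no CHECK lines are
// tied to it):
uint8x8_t vmin_u8_ref(uint8x8_t a1, uint8x8_t a2) {
  uint8x8_t r = a1;
  // Each result lane is the smaller of the corresponding input lanes.
  for (int i = 0; i < 8; ++i)
    r[i] = a1[i] < a2[i] ? a1[i] : a2[i];
  return r;
}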

// Test the more complicated cases of [suf]32 and f64
float64x2_t test_vmaxq_f64(float64x2_t a1, float64x2_t a2) {
  // CHECK-LABEL: define <2 x double> @test_vmaxq_f64(
  return vmaxq_f64(a1, a2);
  // CHECK: call <2 x double> @llvm.aarch64.neon.fmax.v2f64(
}

float32x4_t test_vmaxq_f32(float32x4_t a1, float32x4_t a2) {
  // CHECK-LABEL: define <4 x float> @test_vmaxq_f32(
  return vmaxq_f32(a1, a2);
  // CHECK: call <4 x float> @llvm.aarch64.neon.fmax.v4f32(
}

float64x2_t test_vminq_f64(float64x2_t a1, float64x2_t a2) {
  // CHECK-LABEL: define <2 x double> @test_vminq_f64(
  return vminq_f64(a1, a2);
  // CHECK: call <2 x double> @llvm.aarch64.neon.fmin.v2f64(
}

float32x2_t test_vmax_f32(float32x2_t a1, float32x2_t a2) {
  // CHECK-LABEL: define <2 x float> @test_vmax_f32(
  return vmax_f32(a1, a2);
  // CHECK: call <2 x float> @llvm.aarch64.neon.fmax.v2f32(
}

int32x2_t test_vmax_s32(int32x2_t a1, int32x2_t a2) {
  // CHECK-LABEL: define <2 x i32> @test_vmax_s32(
  return vmax_s32(a1, a2);
  // CHECK: call <2 x i32> @llvm.aarch64.neon.smax.v2i32(
}

uint32x2_t test_vmin_u32(uint32x2_t a1, uint32x2_t a2) {
  // CHECK-LABEL: define <2 x i32> @test_vmin_u32(
  return vmin_u32(a1, a2);
  // CHECK: call <2 x i32> @llvm.aarch64.neon.umin.v2i32(
}

float32_t test_vmaxnmv_f32(float32x2_t a1) {
  // CHECK-LABEL: define float @test_vmaxnmv_f32(
  return vmaxnmv_f32(a1);
  // CHECK: llvm.aarch64.neon.fmaxnmv.f32.v2f32
  // CHECK-NEXT: ret
}

// this doesn't translate into a valid instruction, regardless of what the
// ARM doc says.
#if 0
float64_t test_vmaxnmvq_f64(float64x2_t a1) {
  // CHECK@ test_vmaxnmvq_f64
  return vmaxnmvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.saddlv.i64.v2i32
  // CHECK-NEXT@ ret
}
#endif
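
// Note (assumption, not from the original test): AArch64 appears to have no
// FMAXNMV form with a .2D arrangement, so a two-element f64 reduction would
// presumably be lowered through the pairwise FMAXNMP with a scalar D
// destination instead, which is why the case above stays disabled.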

float32_t test_vmaxnmvq_f32(float32x4_t a1) {
  // CHECK-LABEL: define float @test_vmaxnmvq_f32(
  return vmaxnmvq_f32(a1);
  // CHECK: call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(
  // CHECK-NEXT: ret
}

float32_t test_vmaxv_f32(float32x2_t a1) {
  // CHECK-LABEL: define float @test_vmaxv_f32(
  return vmaxv_f32(a1);
  // CHECK: call float @llvm.aarch64.neon.fmaxv.f32.v2f32(
  // FIXME check that the 2nd and 3rd arguments are the same V register below
  // (one possible pattern is sketched after this function)
  // CHECK-CODEGEN: fmaxp.2s
  // CHECK-NEXT: ret
}
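
// A FileCheck pattern variable could express the same-register constraint from
// the FIXME above (and the identical FIXMEs in the next two tests) once the
// exact operand format is confirmed. Kept inert with '@' like the other
// disabled checks; the operand layout below is an assumption, not verified
// output:
// CHECK-CODEGEN@ fmaxp.2s {{.*}}, v[[R:[0-9]+]], v[[R]]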

int32_t test_vmaxv_s32(int32x2_t a1) {
  // CHECK-LABEL: define i32 @test_vmaxv_s32(
  return vmaxv_s32(a1);
  // CHECK: call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(
  // FIXME check that the 2nd and 3rd arguments are the same V register below
  // CHECK-CODEGEN: smaxp.2s
  // CHECK-NEXT: ret
}

uint32_t test_vmaxv_u32(uint32x2_t a1) {
  // CHECK-LABEL: define i32 @test_vmaxv_u32(
  return vmaxv_u32(a1);
  // CHECK: call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(
  // FIXME check that the 2nd and 3rd arguments are the same V register below
  // CHECK-CODEGEN: umaxp.2s
  // CHECK-NEXT: ret
}

// FIXME punt on this for now; don't forget to fix CHECKs
#if 0
float64_t test_vmaxvq_f64(float64x2_t a1) {
  // CHECK@ test_vmaxvq_f64
  return vmaxvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.fmaxv.i64.v2f64
  // CHECK-NEXT@ ret
}
#endif

float32_t test_vmaxvq_f32(float32x4_t a1) {
  // CHECK-LABEL: define float @test_vmaxvq_f32(
  return vmaxvq_f32(a1);
  // CHECK: call float @llvm.aarch64.neon.fmaxv.f32.v4f32(
  // CHECK-NEXT: ret
}

float32_t test_vminnmv_f32(float32x2_t a1) {
  // CHECK-LABEL: define float @test_vminnmv_f32(
  return vminnmv_f32(a1);
  // CHECK: call float @llvm.aarch64.neon.fminnmv.f32.v2f32(
  // CHECK-NEXT: ret
}

float32_t test_vminvq_f32(float32x4_t a1) {
  // CHECK-LABEL: define float @test_vminvq_f32(
  return vminvq_f32(a1);
  // CHECK: call float @llvm.aarch64.neon.fminv.f32.v4f32(
  // CHECK-NEXT: ret
}

// this doesn't translate into a valid instruction, regardless of what the ARM
// doc says.
#if 0
float64_t test_vminnmvq_f64(float64x2_t a1) {
  // CHECK@ test_vminnmvq_f64
  return vminnmvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.saddlv.i64.v2i32
  // CHECK-NEXT@ ret
}
#endif

float32_t test_vminnmvq_f32(float32x4_t a1) {
  // CHECK-LABEL: define float @test_vminnmvq_f32(
  return vminnmvq_f32(a1);
  // CHECK: call float @llvm.aarch64.neon.fminnmv.f32.v4f32(
  // CHECK-NEXT: ret
}

float32_t test_vminv_f32(float32x2_t a1) {
  // CHECK-LABEL: define float @test_vminv_f32(
  return vminv_f32(a1);
  // CHECK: call float @llvm.aarch64.neon.fminv.f32.v2f32(
  // CHECK-NEXT: ret
}

int32_t test_vminv_s32(int32x2_t a1) {
  // CHECK-LABEL: define i32 @test_vminv_s32(
  return vminv_s32(a1);
  // CHECK: call i32 @llvm.aarch64.neon.sminv.i32.v2i32(
  // CHECK-CODEGEN: sminp.2s
  // CHECK-NEXT: ret
}

uint32_t test_vminv_u32(uint32x2_t a1) {
  // CHECK-LABEL: define i32 @test_vminv_u32(
  return vminv_u32(a1);
  // CHECK: call i32 @llvm.aarch64.neon.uminv.i32.v2i32(
}

// FIXME punt on this for now; don't forget to fix CHECKs
#if 0
float64_t test_vminvq_f64(float64x2_t a1) {
  // CHECK@ test_vminvq_f64
  return vminvq_f64(a1);
  // CHECK@ llvm.aarch64.neon.saddlv.i64.v2i32
  // CHECK-NEXT@ ret
}
#endif