author     Dimitry Andric <dim@FreeBSD.org>    2019-08-20 20:50:49 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2019-08-20 20:50:49 +0000
commit     2298981669bf3bd63335a4be179bc0f96823a8f4 (patch)
tree       1cbe2eb27f030d2d70b80ee5ca3c86bee7326a9f /lib/Headers/avx512fintrin.h
parent     9a83721404652cea39e9f02ae3e3b5c964602a5c (diff)
download   src-2298981669bf3bd63335a4be179bc0f96823a8f4.tar.gz
           src-2298981669bf3bd63335a4be179bc0f96823a8f4.zip
Vendor import of stripped clang trunk r366426 (just before the [...])    (tag: vendor/clang/clang-trunk-r366426)
Notes:
    svn path=/vendor/clang/dist/; revision=351280
    svn path=/vendor/clang/clang-trunk-r366426/; revision=351281; tag=vendor/clang/clang-trunk-r366426
Diffstat (limited to 'lib/Headers/avx512fintrin.h')
-rw-r--r--  lib/Headers/avx512fintrin.h | 115
1 file changed, 47 insertions(+), 68 deletions(-)
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index 1c19993ff1bb..132761f9ef5c 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -1,22 +1,8 @@
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
*
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@@ -40,9 +26,13 @@ typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));
-typedef float __m512 __attribute__((__vector_size__(64)));
-typedef double __m512d __attribute__((__vector_size__(64)));
-typedef long long __m512i __attribute__((__vector_size__(64)));
+typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
+typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
+typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
+
+typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
+typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
+typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
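The hunk above gives the 512-bit vector types an explicit __aligned__(64)
attribute and introduces 1-byte-aligned counterparts (__m512_u, __m512d_u,
__m512i_u), matching the existing __m128_u/__m256_u pattern in the
narrower headers. A minimal sketch of what the unaligned variants permit;
'buf' and 'read_at' are illustrative names, not part of the header:

    #include <immintrin.h>
    #include <stddef.h>

    static float buf[32];                      /* 128 bytes of storage */

    /* Dereferencing a __m512 * at an arbitrary byte offset would be
     * undefined behaviour (alignof(__m512) == 64); the 1-byte-aligned
     * __m512_u makes the same access legal and compiles to an
     * unaligned load.                                                 */
    static __m512 read_at(size_t off) {
        return *(const __m512_u *)((const char *)buf + off);
    }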
@@ -1991,12 +1981,12 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_add_round_pd((A), (B), (R)), \
- (__v8df)(__m512d)(W));
+ (__v8df)(__m512d)(W))
#define _mm512_maskz_add_round_pd(U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_add_round_pd((A), (B), (R)), \
- (__v8df)_mm512_setzero_pd());
+ (__v8df)_mm512_setzero_pd())
#define _mm512_add_round_ps(A, B, R) \
(__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
@@ -2005,12 +1995,12 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_add_round_ps((A), (B), (R)), \
- (__v16sf)(__m512)(W));
+ (__v16sf)(__m512)(W))
#define _mm512_maskz_add_round_ps(U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_add_round_ps((A), (B), (R)), \
- (__v16sf)_mm512_setzero_ps());
+ (__v16sf)_mm512_setzero_ps())
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
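The only change in this and the following arithmetic hunks is the removal
of a stray trailing semicolon from each masked rounding macro. With the
semicolon baked into the expansion, the macros could not be used where a
single expression is required. A sketch of the failure mode, assuming
-mavx512f; 'pick' is an illustrative name:

    #include <immintrin.h>

    /* With the old definition the expansion carried its own ';', so the
     * conditional below read "cond ? (...); : (...)", a syntax error.
     * After the fix this compiles.                                     */
    static __m512d pick(int cond, __mmask8 u, __m512d a, __m512d b) {
        return cond
            ? _mm512_maskz_add_round_pd(u, a, b, _MM_FROUND_CUR_DIRECTION)
            : _mm512_setzero_pd();
    }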
@@ -2106,12 +2096,12 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_sub_round_pd((A), (B), (R)), \
- (__v8df)(__m512d)(W));
+ (__v8df)(__m512d)(W))
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_sub_round_pd((A), (B), (R)), \
- (__v8df)_mm512_setzero_pd());
+ (__v8df)_mm512_setzero_pd())
#define _mm512_sub_round_ps(A, B, R) \
(__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
@@ -2120,12 +2110,12 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
- (__v16sf)(__m512)(W));
+ (__v16sf)(__m512)(W))
#define _mm512_maskz_sub_round_ps(U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
- (__v16sf)_mm512_setzero_ps());
+ (__v16sf)_mm512_setzero_ps())
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@@ -2221,12 +2211,12 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_mul_round_pd((A), (B), (R)), \
- (__v8df)(__m512d)(W));
+ (__v8df)(__m512d)(W))
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_mul_round_pd((A), (B), (R)), \
- (__v8df)_mm512_setzero_pd());
+ (__v8df)_mm512_setzero_pd())
#define _mm512_mul_round_ps(A, B, R) \
(__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
@@ -2235,12 +2225,12 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
- (__v16sf)(__m512)(W));
+ (__v16sf)(__m512)(W))
#define _mm512_maskz_mul_round_ps(U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
- (__v16sf)_mm512_setzero_ps());
+ (__v16sf)_mm512_setzero_ps())
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@@ -2349,12 +2339,12 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_div_round_pd((A), (B), (R)), \
- (__v8df)(__m512d)(W));
+ (__v8df)(__m512d)(W))
#define _mm512_maskz_div_round_pd(U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_div_round_pd((A), (B), (R)), \
- (__v8df)_mm512_setzero_pd());
+ (__v8df)_mm512_setzero_pd())
#define _mm512_div_round_ps(A, B, R) \
(__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
@@ -2363,12 +2353,12 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_div_round_ps(W, U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_div_round_ps((A), (B), (R)), \
- (__v16sf)(__m512)(W));
+ (__v16sf)(__m512)(W))
#define _mm512_maskz_div_round_ps(U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_div_round_ps((A), (B), (R)), \
- (__v16sf)_mm512_setzero_ps());
+ (__v16sf)_mm512_setzero_ps())
#define _mm512_roundscale_ps(A, B) \
(__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
@@ -3789,20 +3779,9 @@ _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
(__v16hi)_mm256_setzero_si256(), \
(__mmask16)(W))
-#define _mm512_cvtps_ph(A, I) \
- (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
- (__v16hi)_mm256_setzero_si256(), \
- (__mmask16)-1)
-
-#define _mm512_mask_cvtps_ph(U, W, A, I) \
- (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
- (__v16hi)(__m256i)(U), \
- (__mmask16)(W))
-
-#define _mm512_maskz_cvtps_ph(W, A, I) \
- (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
- (__v16hi)_mm256_setzero_si256(), \
- (__mmask16)(W))
+#define _mm512_cvtps_ph _mm512_cvt_roundps_ph
+#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
+#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
#define _mm512_cvt_roundph_ps(A, R) \
(__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
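The three _mm512_cvtps_ph macros had bodies identical to their
_mm512_cvt_roundps_ph counterparts, so they are replaced by plain
aliases. Usage is unchanged; a hedged sketch ('to_half' is an
illustrative name):

    #include <immintrin.h>

    /* Convert 16 single-precision floats to packed half precision;
     * _mm512_cvtps_ph(A, I) now expands via _mm512_cvt_roundps_ph.    */
    static __m256i to_half(__m512 v) {
        return _mm512_cvtps_ph(v, _MM_FROUND_TO_NEAREST_INT |
                                  _MM_FROUND_NO_EXC);
    }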
@@ -4324,7 +4303,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_si512 (void const *__P)
{
struct __loadu_si512 {
- __m512i __v;
+ __m512i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_si512*)__P)->__v;
}
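The unaligned loads keep the packed, may_alias wrapper-struct idiom and
only switch the member to the new 1-byte-aligned type. The packed
attribute is what drops the member's alignment requirement to 1, and
may_alias exempts the access from strict-aliasing analysis. The same
idiom, shown for a plain 64-bit integer so it stands alone ('load_u64'
is an illustrative name):

    #include <stdint.h>

    static inline uint64_t load_u64(const void *p) {
        /* alignment-1, aliasing-safe view of the bytes at p */
        struct u64_u { uint64_t v; } __attribute__((__packed__, __may_alias__));
        return ((const struct u64_u *)p)->v;
    }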
@@ -4333,7 +4312,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_epi32 (void const *__P)
{
struct __loadu_epi32 {
- __m512i __v;
+ __m512i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi32*)__P)->__v;
}
@@ -4360,7 +4339,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_epi64 (void const *__P)
{
struct __loadu_epi64 {
- __m512i __v;
+ __m512i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi64*)__P)->__v;
}
@@ -4420,7 +4399,7 @@ static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_loadu_pd(void const *__p)
{
struct __loadu_pd {
- __m512d __v;
+ __m512d_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_pd*)__p)->__v;
}
@@ -4429,7 +4408,7 @@ static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_loadu_ps(void const *__p)
{
struct __loadu_ps {
- __m512 __v;
+ __m512_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_ps*)__p)->__v;
}
@@ -4504,7 +4483,7 @@ static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi64 (void *__P, __m512i __A)
{
struct __storeu_epi64 {
- __m512i __v;
+ __m512i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi64*)__P)->__v = __A;
}
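The stores mirror the loads: assigning through the packed struct emits an
unaligned store. An illustrative caller, assuming -mavx512f ('fill_at' is
not from the header):

    #include <immintrin.h>
    #include <stddef.h>

    /* A 64-byte store at an arbitrary byte offset; the aligned
     * _mm512_store_si512 would require (dst + off) % 64 == 0.         */
    static void fill_at(unsigned char *dst, size_t off) {
        _mm512_storeu_si512(dst + off, _mm512_set1_epi32(0x5A5A5A5A));
    }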
@@ -4520,7 +4499,7 @@ static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_si512 (void *__P, __m512i __A)
{
struct __storeu_si512 {
- __m512i __v;
+ __m512i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_si512*)__P)->__v = __A;
}
@@ -4529,7 +4508,7 @@ static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi32 (void *__P, __m512i __A)
{
struct __storeu_epi32 {
- __m512i __v;
+ __m512i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi32*)__P)->__v = __A;
}
@@ -4551,7 +4530,7 @@ static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_pd(void *__P, __m512d __A)
{
struct __storeu_pd {
- __m512d __v;
+ __m512d_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_pd*)__P)->__v = __A;
}
@@ -4567,7 +4546,7 @@ static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_ps(void *__P, __m512 __A)
{
struct __storeu_ps {
- __m512 __v;
+ __m512_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_ps*)__P)->__v = __A;
}
@@ -9329,7 +9308,7 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
__v2du __t6 = __t4 op __t5; \
__v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
__v2du __t8 = __t6 op __t7; \
- return __t8[0];
+ return __t8[0]
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
_mm512_mask_reduce_operator(+);
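As with the rounding macros, the fix drops the semicolon after the final
"return" so that each _mm512_mask_reduce_operator expansion is exactly
one statement and the call site's semicolon (visible in the context line
above) terminates it, instead of leaving an empty statement behind the
return. A minimal reproduction of the shape, independent of AVX-512
('REDUCE2' and 'add2' are illustrative):

    /* The expansion ends in an unterminated "return"; the caller's
     * trailing ';' completes it.                                      */
    #define REDUCE2(op) \
        int t = a op b; \
        return t

    static inline int add2(int a, int b) { REDUCE2(+); }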
@@ -9381,7 +9360,7 @@ _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
__m128d __t6 = __t4 op __t5; \
__m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
__m128d __t8 = __t6 op __t7; \
- return __t8[0];
+ return __t8[0]
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
_mm512_mask_reduce_operator(+);
@@ -9415,7 +9394,7 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
__v4su __t8 = __t6 op __t7; \
__v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
__v4su __t10 = __t8 op __t9; \
- return __t10[0];
+ return __t10[0]
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_add_epi32(__m512i __W) {
@@ -9473,7 +9452,7 @@ _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
__m128 __t8 = __t6 op __t7; \
__m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
__m128 __t10 = __t8 op __t9; \
- return __t10[0];
+ return __t10[0]
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_add_ps(__m512 __W) {
@@ -9505,7 +9484,7 @@ _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
__m512i __t4 = _mm512_##op(__t2, __t3); \
__m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \
__v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \
- return __t6[0];
+ return __t6[0]
static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_reduce_max_epi64(__m512i __V) {
@@ -9563,7 +9542,7 @@ _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
__m128i __t8 = _mm_##op(__t6, __t7); \
__m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \
__v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \
- return __t10[0];
+ return __t10[0]
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_max_epi32(__m512i __V) {
@@ -9619,7 +9598,7 @@ _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
__m128d __t6 = _mm_##op(__t4, __t5); \
__m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
__m128d __t8 = _mm_##op(__t6, __t7); \
- return __t8[0];
+ return __t8[0]
static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_reduce_max_pd(__m512d __V) {
@@ -9655,7 +9634,7 @@ _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
__m128 __t8 = _mm_##op(__t6, __t7); \
__m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
__m128 __t10 = _mm_##op(__t8, __t9); \
- return __t10[0];
+ return __t10[0]
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_max_ps(__m512 __V) {
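All of these reduce helpers share one shape: halve the vector width
(512 -> 256 -> 128 bits), then finish the last four lanes with two
in-register shuffles. A standalone sketch of the same ladder for a float
max reduction, written against plain AVX-512F intrinsics; 'reduce_max'
is an illustrative name, not the header's macro:

    #include <immintrin.h>

    static inline float reduce_max(__m512 v) {
        __m256 lo = _mm512_castps512_ps256(v);
        __m256 hi = _mm256_castpd_ps(                  /* upper 256 bits */
            _mm512_extractf64x4_pd(_mm512_castps_pd(v), 1));
        __m256 m8 = _mm256_max_ps(lo, hi);
        __m128 m4 = _mm_max_ps(_mm256_castps256_ps128(m8),
                               _mm256_extractf128_ps(m8, 1));
        __m128 m2 = _mm_max_ps(m4, _mm_movehl_ps(m4, m4));   /* lanes 2,3 */
        __m128 m1 = _mm_max_ss(m2, _mm_shuffle_ps(m2, m2, 1)); /* lane 1 */
        return _mm_cvtss_f32(m1);
    }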