diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:49 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:49 +0000 |
commit | 2298981669bf3bd63335a4be179bc0f96823a8f4 (patch) | |
tree | 1cbe2eb27f030d2d70b80ee5ca3c86bee7326a9f /lib/Headers/avx512fintrin.h | |
parent | 9a83721404652cea39e9f02ae3e3b5c964602a5c (diff) | |
download | src-2298981669bf3bd63335a4be179bc0f96823a8f4.tar.gz src-2298981669bf3bd63335a4be179bc0f96823a8f4.zip |
Vendor import of stripped clang trunk r366426 (just before thevendor/clang/clang-trunk-r366426
release_90 branch point):
https://llvm.org/svn/llvm-project/cfe/trunk@366426
Notes
Notes:
svn path=/vendor/clang/dist/; revision=351280
svn path=/vendor/clang/clang-trunk-r366426/; revision=351281; tag=vendor/clang/clang-trunk-r366426
Diffstat (limited to 'lib/Headers/avx512fintrin.h')
-rw-r--r-- | lib/Headers/avx512fintrin.h | 115 |
1 files changed, 47 insertions, 68 deletions
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index 1c19993ff1bb..132761f9ef5c 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -1,22 +1,8 @@ /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -40,9 +26,13 @@ typedef unsigned short __v32hu __attribute__((__vector_size__(64))); typedef unsigned long long __v8du __attribute__((__vector_size__(64))); typedef unsigned int __v16su __attribute__((__vector_size__(64))); -typedef float __m512 __attribute__((__vector_size__(64))); -typedef double __m512d __attribute__((__vector_size__(64))); -typedef long long __m512i __attribute__((__vector_size__(64))); +typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64))); +typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64))); +typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64))); + +typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1))); +typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1))); +typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1))); typedef unsigned char __mmask8; typedef unsigned short __mmask16; @@ -1991,12 +1981,12 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_add_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_add_round_pd((A), (B), (R)), \ - (__v8df)(__m512d)(W)); + (__v8df)(__m512d)(W)) #define _mm512_maskz_add_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_add_round_pd((A), (B), (R)), \ - (__v8df)_mm512_setzero_pd()); + (__v8df)_mm512_setzero_pd()) #define _mm512_add_round_ps(A, B, R) \ (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \ @@ -2005,12 +1995,12 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_add_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ - (__v16sf)(__m512)(W)); + (__v16sf)(__m512)(W)) #define _mm512_maskz_add_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ - (__v16sf)_mm512_setzero_ps()); + (__v16sf)_mm512_setzero_ps()) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { @@ -2106,12 +2096,12 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_sub_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_sub_round_pd((A), (B), (R)), \ - (__v8df)(__m512d)(W)); + (__v8df)(__m512d)(W)) #define _mm512_maskz_sub_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_sub_round_pd((A), (B), (R)), \ - (__v8df)_mm512_setzero_pd()); + (__v8df)_mm512_setzero_pd()) #define _mm512_sub_round_ps(A, B, R) \ (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \ @@ -2120,12 +2110,12 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_sub_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ - (__v16sf)(__m512)(W)); + (__v16sf)(__m512)(W)) #define _mm512_maskz_sub_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ - (__v16sf)_mm512_setzero_ps()); + (__v16sf)_mm512_setzero_ps()) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { @@ -2221,12 +2211,12 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_mul_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ - (__v8df)(__m512d)(W)); + (__v8df)(__m512d)(W)) #define _mm512_maskz_mul_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ - (__v8df)_mm512_setzero_pd()); + (__v8df)_mm512_setzero_pd()) #define _mm512_mul_round_ps(A, B, R) \ (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \ @@ -2235,12 +2225,12 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_mul_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ - (__v16sf)(__m512)(W)); + (__v16sf)(__m512)(W)) #define _mm512_maskz_mul_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ - (__v16sf)_mm512_setzero_ps()); + (__v16sf)_mm512_setzero_ps()) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { @@ -2349,12 +2339,12 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_div_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_div_round_pd((A), (B), (R)), \ - (__v8df)(__m512d)(W)); + (__v8df)(__m512d)(W)) #define _mm512_maskz_div_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_div_round_pd((A), (B), (R)), \ - (__v8df)_mm512_setzero_pd()); + (__v8df)_mm512_setzero_pd()) #define _mm512_div_round_ps(A, B, R) \ (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \ @@ -2363,12 +2353,12 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_div_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ - (__v16sf)(__m512)(W)); + (__v16sf)(__m512)(W)) #define _mm512_maskz_div_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ - (__v16sf)_mm512_setzero_ps()); + (__v16sf)_mm512_setzero_ps()) #define _mm512_roundscale_ps(A, B) \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \ @@ -3789,20 +3779,9 @@ _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) (__v16hi)_mm256_setzero_si256(), \ (__mmask16)(W)) -#define _mm512_cvtps_ph(A, I) \ - (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ - (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)-1) - -#define _mm512_mask_cvtps_ph(U, W, A, I) \ - (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ - (__v16hi)(__m256i)(U), \ - (__mmask16)(W)) - -#define _mm512_maskz_cvtps_ph(W, A, I) \ - (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ - (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)(W)) +#define _mm512_cvtps_ph _mm512_cvt_roundps_ph +#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph +#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph #define _mm512_cvt_roundph_ps(A, R) \ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ @@ -4324,7 +4303,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512 (void const *__P) { struct __loadu_si512 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_si512*)__P)->__v; } @@ -4333,7 +4312,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32 (void const *__P) { struct __loadu_epi32 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_epi32*)__P)->__v; } @@ -4360,7 +4339,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64 (void const *__P) { struct __loadu_epi64 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_epi64*)__P)->__v; } @@ -4420,7 +4399,7 @@ static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p) { struct __loadu_pd { - __m512d __v; + __m512d_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_pd*)__p)->__v; } @@ -4429,7 +4408,7 @@ static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p) { struct __loadu_ps { - __m512 __v; + __m512_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_ps*)__p)->__v; } @@ -4504,7 +4483,7 @@ static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64 (void *__P, __m512i __A) { struct __storeu_epi64 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi64*)__P)->__v = __A; } @@ -4520,7 +4499,7 @@ static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512 (void *__P, __m512i __A) { struct __storeu_si512 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si512*)__P)->__v = __A; } @@ -4529,7 +4508,7 @@ static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32 (void *__P, __m512i __A) { struct __storeu_epi32 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi32*)__P)->__v = __A; } @@ -4551,7 +4530,7 @@ static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A) { struct __storeu_pd { - __m512d __v; + __m512d_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_pd*)__P)->__v = __A; } @@ -4567,7 +4546,7 @@ static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A) { struct __storeu_ps { - __m512 __v; + __m512_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_ps*)__P)->__v = __A; } @@ -9329,7 +9308,7 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) __v2du __t6 = __t4 op __t5; \ __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ __v2du __t8 = __t6 op __t7; \ - return __t8[0]; + return __t8[0] static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) { _mm512_mask_reduce_operator(+); @@ -9381,7 +9360,7 @@ _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) { __m128d __t6 = __t4 op __t5; \ __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ __m128d __t8 = __t6 op __t7; \ - return __t8[0]; + return __t8[0] static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) { _mm512_mask_reduce_operator(+); @@ -9415,7 +9394,7 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { __v4su __t8 = __t6 op __t7; \ __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ __v4su __t10 = __t8 op __t9; \ - return __t10[0]; + return __t10[0] static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W) { @@ -9473,7 +9452,7 @@ _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) { __m128 __t8 = __t6 op __t7; \ __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ __m128 __t10 = __t8 op __t9; \ - return __t10[0]; + return __t10[0] static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W) { @@ -9505,7 +9484,7 @@ _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { __m512i __t4 = _mm512_##op(__t2, __t3); \ __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \ __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \ - return __t6[0]; + return __t6[0] static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi64(__m512i __V) { @@ -9563,7 +9542,7 @@ _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) { __m128i __t8 = _mm_##op(__t6, __t7); \ __m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \ __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \ - return __t10[0]; + return __t10[0] static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi32(__m512i __V) { @@ -9619,7 +9598,7 @@ _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) { __m128d __t6 = _mm_##op(__t4, __t5); \ __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ __m128d __t8 = _mm_##op(__t6, __t7); \ - return __t8[0]; + return __t8[0] static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V) { @@ -9655,7 +9634,7 @@ _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) { __m128 __t8 = _mm_##op(__t6, __t7); \ __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ __m128 __t10 = _mm_##op(__t8, __t9); \ - return __t10[0]; + return __t10[0] static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V) { |