diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/Headers/avx512bwintrin.h')
-rw-r--r-- | contrib/llvm-project/clang/lib/Headers/avx512bwintrin.h | 232 |
1 files changed, 111 insertions, 121 deletions
diff --git a/contrib/llvm-project/clang/lib/Headers/avx512bwintrin.h b/contrib/llvm-project/clang/lib/Headers/avx512bwintrin.h index 4281a33d375c..c854720de6a6 100644 --- a/contrib/llvm-project/clang/lib/Headers/avx512bwintrin.h +++ b/contrib/llvm-project/clang/lib/Headers/avx512bwintrin.h @@ -18,8 +18,12 @@ typedef unsigned int __mmask32; typedef unsigned long long __mmask64; /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bw,evex512"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bw,no-evex512"))) static __inline __mmask32 __DEFAULT_FN_ATTRS _knot_mask32(__mmask32 __M) @@ -27,9 +31,7 @@ _knot_mask32(__mmask32 __M) return __builtin_ia32_knotsi(__M); } -static __inline __mmask64 __DEFAULT_FN_ATTRS -_knot_mask64(__mmask64 __M) -{ +static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) { return __builtin_ia32_knotdi(__M); } @@ -39,9 +41,8 @@ _kand_mask32(__mmask32 __A, __mmask32 __B) return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_kand_mask64(__mmask64 __A, __mmask64 __B) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A, + __mmask64 __B) { return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); } @@ -51,9 +52,8 @@ _kandn_mask32(__mmask32 __A, __mmask32 __B) return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_kandn_mask64(__mmask64 __A, __mmask64 __B) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A, + __mmask64 __B) { return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); } @@ -63,9 +63,8 @@ _kor_mask32(__mmask32 __A, __mmask32 __B) return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_kor_mask64(__mmask64 __A, __mmask64 __B) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A, + __mmask64 __B) { return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); } @@ -75,9 +74,8 @@ _kxnor_mask32(__mmask32 __A, __mmask32 __B) return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_kxnor_mask64(__mmask64 __A, __mmask64 __B) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A, + __mmask64 __B) { return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); } @@ -87,9 +85,8 @@ _kxor_mask32(__mmask32 __A, __mmask32 __B) return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_kxor_mask64(__mmask64 __A, __mmask64 __B) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A, + __mmask64 __B) { return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); } @@ -112,14 +109,12 @@ _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) -{ +_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_kortestcdi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) -{ +_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); } @@ -148,14 +143,12 @@ _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) -{ +_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_ktestcdi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) -{ +_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); } @@ -171,23 +164,22 @@ _kadd_mask32(__mmask32 __A, __mmask32 __B) return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_kadd_mask64(__mmask64 __A, __mmask64 __B) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A, + __mmask64 __B) { return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); } #define _kshiftli_mask32(A, I) \ - (__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)) + ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I))) #define _kshiftri_mask32(A, I) \ - (__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)) + ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I))) #define _kshiftli_mask64(A, I) \ - (__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)) + ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I))) #define _kshiftri_mask64(A, I) \ - (__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)) + ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I))) static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask32_u32(__mmask32 __A) { @@ -214,8 +206,7 @@ _load_mask32(__mmask32 *__A) { return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_load_mask64(__mmask64 *__A) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _load_mask64(__mmask64 *__A) { return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A); } @@ -224,52 +215,52 @@ _store_mask32(__mmask32 *__A, __mmask32 __B) { *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B); } -static __inline__ void __DEFAULT_FN_ATTRS -_store_mask64(__mmask64 *__A, __mmask64 __B) { +static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A, + __mmask64 __B) { *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B); } /* Integer compare */ #define _mm512_cmp_epi8_mask(a, b, p) \ - (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ - (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)-1) + ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), (int)(p), \ + (__mmask64)-1)) #define _mm512_mask_cmp_epi8_mask(m, a, b, p) \ - (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ - (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)(m)) + ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), (int)(p), \ + (__mmask64)(m))) #define _mm512_cmp_epu8_mask(a, b, p) \ - (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ - (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)-1) + ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), (int)(p), \ + (__mmask64)-1)) #define _mm512_mask_cmp_epu8_mask(m, a, b, p) \ - (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ - (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)(m)) + ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), (int)(p), \ + (__mmask64)(m))) #define _mm512_cmp_epi16_mask(a, b, p) \ - (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ - (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)-1) + ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), (int)(p), \ + (__mmask32)-1)) #define _mm512_mask_cmp_epi16_mask(m, a, b, p) \ - (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ - (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)(m)) + ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), (int)(p), \ + (__mmask32)(m))) #define _mm512_cmp_epu16_mask(a, b, p) \ - (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ - (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)-1) + ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), (int)(p), \ + (__mmask32)-1)) #define _mm512_mask_cmp_epu16_mask(m, a, b, p) \ - (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ - (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)(m)) + ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), (int)(p), \ + (__mmask32)(m))) #define _mm512_cmpeq_epi8_mask(A, B) \ _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) @@ -485,7 +476,7 @@ _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi8 (__m512i __A) { - return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A); + return (__m512i)__builtin_elementwise_abs((__v64qs)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -507,7 +498,7 @@ _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi16 (__m512i __A) { - return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A); + return (__m512i)__builtin_elementwise_abs((__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -617,7 +608,7 @@ _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epi8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B); + return (__m512i)__builtin_elementwise_add_sat((__v64qs)__A, (__v64qs)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -639,7 +630,7 @@ _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epi16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B); + return (__m512i)__builtin_elementwise_add_sat((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -661,7 +652,7 @@ _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epu8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B); + return (__m512i)__builtin_elementwise_add_sat((__v64qu) __A, (__v64qu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -683,7 +674,7 @@ _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epu16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B); + return (__m512i)__builtin_elementwise_add_sat((__v32hu) __A, (__v32hu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -751,7 +742,7 @@ _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B); + return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -773,7 +764,7 @@ _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B); + return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -796,7 +787,7 @@ _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B); + return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -818,7 +809,7 @@ _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B); + return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -840,7 +831,7 @@ _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B); + return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -862,7 +853,7 @@ _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B); + return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -884,7 +875,7 @@ _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B); + return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -906,7 +897,7 @@ _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B); + return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -950,7 +941,7 @@ _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epi8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B); + return (__m512i)__builtin_elementwise_sub_sat((__v64qs)__A, (__v64qs)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -972,7 +963,7 @@ _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epi16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B); + return (__m512i)__builtin_elementwise_sub_sat((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -994,7 +985,7 @@ _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epu8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B); + return (__m512i)__builtin_elementwise_sub_sat((__v64qu) __A, (__v64qu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1016,7 +1007,7 @@ _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epu16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B); + return (__m512i)__builtin_elementwise_sub_sat((__v32hu) __A, (__v32hu) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1428,36 +1419,36 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) #define _mm512_shufflehi_epi16(A, imm) \ - (__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)) + ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm))) #define _mm512_mask_shufflehi_epi16(W, U, A, imm) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_shufflehi_epi16((A), \ - (imm)), \ - (__v32hi)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shufflehi_epi16((A), \ + (imm)), \ + (__v32hi)(__m512i)(W))) #define _mm512_maskz_shufflehi_epi16(U, A, imm) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_shufflehi_epi16((A), \ - (imm)), \ - (__v32hi)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shufflehi_epi16((A), \ + (imm)), \ + (__v32hi)_mm512_setzero_si512())) #define _mm512_shufflelo_epi16(A, imm) \ - (__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)) + ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm))) #define _mm512_mask_shufflelo_epi16(W, U, A, imm) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_shufflelo_epi16((A), \ - (imm)), \ - (__v32hi)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shufflelo_epi16((A), \ + (imm)), \ + (__v32hi)(__m512i)(W))) #define _mm512_maskz_shufflelo_epi16(U, A, imm) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_shufflelo_epi16((A), \ - (imm)), \ - (__v32hi)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shufflelo_epi16((A), \ + (imm)), \ + (__v32hi)_mm512_setzero_si512())) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi16(__m512i __A, __m512i __B) @@ -1506,7 +1497,7 @@ _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi16(__m512i __A, unsigned int __B) { - return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B); + return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, (int)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1527,7 +1518,7 @@ _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B) } #define _mm512_bslli_epi128(a, imm) \ - (__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) + ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi16(__m512i __A, __m512i __B) @@ -1598,7 +1589,7 @@ _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi16(__m512i __A, unsigned int __B) { - return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B); + return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, (int)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1643,7 +1634,7 @@ _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi16(__m512i __A, unsigned int __B) { - return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B); + return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, (int)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1659,12 +1650,12 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, - (__v32hi)_mm512_srli_epi16(__A, __B), + (__v32hi)_mm512_srli_epi16(__A, (unsigned int)__B), (__v32hi)_mm512_setzero_si512()); } #define _mm512_bsrli_epi128(a, imm) \ - (__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) + ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) @@ -1714,9 +1705,8 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A) (__v64qi) _mm512_setzero_si512()); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_mm512_kunpackd (__mmask64 __A, __mmask64 __B) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A, + __mmask64 __B) { return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, (__mmask64) __B); } @@ -1984,32 +1974,32 @@ _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, } #define _mm512_alignr_epi8(A, B, N) \ - (__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N)) + ((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), (int)(N))) #define _mm512_mask_alignr_epi8(W, U, A, B, N) \ - (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ - (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ - (__v64qi)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ + (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ + (__v64qi)(__m512i)(W))) #define _mm512_maskz_alignr_epi8(U, A, B, N) \ - (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ + ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ - (__v64qi)(__m512i)_mm512_setzero_si512()) + (__v64qi)(__m512i)_mm512_setzero_si512())) #define _mm512_dbsad_epu8(A, B, imm) \ - (__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(imm)) + ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), (int)(imm))) #define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ - (__v32hi)(__m512i)(W)) + (__v32hi)(__m512i)(W))) #define _mm512_maskz_dbsad_epu8(U, A, B, imm) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ - (__v32hi)_mm512_setzero_si512()) + (__v32hi)_mm512_setzero_si512())) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sad_epu8 (__m512i __A, __m512i __B) |