diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/Headers/xmmintrin.h')
-rw-r--r-- | contrib/llvm-project/clang/lib/Headers/xmmintrin.h | 43 |
1 files changed, 28 insertions, 15 deletions
diff --git a/contrib/llvm-project/clang/lib/Headers/xmmintrin.h b/contrib/llvm-project/clang/lib/Headers/xmmintrin.h index f4686691c7ed..47368f3c23d2 100644 --- a/contrib/llvm-project/clang/lib/Headers/xmmintrin.h +++ b/contrib/llvm-project/clang/lib/Headers/xmmintrin.h @@ -10,6 +10,10 @@ #ifndef __XMMINTRIN_H #define __XMMINTRIN_H +#if !defined(__i386__) && !defined(__x86_64__) +#error "This header is only meant to be used on x86 and x64 architecture" +#endif + #include <mmintrin.h> typedef int __v4si __attribute__((__vector_size__(16))); @@ -28,8 +32,12 @@ typedef unsigned int __v4su __attribute__((__vector_size__(16))); #endif /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse"), __min_vector_width__(64))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("sse,no-evex512"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_MMX \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("mmx,sse,no-evex512"), __min_vector_width__(64))) /// Adds the 32-bit float values in the low-order bits of the operands. /// @@ -1902,7 +1910,7 @@ _mm_setr_ps(float __z, float __y, float __x, float __w) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void) { - return __extension__ (__m128){ 0, 0, 0, 0 }; + return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f }; } /// Stores the upper 64 bits of a 128-bit vector of [4 x float] to a @@ -2082,7 +2090,7 @@ _mm_storer_ps(float *__p, __m128 __a) /// \headerfile <x86intrin.h> /// /// \code -/// void _mm_prefetch(const void * a, const int sel); +/// void _mm_prefetch(const void *a, const int sel); /// \endcode /// /// This intrinsic corresponds to the <c> PREFETCHNTA </c> instruction. @@ -2117,9 +2125,9 @@ _mm_storer_ps(float *__p, __m128 __a) /// \param __a /// A 64-bit integer containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS_MMX -_mm_stream_pi(__m64 *__p, __m64 __a) +_mm_stream_pi(void *__p, __m64 __a) { - __builtin_ia32_movntq(__p, __a); + __builtin_ia32_movntq((__m64 *)__p, __a); } /// Moves packed float values from a 128-bit vector of [4 x float] to a @@ -2136,7 +2144,7 @@ _mm_stream_pi(__m64 *__p, __m64 __a) /// \param __a /// A 128-bit vector of [4 x float] containing the values to be moved. static __inline__ void __DEFAULT_FN_ATTRS -_mm_stream_ps(float *__p, __m128 __a) +_mm_stream_ps(void *__p, __m128 __a) { __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p); } @@ -2181,7 +2189,7 @@ void _mm_sfence(void); /// 3: Bits [63:48] are copied to the destination. /// \returns A 16-bit integer containing the extracted 16 bits of packed data. #define _mm_extract_pi16(a, n) \ - (int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n) + ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)) /// Copies data from the 64-bit vector of [4 x i16] to the destination, /// and inserts the lower 16-bits of an integer operand at the 16-bit offset @@ -2212,7 +2220,7 @@ void _mm_sfence(void); /// \returns A 64-bit integer vector containing the copied packed data from the /// operands. #define _mm_insert_pi16(a, d, n) \ - (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n) + ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)) /// Compares each of the corresponding packed 16-bit integer values of /// the 64-bit integer vectors, and writes the greater value to the @@ -2356,10 +2364,13 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b) /// 00: assigned from bits [15:0] of \a a. \n /// 01: assigned from bits [31:16] of \a a. \n /// 10: assigned from bits [47:32] of \a a. \n -/// 11: assigned from bits [63:48] of \a a. +/// 11: assigned from bits [63:48] of \a a. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. +/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form +/// <c>[b6, b4, b2, b0]</c>. /// \returns A 64-bit integer vector containing the shuffled values. #define _mm_shuffle_pi16(a, n) \ - (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)) + ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))) /// Conditionally copies the values from each 8-bit element in the first /// 64-bit integer vector operand to the specified memory location, as @@ -2598,11 +2609,14 @@ void _mm_setcsr(unsigned int __i); /// 00: Bits [31:0] copied from the specified operand. \n /// 01: Bits [63:32] copied from the specified operand. \n /// 10: Bits [95:64] copied from the specified operand. \n -/// 11: Bits [127:96] copied from the specified operand. +/// 11: Bits [127:96] copied from the specified operand. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. +/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form +/// <c>[b6, b4, b2, b0]</c>. /// \returns A 128-bit vector of [4 x float] containing the shuffled values. #define _mm_shuffle_ps(a, b, mask) \ - (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ - (int)(mask)) + ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ + (int)(mask))) /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of /// [4 x float] and interleaves them into a 128-bit vector of [4 x float]. @@ -2995,7 +3009,6 @@ do { \ #define _m_pavgw _mm_avg_pu16 #define _m_psadbw _mm_sad_pu8 #define _m_ _mm_ -#define _m_ _mm_ #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_MMX |