diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/Headers/immintrin.h')
-rw-r--r-- | contrib/llvm-project/clang/lib/Headers/immintrin.h | 279 |
1 files changed, 250 insertions, 29 deletions
diff --git a/contrib/llvm-project/clang/lib/Headers/immintrin.h b/contrib/llvm-project/clang/lib/Headers/immintrin.h index 56d3dadf6a33..27800f7a8202 100644 --- a/contrib/llvm-project/clang/lib/Headers/immintrin.h +++ b/contrib/llvm-project/clang/lib/Headers/immintrin.h @@ -10,6 +10,10 @@ #ifndef __IMMINTRIN_H #define __IMMINTRIN_H +#if !defined(__i386__) && !defined(__x86_64__) +#error "This header is only meant to be used on x86 and x64 architecture" +#endif + #include <x86gprintrin.h> #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ @@ -186,6 +190,11 @@ #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVXIFMA__) +#include <avxifmaintrin.h> +#endif + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VBMI__) #include <avx512vbmiintrin.h> #endif @@ -211,6 +220,16 @@ #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512FP16__) +#include <avx512fp16intrin.h> +#endif + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + (defined(__AVX512VL__) && defined(__AVX512FP16__)) +#include <avx512vlfp16intrin.h> +#endif + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512BF16__) #include <avx512bf16intrin.h> #endif @@ -241,12 +260,44 @@ #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVXVNNIINT8__) +#include <avxvnniint8intrin.h> +#endif + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVXNECONVERT__) +#include <avxneconvertintrin.h> +#endif + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__SHA512__) +#include <sha512intrin.h> +#endif + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__SM3__) +#include <sm3intrin.h> +#endif + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__SM4__) +#include <sm4intrin.h> +#endif + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVXVNNIINT16__) +#include <avxvnniint16intrin.h> +#endif + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDPID__) -/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103). +/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103). /// /// \headerfile <immintrin.h> /// /// This intrinsic corresponds to the <c> RDPID </c> instruction. +/// +/// \returns The 32-bit contents of the MSR. static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid"))) _rdpid_u32(void) { return __builtin_ia32_rdpid(); @@ -255,72 +306,172 @@ _rdpid_u32(void) { #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDRND__) +/// Returns a 16-bit hardware-generated random value. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> RDRAND </c> instruction. +/// +/// \param __p +/// A pointer to a 16-bit memory location to place the random value. +/// \returns 1 if the value was successfully generated, 0 otherwise. static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand16_step(unsigned short *__p) { - return __builtin_ia32_rdrand16_step(__p); + return (int)__builtin_ia32_rdrand16_step(__p); } +/// Returns a 32-bit hardware-generated random value. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> RDRAND </c> instruction. +/// +/// \param __p +/// A pointer to a 32-bit memory location to place the random value. +/// \returns 1 if the value was successfully generated, 0 otherwise. static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand32_step(unsigned int *__p) { - return __builtin_ia32_rdrand32_step(__p); + return (int)__builtin_ia32_rdrand32_step(__p); } -#ifdef __x86_64__ +/// Returns a 64-bit hardware-generated random value. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> RDRAND </c> instruction. +/// +/// \param __p +/// A pointer to a 64-bit memory location to place the random value. +/// \returns 1 if the value was successfully generated, 0 otherwise. static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand64_step(unsigned long long *__p) { - return __builtin_ia32_rdrand64_step(__p); -} +#ifdef __x86_64__ + return (int)__builtin_ia32_rdrand64_step(__p); +#else + // We need to emulate the functionality of 64-bit rdrand with 2 32-bit + // rdrand instructions. + unsigned int __lo, __hi; + unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo); + unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi); + if (__res_lo && __res_hi) { + *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo; + return 1; + } else { + *__p = 0; + return 0; + } #endif +} #endif /* __RDRND__ */ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__FSGSBASE__) #ifdef __x86_64__ +/// Reads the FS base register. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction. +/// +/// \returns The lower 32 bits of the FS base register. static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readfsbase_u32(void) { return __builtin_ia32_rdfsbase32(); } +/// Reads the FS base register. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction. +/// +/// \returns The contents of the FS base register. static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readfsbase_u64(void) { return __builtin_ia32_rdfsbase64(); } +/// Reads the GS base register. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction. +/// +/// \returns The lower 32 bits of the GS base register. static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readgsbase_u32(void) { return __builtin_ia32_rdgsbase32(); } +/// Reads the GS base register. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction. +/// +/// \returns The contents of the GS base register. static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readgsbase_u64(void) { return __builtin_ia32_rdgsbase64(); } +/// Modifies the FS base register. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction. +/// +/// \param __V +/// Value to use for the lower 32 bits of the FS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writefsbase_u32(unsigned int __V) { __builtin_ia32_wrfsbase32(__V); } +/// Modifies the FS base register. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction. +/// +/// \param __V +/// Value to use for the FS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writefsbase_u64(unsigned long long __V) { __builtin_ia32_wrfsbase64(__V); } +/// Modifies the GS base register. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction. +/// +/// \param __V +/// Value to use for the lower 32 bits of the GS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writegsbase_u32(unsigned int __V) { __builtin_ia32_wrgsbase32(__V); } +/// Modifies the GS base register. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction. +/// +/// \param __V +/// Value to use for GS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writegsbase_u64(unsigned long long __V) { @@ -339,53 +490,110 @@ _writegsbase_u64(unsigned long long __V) * field inside of it. */ +/// Load a 16-bit value from memory and swap its bytes. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the MOVBE instruction. +/// +/// \param __P +/// A pointer to the 16-bit value to load. +/// \returns The byte-swapped value. static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i16(void const * __P) { struct __loadu_i16 { - short __v; + unsigned short __v; } __attribute__((__packed__, __may_alias__)); - return __builtin_bswap16(((const struct __loadu_i16*)__P)->__v); + return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v); } +/// Swap the bytes of a 16-bit value and store it to memory. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the MOVBE instruction. +/// +/// \param __P +/// A pointer to the memory for storing the swapped value. +/// \param __D +/// The 16-bit value to be byte-swapped. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i16(void * __P, short __D) { struct __storeu_i16 { - short __v; + unsigned short __v; } __attribute__((__packed__, __may_alias__)); - ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D); + ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D); } +/// Load a 32-bit value from memory and swap its bytes. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the MOVBE instruction. +/// +/// \param __P +/// A pointer to the 32-bit value to load. +/// \returns The byte-swapped value. static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i32(void const * __P) { struct __loadu_i32 { - int __v; + unsigned int __v; } __attribute__((__packed__, __may_alias__)); - return __builtin_bswap32(((const struct __loadu_i32*)__P)->__v); + return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v); } +/// Swap the bytes of a 32-bit value and store it to memory. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the MOVBE instruction. +/// +/// \param __P +/// A pointer to the memory for storing the swapped value. +/// \param __D +/// The 32-bit value to be byte-swapped. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i32(void * __P, int __D) { struct __storeu_i32 { - int __v; + unsigned int __v; } __attribute__((__packed__, __may_alias__)); - ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D); + ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D); } #ifdef __x86_64__ +/// Load a 64-bit value from memory and swap its bytes. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the MOVBE instruction. +/// +/// \param __P +/// A pointer to the 64-bit value to load. +/// \returns The byte-swapped value. static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i64(void const * __P) { struct __loadu_i64 { - long long __v; + unsigned long long __v; } __attribute__((__packed__, __may_alias__)); - return __builtin_bswap64(((const struct __loadu_i64*)__P)->__v); + return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v); } +/// Swap the bytes of a 64-bit value and store it to memory. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the MOVBE instruction. +/// +/// \param __P +/// A pointer to the memory for storing the swapped value. +/// \param __D +/// The 64-bit value to be byte-swapped. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i64(void * __P, long long __D) { struct __storeu_i64 { - long long __v; + unsigned long long __v; } __attribute__((__packed__, __may_alias__)); - ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D); + ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D); } #endif #endif /* __MOVBE */ @@ -429,9 +637,13 @@ _storebe_i64(void * __P, long long __D) { #include <cetintrin.h> #endif -/* Some intrinsics inside adxintrin.h are available only on processors with ADX, - * whereas others are also available at all times. */ +/* Intrinsics inside adcintrin.h are available at all times. */ +#include <adcintrin.h> + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__ADX__) #include <adxintrin.h> +#endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDSEED__) @@ -477,6 +689,10 @@ _storebe_i64(void * __P, long long __D) { defined(__INVPCID__) #include <invpcidintrin.h> #endif +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AMX_FP16__) +#include <amxfp16intrin.h> +#endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__KL__) || defined(__WIDEKL__) @@ -484,11 +700,16 @@ _storebe_i64(void * __P, long long __D) { #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ - defined(__AMXTILE__) || defined(__AMXINT8__) || defined(__AMXBF16__) + defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__) #include <amxintrin.h> #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AMX_COMPLEX__) +#include <amxcomplexintrin.h> +#endif + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VP2INTERSECT__) #include <avx512vp2intersectintrin.h> #endif @@ -525,13 +746,13 @@ extern "C" { #if defined(__i386__) || defined(__x86_64__) static __inline__ long __DEFAULT_FN_ATTRS _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { - __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1" + __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } static __inline__ long __DEFAULT_FN_ATTRS _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { - __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1" + __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } @@ -539,13 +760,13 @@ _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { #if defined(__x86_64__) static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { - __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1" + __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { - __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1" + __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } @@ -557,7 +778,7 @@ _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { static __inline__ long __DEFAULT_FN_ATTRS _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, long _Exchange, long _Comparand) { - __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1" + __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; @@ -565,7 +786,7 @@ _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, static __inline__ long __DEFAULT_FN_ATTRS _InterlockedCompareExchange_HLERelease(long volatile *_Destination, long _Exchange, long _Comparand) { - __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1" + __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; @@ -575,7 +796,7 @@ _InterlockedCompareExchange_HLERelease(long volatile *_Destination, static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand) { - __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1" + __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; @@ -583,7 +804,7 @@ _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand) { - __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1" + __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; |