aboutsummaryrefslogtreecommitdiff
path: root/lib/Headers/avx512fintrin.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Headers/avx512fintrin.h')
-rw-r--r--  lib/Headers/avx512fintrin.h | 701
1 files changed, 659 insertions, 42 deletions
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index 4f7cba0b1507..8dcdc710d5c3 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -1,4 +1,4 @@
-/*===---- avx512fintrin.h - AVX2 intrinsics --------------------------------===
+/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -47,7 +47,7 @@ typedef unsigned short __mmask16;
#define _MM_FROUND_CUR_DIRECTION 0x04
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
/* Create vectors with repeated elements */
@@ -57,6 +57,30 @@ _mm512_setzero_si512(void)
return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_undefined_pd()
+{
+ return (__m512d)__builtin_ia32_undef512();
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_undefined()
+{
+ return (__m512)__builtin_ia32_undef512();
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_undefined_ps()
+{
+ return (__m512)__builtin_ia32_undef512();
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_undefined_epi32()
+{
+ return (__m512i)__builtin_ia32_undef512();
+}
+
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
@@ -543,6 +567,66 @@ _mm512_max_ps(__m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_max_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+
+#define _mm_mask_max_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_max_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_max_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm_mask_max_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_max_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+
static __inline __m512i
__DEFAULT_FN_ATTRS
_mm512_max_epi32(__m512i __A, __m512i __B)
@@ -606,6 +690,66 @@ _mm512_min_ps(__m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_min_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+
+#define _mm_mask_min_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_min_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_min_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm_mask_min_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_min_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+
static __inline __m512i
__DEFAULT_FN_ATTRS
_mm512_min_epi32(__m512i __A, __m512i __B)
@@ -728,18 +872,18 @@ _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_sqrt_pd(__m512d a)
+_mm512_sqrt_pd(__m512d __a)
{
- return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
+ return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
(__v8df) _mm512_setzero_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_sqrt_ps(__m512 a)
+_mm512_sqrt_ps(__m512 __a)
{
- return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
+ return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
(__v16sf) _mm512_setzero_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
@@ -765,7 +909,7 @@ _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
+ return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A,
(__v4sf) __B,
(__v4sf)
_mm_setzero_ps (),
@@ -775,7 +919,7 @@ _mm_rsqrt14_ss(__m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
+ return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A,
(__v2df) __B,
(__v2df)
_mm_setzero_pd (),
@@ -802,7 +946,7 @@ _mm512_rcp14_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rcp14_ss(__m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
+ return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A,
(__v4sf) __B,
(__v4sf)
_mm_setzero_ps (),
@@ -812,7 +956,7 @@ _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rcp14_sd(__m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
+ return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A,
(__v2df) __B,
(__v2df)
_mm_setzero_pd (),
@@ -873,6 +1017,489 @@ _mm512_abs_epi32(__m512i __A)
(__mmask16) -1);
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_add_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+
+#define _mm_mask_add_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_add_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#define _mm_add_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm_mask_add_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_add_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_add_round_pd(__A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm512_mask_add_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_addpd512_mask((__v8df) __A, (__v8df) __B, \
+ (__v8df) __W, (__mmask8) __U, __R); })
+
+#define _mm512_maskz_add_round_pd(__U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R); })
+
+#define _mm512_add_round_ps(__A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, __R); })
+
+#define _mm512_mask_add_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) __W, (__mmask16)__U, __R); })
+
+#define _mm512_maskz_add_round_ps(__U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps(), (__mmask16)__U, __R); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#define _mm_sub_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+
+#define _mm_mask_sub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_sub_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_sub_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm_mask_sub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_sub_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_sub_round_pd(__A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B,\
+ (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm512_mask_sub_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) __W, (__mmask8) __U, __R); })
+
+#define _mm512_maskz_sub_round_pd(__U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
+
+#define _mm512_sub_round_ps(__A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})
+
+#define _mm512_mask_sub_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) __W, (__mmask16) __U, __R); })
+
+#define _mm512_maskz_sub_round_ps(__U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);})
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#define _mm_mul_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+
+#define _mm_mask_mul_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_mul_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mul_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm_mask_mul_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_mul_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mul_round_pd(__A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B,\
+ (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm512_mask_mul_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) __W, (__mmask8) __U, __R); })
+
+#define _mm512_maskz_mul_round_pd(__U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
+
+#define _mm512_mul_round_ps(__A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})
+
+#define _mm512_mask_mul_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) __W, (__mmask16) __U, __R); })
+
+#define _mm512_maskz_mul_round_ps(__U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);})
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_div_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+
+#define _mm_mask_div_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_div_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_div_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm_mask_div_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) __W, (__mmask8) __U,__R); })
+
+#define _mm_maskz_div_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
+ (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_div_round_pd(__A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B,\
+ (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
+
+#define _mm512_mask_div_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) __W, (__mmask8) __U, __R); })
+
+#define _mm512_maskz_div_round_pd(__U, __A, __B, __R) __extension__ ({ \
+ (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \
+ (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
+
+#define _mm512_div_round_ps(__A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})
+
+#define _mm512_mask_div_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) __W, (__mmask16) __U, __R); })
+
+#define _mm512_maskz_div_round_ps(__U, __A, __B, __R) __extension__ ({ \
+ (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
+ (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);})
+
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
(__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), (__v16sf)(A), \
-1, _MM_FROUND_CUR_DIRECTION); })
@@ -1706,17 +2333,15 @@ _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
/* Vector Extract */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
- __m512d __A = (A); \
(__m256d) \
- __builtin_ia32_extractf64x4_mask((__v8df)__A, \
+ __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), \
(I), \
(__v4df)_mm256_setzero_si256(), \
(__mmask8) -1); })
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
- __m512 __A = (A); \
(__m128) \
- __builtin_ia32_extractf32x4_mask((__v16sf)__A, \
+ __builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), \
(I), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8) -1); })
@@ -1850,18 +2475,18 @@ _mm512_cvtph_ps(__m256i __A)
}
static __inline __m512i __DEFAULT_FN_ATTRS
-_mm512_cvttps_epi32(__m512 a)
+_mm512_cvttps_epi32(__m512 __a)
{
return (__m512i)
- __builtin_ia32_cvttps2dq512_mask((__v16sf) a,
+ __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
(__v16si) _mm512_setzero_si512 (),
(__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}
static __inline __m256i __DEFAULT_FN_ATTRS
-_mm512_cvttpd_epi32(__m512d a)
+_mm512_cvttpd_epi32(__m512d __a)
{
- return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
+ return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
(__v8si)_mm256_setzero_si256(),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
@@ -2405,51 +3030,43 @@ _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
}
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, (p), \
+ (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
+ (__v16si)(__m512i)(b), (p), \
(__mmask16)-1); })
#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, (p), \
+ (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
+ (__v16si)(__m512i)(b), (p), \
(__mmask16)-1); })
#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, (p), \
+ (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
+ (__v8di)(__m512i)(b), (p), \
(__mmask8)-1); })
#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \
+ (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
+ (__v8di)(__m512i)(b), (p), \
(__mmask8)-1); })
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, (p), \
+ (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
+ (__v16si)(__m512i)(b), (p), \
(__mmask16)(m)); })
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, (p), \
+ (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
+ (__v16si)(__m512i)(b), (p), \
(__mmask16)(m)); })
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, (p), \
+ (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
+ (__v8di)(__m512i)(b), (p), \
(__mmask8)(m)); })
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
- __m512i __a = (a); \
- __m512i __b = (b); \
- (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \
+ (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
+ (__v8di)(__m512i)(b), (p), \
(__mmask8)(m)); })
#undef __DEFAULT_FN_ATTRS