aboutsummaryrefslogtreecommitdiff
path: root/lib/Headers/ppc_wrappers/pmmintrin.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Headers/ppc_wrappers/pmmintrin.h')
-rw-r--r--lib/Headers/ppc_wrappers/pmmintrin.h150
1 files changed, 150 insertions, 0 deletions
diff --git a/lib/Headers/ppc_wrappers/pmmintrin.h b/lib/Headers/ppc_wrappers/pmmintrin.h
new file mode 100644
index 000000000000..6d93383d5412
--- /dev/null
+++ b/lib/Headers/ppc_wrappers/pmmintrin.h
@@ -0,0 +1,150 @@
+/*===---- pmmintrin.h - Implementation of SSE3 intrinsics on PowerPC -------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef NO_WARN_X86_INTRINSICS
+/* This header is distributed to simplify porting x86_64 code that
+ makes explicit use of Intel intrinsics to powerpc64le.
+ It is the user's responsibility to determine if the results are
+ acceptable and make additional changes as necessary.
+ Note that much code that uses Intel intrinsics can be rewritten in
+ standard C or GNU C extensions, which are more portable and better
+ optimized across multiple targets.
+
+ In the specific case of X86 SSE3 intrinsics, the PowerPC VMX/VSX ISA
+ is a good match for most SIMD operations. However the Horizontal
+ add/sub requires the data pairs be permuted into a separate
+ registers with vertical even/odd alignment for the operation.
+ And the addsub operation requires the sign of only the even numbered
+ elements be flipped (xored with -0.0).
+ For larger blocks of code using these intrinsic implementations,
+ the compiler be should be able to schedule instructions to avoid
+ additional latency.
+
+ In the specific case of the monitor and mwait instructions there are
+ no direct equivalent in the PowerISA at this time. So those
+ intrinsics are not implemented. */
+#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning."
+#endif
+
+#ifndef PMMINTRIN_H_
+#define PMMINTRIN_H_
+
+#if defined(__linux__) && defined(__ppc64__)
+
+/* We need definitions from the SSE2 and SSE header files*/
+#include <emmintrin.h>
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_addsub_ps (__m128 __X, __m128 __Y)
+{
+ const __v4sf even_n0 = {-0.0, 0.0, -0.0, 0.0};
+ __v4sf even_neg_Y = vec_xor(__Y, even_n0);
+ return (__m128) vec_add (__X, even_neg_Y);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_addsub_pd (__m128d __X, __m128d __Y)
+{
+ const __v2df even_n0 = {-0.0, 0.0};
+ __v2df even_neg_Y = vec_xor(__Y, even_n0);
+ return (__m128d) vec_add (__X, even_neg_Y);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_ps (__m128 __X, __m128 __Y)
+{
+ __vector unsigned char xform2 = {
+ 0x00, 0x01, 0x02, 0x03,
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x18, 0x19, 0x1A, 0x1B
+ };
+ __vector unsigned char xform1 = {
+ 0x04, 0x05, 0x06, 0x07,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x14, 0x15, 0x16, 0x17,
+ 0x1C, 0x1D, 0x1E, 0x1F
+ };
+ return (__m128) vec_add (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2),
+ vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_ps (__m128 __X, __m128 __Y)
+{
+ __vector unsigned char xform2 = {
+ 0x00, 0x01, 0x02, 0x03,
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x18, 0x19, 0x1A, 0x1B
+ };
+ __vector unsigned char xform1 = {
+ 0x04, 0x05, 0x06, 0x07,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x14, 0x15, 0x16, 0x17,
+ 0x1C, 0x1D, 0x1E, 0x1F
+ };
+ return (__m128) vec_sub (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2),
+ vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1));
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_pd (__m128d __X, __m128d __Y)
+{
+ return (__m128d) vec_add (vec_mergeh ((__v2df) __X, (__v2df)__Y),
+ vec_mergel ((__v2df) __X, (__v2df)__Y));
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_pd (__m128d __X, __m128d __Y)
+{
+ return (__m128d) vec_sub (vec_mergeh ((__v2df) __X, (__v2df)__Y),
+ vec_mergel ((__v2df) __X, (__v2df)__Y));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movehdup_ps (__m128 __X)
+{
+ return (__m128)vec_mergeo ((__v4su)__X, (__v4su)__X);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_moveldup_ps (__m128 __X)
+{
+ return (__m128)vec_mergee ((__v4su)__X, (__v4su)__X);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loaddup_pd (double const *__P)
+{
+ return (__m128d) vec_splats (*__P);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movedup_pd (__m128d __X)
+{
+ return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_lddqu_si128 (__m128i const *__P)
+{
+ return (__m128i) (vec_vsx_ld(0, (signed int const *)__P));
+}
+
+/* POWER8 / POWER9 have no equivalent for _mm_monitor nor _mm_wait. */
+
+#else
+#include_next <pmmintrin.h>
+#endif /* defined(__linux__) && defined(__ppc64__) */
+
+#endif /* PMMINTRIN_H_ */