Diffstat (limited to 'contrib/arm-optimized-routines/math')
147 files changed, 3276 insertions, 3257 deletions
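Most of this import adds AdvSIMD (Neon) implementations under math/aarch64/ that export vector-PCS symbols named after the AArch64 vector function ABI scheme (see the V_NAME_* macros in v_math.h below). As a hedged sketch of how the new entry points are consumed — the declarations and link line are assumptions based on that scheme and on the Dir.mk targets, not part of the diff itself:

/* Illustration only, not part of this commit: a minimal standalone caller
   for two of the imported routines, assuming the vector-PCS symbol names
   generated by V_NAME_D1/V_NAME_F1 in math/aarch64/v_math.h and a link
   against the libmathlib built by Dir.mk, e.g.
   cc -O2 demo.c build/lib/libmathlib.a -lm  */
#include <arm_neon.h>
#include <math.h>
#include <stdio.h>

__attribute__ ((aarch64_vector_pcs)) float64x2_t _ZGVnN2v_sin (float64x2_t);
__attribute__ ((aarch64_vector_pcs)) float32x4_t _ZGVnN4v_expf (float32x4_t);

int
main (void)
{
  /* Two double lanes through the vector sine, compared with scalar sin.  */
  float64x2_t dx = { 0.5, 1.5 };
  float64x2_t ds = _ZGVnN2v_sin (dx);
  for (int i = 0; i < 2; i++)
    printf ("sin(%g)\tvector %a\tscalar %a\n", dx[i], ds[i], sin (dx[i]));

  /* Four float lanes through the vector exp, compared with scalar expf.  */
  float32x4_t fx = { -1.0f, 0.0f, 1.0f, 10.0f };
  float32x4_t fe = _ZGVnN4v_expf (fx);
  for (int i = 0; i < 4; i++)
    printf ("expf(%g)\tvector %a\tscalar %a\n", (double) fx[i],
            (double) fe[i], (double) expf (fx[i]));
  return 0;
}

Lane subscripting (dx[i]) relies on the same GCC/Clang vector extensions the library sources themselves use.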
diff --git a/contrib/arm-optimized-routines/math/Dir.mk b/contrib/arm-optimized-routines/math/Dir.mk index 3b841ab71955..5e9494a7bd3c 100644 --- a/contrib/arm-optimized-routines/math/Dir.mk +++ b/contrib/arm-optimized-routines/math/Dir.mk @@ -1,12 +1,14 @@ # Makefile fragment - requires GNU make # -# Copyright (c) 2019, Arm Limited. -# SPDX-License-Identifier: MIT +# Copyright (c) 2019-2023, Arm Limited. +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception S := $(srcdir)/math B := build/math math-lib-srcs := $(wildcard $(S)/*.[cS]) +math-lib-srcs += $(wildcard $(S)/$(ARCH)/*.[cS]) + math-test-srcs := \ $(S)/test/mathtest.c \ $(S)/test/mathbench.c \ @@ -15,6 +17,7 @@ math-test-srcs := \ math-test-host-srcs := $(wildcard $(S)/test/rtest/*.[cS]) math-includes := $(patsubst $(S)/%,build/%,$(wildcard $(S)/include/*.h)) +math-test-includes := $(patsubst $(S)/%,build/include/%,$(wildcard $(S)/test/*.h)) math-libs := \ build/lib/libmathlib.so \ @@ -42,10 +45,11 @@ math-files := \ $(math-tools) \ $(math-host-tools) \ $(math-includes) \ + $(math-test-includes) \ -all-math: $(math-libs) $(math-tools) $(math-includes) +all-math: $(math-libs) $(math-tools) $(math-includes) $(math-test-includes) -$(math-objs): $(math-includes) +$(math-objs): $(math-includes) $(math-test-includes) $(math-objs): CFLAGS_ALL += $(math-cflags) $(B)/test/mathtest.o: CFLAGS_ALL += -fmath-errno $(math-host-objs): CC = $(HOST_CC) @@ -63,6 +67,8 @@ build/lib/libmathlib.a: $(math-lib-objs) $(math-host-tools): HOST_LDLIBS += -lm -lmpfr -lmpc $(math-tools): LDLIBS += $(math-ldlibs) -lm +# math-sve-cflags should be empty if WANT_SVE_MATH is not enabled +$(math-tools): CFLAGS_ALL += $(math-sve-cflags) build/bin/rtest: $(math-host-objs) $(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ $^ $(HOST_LDLIBS) @@ -83,6 +89,9 @@ build/bin/ulp: $(B)/test/ulp.o build/lib/libmathlib.a build/include/%.h: $(S)/include/%.h cp $< $@ +build/include/test/%.h: $(S)/test/%.h + cp $< $@ + build/bin/%.sh: $(S)/test/%.sh cp $< $@ @@ -96,7 +105,7 @@ check-math-rtest: $(math-host-tools) $(math-tools) cat $(math-rtests) | build/bin/rtest | $(EMULATOR) build/bin/mathtest $(math-testflags) check-math-ulp: $(math-tools) - ULPFLAGS="$(math-ulpflags)" build/bin/runulp.sh $(EMULATOR) + ULPFLAGS="$(math-ulpflags)" WANT_SIMD_EXCEPT="$(WANT_SIMD_EXCEPT)" build/bin/runulp.sh $(EMULATOR) check-math: check-math-test check-math-rtest check-math-ulp diff --git a/contrib/arm-optimized-routines/math/README.contributors b/contrib/arm-optimized-routines/math/README.contributors new file mode 100644 index 000000000000..33e7ba376e41 --- /dev/null +++ b/contrib/arm-optimized-routines/math/README.contributors @@ -0,0 +1,78 @@ +STYLE REQUIREMENTS +================== + +1. Most code in this sub-directory is expected to be upstreamed into glibc, so + the GNU Coding Standard and glibc-specific conventions should be followed + to ease upstreaming. + +2. ABI and symbols: the code should be written so it is suitable for inclusion + into a libc with minimal changes. This means, for example, that internal symbols + should be hidden and placed in the implementation-reserved namespace according to + ISO C and POSIX rules. If possible, the built shared libraries and static + library archives should be usable to override libc symbols at link time (or + at runtime via LD_PRELOAD). This requires the symbols to follow the glibc ABI + (other than symbol versioning); this cannot be done reliably for static + linking, so it is a best-effort requirement. + +3. 
API: include headers should be suitable for benchmarking and testing code + and should not conflict with libc headers. + + +CONTRIBUTION GUIDELINES FOR math SUB-DIRECTORY +============================================== + +1. Math functions have quality and performance requirements. + +2. Quality: + - Worst-case ULP error should be small in the entire input domain (for most + common double-precision scalar functions the target is < 0.66 ULP error, + and < 1 ULP for single precision; even a performance-optimized function + variant should not have > 5 ULP error if the goal is to be a drop-in + replacement for a standard math function); this should be tested + statistically (or on all inputs if possible in a reasonable amount of time). + The ulp tool exists for this, and runulp.sh should be updated for new functions. + + - All standard rounding modes need to be supported, but in non-default rounding + modes the quality requirement can be relaxed. (Non-nearest rounded + computation can be slow and inaccurate but has to be correct for conformance + reasons.) + + - Special cases and error handling need to follow ISO C Annex F requirements, + POSIX requirements, IEEE 754-2008 requirements and glibc requirements: + https://www.gnu.org/software/libc/manual/html_mono/libc.html#Errors-in-Math-Functions + This should be tested by direct tests (the glibc test system may be used for it). + + - Error handling code should be decoupled from the approximation code as much + as possible. (There are helper functions; these take care of errno as well + as exception raising.) + + - Vector math code does not need to work in non-nearest rounding modes, and error + handling side effects need not happen (fenv exceptions and errno), but the + result should be correct (within the quality requirements, which are lower for + vector code than for scalar code). + + - Error bounds of the approximation should be clearly documented. + + - The code should build and pass tests on arm, aarch64 and x86_64 GNU/Linux + systems. (Routines and features can be disabled on specific targets, but + the build must complete.) On aarch64, both little- and big-endian targets + are supported, as well as valid combinations of architecture extensions. + The configurations that should be tested depend on the contribution. + +3. Performance: + - Common math code should be benchmarked on modern aarch64 microarchitectures + over typical inputs. + + - Performance improvements should be documented (relative numbers can be + published; it is enough to use the mathbench microbenchmark tool, which should + be updated for new functions). + + - Attention should be paid to the compilation flags: for aarch64, fma + contraction should be on and math errno turned off so that some builtins can be + inlined. + + - The code should be reasonably performant on x86_64 too: e.g. some rounding + instructions and fma may not be available on x86_64, and such builtins turn into + libc calls with slow code. Such a slowdown is not acceptable; a faster fallback + should be present: glibc and bionic use the same code on all targets. (This + does not apply to vector math code.) diff --git a/contrib/arm-optimized-routines/math/aarch64/v_cos.c b/contrib/arm-optimized-routines/math/aarch64/v_cos.c new file mode 100644 index 000000000000..9a73575bce89 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_cos.c @@ -0,0 +1,87 @@ +/* + * Double-precision vector cos function. + * + * Copyright (c) 2019-2023, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "mathlib.h" +#include "v_math.h" + +static const struct data +{ + float64x2_t poly[7]; + float64x2_t range_val, shift, inv_pi, half_pi, pi_1, pi_2, pi_3; +} data = { + /* Worst-case error is 3.3 ulp in [-pi/2, pi/2]. */ + .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7), + V2 (-0x1.a01a019936f27p-13), V2 (0x1.71de37a97d93ep-19), + V2 (-0x1.ae633919987c6p-26), V2 (0x1.60e277ae07cecp-33), + V2 (-0x1.9e9540300a1p-41) }, + .inv_pi = V2 (0x1.45f306dc9c883p-2), + .half_pi = V2 (0x1.921fb54442d18p+0), + .pi_1 = V2 (0x1.921fb54442d18p+1), + .pi_2 = V2 (0x1.1a62633145c06p-53), + .pi_3 = V2 (0x1.c1cd129024e09p-106), + .shift = V2 (0x1.8p52), + .range_val = V2 (0x1p23) +}; + +#define C(i) d->poly[i] + +static float64x2_t VPCS_ATTR NOINLINE +special_case (float64x2_t x, float64x2_t y, uint64x2_t odd, uint64x2_t cmp) +{ + y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd)); + return v_call_f64 (cos, x, y, cmp); +} + +float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + float64x2_t n, r, r2, r3, r4, t1, t2, t3, y; + uint64x2_t odd, cmp; + +#if WANT_SIMD_EXCEPT + r = vabsq_f64 (x); + cmp = vcgeq_u64 (vreinterpretq_u64_f64 (r), + vreinterpretq_u64_f64 (d->range_val)); + if (unlikely (v_any_u64 (cmp))) + /* If fenv exceptions are to be triggered correctly, set any special lanes + to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by + special-case handler later. */ + r = vbslq_f64 (cmp, v_f64 (1.0), r); +#else + cmp = vcageq_f64 (x, d->range_val); + r = x; +#endif + + /* n = rint((|x|+pi/2)/pi) - 0.5. */ + n = vfmaq_f64 (d->shift, d->inv_pi, vaddq_f64 (r, d->half_pi)); + odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63); + n = vsubq_f64 (n, d->shift); + n = vsubq_f64 (n, v_f64 (0.5)); + + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ + r = vfmsq_f64 (r, d->pi_1, n); + r = vfmsq_f64 (r, d->pi_2, n); + r = vfmsq_f64 (r, d->pi_3, n); + + /* sin(r) poly approx. */ + r2 = vmulq_f64 (r, r); + r3 = vmulq_f64 (r2, r); + r4 = vmulq_f64 (r2, r2); + + t1 = vfmaq_f64 (C (4), C (5), r2); + t2 = vfmaq_f64 (C (2), C (3), r2); + t3 = vfmaq_f64 (C (0), C (1), r2); + + y = vfmaq_f64 (t1, C (6), r4); + y = vfmaq_f64 (t2, y, r4); + y = vfmaq_f64 (t3, y, r4); + y = vfmaq_f64 (r, y, r3); + + if (unlikely (v_any_u64 (cmp))) + return special_case (x, y, odd, cmp); + return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd)); +} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_cosf.c b/contrib/arm-optimized-routines/math/aarch64/v_cosf.c new file mode 100644 index 000000000000..b9890b2998ad --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_cosf.c @@ -0,0 +1,82 @@ +/* + * Single-precision vector cos function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "mathlib.h" +#include "v_math.h" + +static const struct data +{ + float32x4_t poly[4]; + float32x4_t range_val, inv_pi, half_pi, shift, pi_1, pi_2, pi_3; +} data = { + /* 1.886 ulp error. 
*/ + .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f), + V4 (0x1.5b2e76p-19f) }, + + .pi_1 = V4 (0x1.921fb6p+1f), + .pi_2 = V4 (-0x1.777a5cp-24f), + .pi_3 = V4 (-0x1.ee59dap-49f), + + .inv_pi = V4 (0x1.45f306p-2f), + .shift = V4 (0x1.8p+23f), + .half_pi = V4 (0x1.921fb6p0f), + .range_val = V4 (0x1p20f) +}; + +#define C(i) d->poly[i] + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp) +{ + /* Fall back to scalar code. */ + y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd)); + return v_call_f32 (cosf, x, y, cmp); +} + +float32x4_t VPCS_ATTR V_NAME_F1 (cos) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + float32x4_t n, r, r2, r3, y; + uint32x4_t odd, cmp; + +#if WANT_SIMD_EXCEPT + r = vabsq_f32 (x); + cmp = vcgeq_u32 (vreinterpretq_u32_f32 (r), + vreinterpretq_u32_f32 (d->range_val)); + if (unlikely (v_any_u32 (cmp))) + /* If fenv exceptions are to be triggered correctly, set any special lanes + to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by + special-case handler later. */ + r = vbslq_f32 (cmp, v_f32 (1.0f), r); +#else + cmp = vcageq_f32 (x, d->range_val); + r = x; +#endif + + /* n = rint((|x|+pi/2)/pi) - 0.5. */ + n = vfmaq_f32 (d->shift, d->inv_pi, vaddq_f32 (r, d->half_pi)); + odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31); + n = vsubq_f32 (n, d->shift); + n = vsubq_f32 (n, v_f32 (0.5f)); + + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ + r = vfmsq_f32 (r, d->pi_1, n); + r = vfmsq_f32 (r, d->pi_2, n); + r = vfmsq_f32 (r, d->pi_3, n); + + /* y = sin(r). */ + r2 = vmulq_f32 (r, r); + r3 = vmulq_f32 (r2, r); + y = vfmaq_f32 (C (2), C (3), r2); + y = vfmaq_f32 (C (1), y, r2); + y = vfmaq_f32 (C (0), y, r2); + y = vfmaq_f32 (r, y, r3); + + if (unlikely (v_any_u32 (cmp))) + return special_case (x, y, odd, cmp); + return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd)); +} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_exp.c b/contrib/arm-optimized-routines/math/aarch64/v_exp.c new file mode 100644 index 000000000000..bc5609faf4fc --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_exp.c @@ -0,0 +1,125 @@ +/* + * Double-precision vector e^x function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "mathlib.h" +#include "v_math.h" + +#define N (1 << V_EXP_TABLE_BITS) +#define IndexMask (N - 1) + +const static volatile struct +{ + float64x2_t poly[3]; + float64x2_t inv_ln2, ln2_hi, ln2_lo, shift; +#if !WANT_SIMD_EXCEPT + float64x2_t special_bound, scale_thresh; +#endif +} data = { + /* maxerr: 1.88 +0.5 ulp + rel error: 1.4337*2^-53 + abs error: 1.4299*2^-53 in [ -ln2/256, ln2/256 ]. */ + .poly = { V2 (0x1.ffffffffffd43p-2), V2 (0x1.55555c75adbb2p-3), + V2 (0x1.55555da646206p-5) }, +#if !WANT_SIMD_EXCEPT + .scale_thresh = V2 (163840.0), /* 1280.0 * N. */ + .special_bound = V2 (704.0), +#endif + .inv_ln2 = V2 (0x1.71547652b82fep7), /* N/ln2. */ + .ln2_hi = V2 (0x1.62e42fefa39efp-8), /* ln2/N. */ + .ln2_lo = V2 (0x1.abc9e3b39803f3p-63), + .shift = V2 (0x1.8p+52) +}; + +#define C(i) data.poly[i] +#define Tab __v_exp_data + +#if WANT_SIMD_EXCEPT + +# define TinyBound v_u64 (0x2000000000000000) /* asuint64 (0x1p-511). */ +# define BigBound v_u64 (0x4080000000000000) /* asuint64 (0x1p9). */ +# define SpecialBound v_u64 (0x2080000000000000) /* BigBound - TinyBound. 
 */ + +static float64x2_t VPCS_ATTR NOINLINE +special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) +{ + /* If fenv exceptions are to be triggered correctly, fall back to the scalar + routine for special lanes. */ + return v_call_f64 (exp, x, y, cmp); +} + +#else + +# define SpecialOffset v_u64 (0x6000000000000000) /* 0x1p513. */ +/* SpecialBias1 - SpecialBias2 = asuint(1.0). */ +# define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */ +# define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */ + +static inline float64x2_t VPCS_ATTR +special_case (float64x2_t s, float64x2_t y, float64x2_t n) +{ + /* 2^(n/N) may overflow, break it up into s1*s2. */ + uint64x2_t b = vandq_u64 (vcltzq_f64 (n), SpecialOffset); + float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (SpecialBias1, b)); + float64x2_t s2 = vreinterpretq_f64_u64 ( + vaddq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (s), SpecialBias2), b)); + uint64x2_t cmp = vcagtq_f64 (n, data.scale_thresh); + float64x2_t r1 = vmulq_f64 (s1, s1); + float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, y, s2), s1); + return vbslq_f64 (cmp, r1, r0); +} + +#endif + +float64x2_t VPCS_ATTR V_NAME_D1 (exp) (float64x2_t x) +{ + float64x2_t n, r, r2, s, y, z; + uint64x2_t cmp, u, e; + +#if WANT_SIMD_EXCEPT + /* If any lanes are special, mask them with 1 and retain a copy of x to allow + special_case to fix special lanes later. This is only necessary if fenv + exceptions are to be triggered correctly. */ + float64x2_t xm = x; + uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x)); + cmp = vcgeq_u64 (vsubq_u64 (iax, TinyBound), SpecialBound); + if (unlikely (v_any_u64 (cmp))) + x = vbslq_f64 (cmp, v_f64 (1), x); +#else + cmp = vcagtq_f64 (x, data.special_bound); +#endif + + /* n = round(x/(ln2/N)). */ + z = vfmaq_f64 (data.shift, x, data.inv_ln2); + u = vreinterpretq_u64_f64 (z); + n = vsubq_f64 (z, data.shift); + + /* r = x - n*ln2/N. */ + r = x; + r = vfmsq_f64 (r, data.ln2_hi, n); + r = vfmsq_f64 (r, data.ln2_lo, n); + + e = vshlq_n_u64 (u, 52 - V_EXP_TABLE_BITS); + + /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4. */ + r2 = vmulq_f64 (r, r); + y = vfmaq_f64 (C (0), C (1), r); + y = vfmaq_f64 (y, C (2), r2); + y = vfmaq_f64 (r, y, r2); + + /* s = 2^(n/N). */ + u = (uint64x2_t){ Tab[u[0] & IndexMask], Tab[u[1] & IndexMask] }; + s = vreinterpretq_f64_u64 (vaddq_u64 (u, e)); + + if (unlikely (v_any_u64 (cmp))) +#if WANT_SIMD_EXCEPT + return special_case (xm, vfmaq_f64 (s, y, s), cmp); +#else + return special_case (s, y, n); +#endif + + return vfmaq_f64 (s, y, s); +} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_exp2f.c b/contrib/arm-optimized-routines/math/aarch64/v_exp2f.c new file mode 100644 index 000000000000..e402205e98e6 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_exp2f.c @@ -0,0 +1,113 @@ +/* + * Single-precision vector 2^x function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "mathlib.h" +#include "v_math.h" + +static const struct data +{ + float32x4_t poly[5]; + uint32x4_t exponent_bias; +#if !WANT_SIMD_EXCEPT + float32x4_t special_bound, scale_thresh; +#endif +} data = { + /* maxerr: 1.962 ulp. 
*/ + .poly = { V4 (0x1.59977ap-10f), V4 (0x1.3ce9e4p-7f), V4 (0x1.c6bd32p-5f), + V4 (0x1.ebf9bcp-3f), V4 (0x1.62e422p-1f) }, + .exponent_bias = V4 (0x3f800000), +#if !WANT_SIMD_EXCEPT + .special_bound = V4 (126.0f), + .scale_thresh = V4 (192.0f), +#endif +}; + +#define C(i) d->poly[i] + +#if WANT_SIMD_EXCEPT + +# define TinyBound v_u32 (0x20000000) /* asuint (0x1p-63). */ +# define BigBound v_u32 (0x42800000) /* asuint (0x1p6). */ +# define SpecialBound v_u32 (0x22800000) /* BigBound - TinyBound. */ + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) +{ + /* If fenv exceptions are to be triggered correctly, fall back to the scalar + routine for special lanes. */ + return v_call_f32 (exp2f, x, y, cmp); +} + +#else + +# define SpecialOffset v_u32 (0x82000000) +# define SpecialBias v_u32 (0x7f000000) + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, + float32x4_t scale, const struct data *d) +{ + /* 2^n may overflow, break it up into s1*s2. */ + uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset); + float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias)); + float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); + uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh); + float32x4_t r2 = vmulq_f32 (s1, s1); + float32x4_t r1 = vmulq_f32 (vfmaq_f32 (s2, poly, s2), s1); + /* Similar to r1 but avoids double rounding in the subnormal range. */ + float32x4_t r0 = vfmaq_f32 (scale, poly, scale); + float32x4_t r = vbslq_f32 (cmp1, r1, r0); + return vbslq_f32 (cmp2, r2, r); +} + +#endif + +float32x4_t VPCS_ATTR V_NAME_F1 (exp2) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + float32x4_t n, r, r2, scale, p, q, poly; + uint32x4_t cmp, e; + +#if WANT_SIMD_EXCEPT + /* asuint(|x|) - TinyBound >= BigBound - TinyBound. */ + uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x)); + cmp = vcgeq_u32 (vsubq_u32 (ia, TinyBound), SpecialBound); + float32x4_t xm = x; + /* If any lanes are special, mask them with 1 and retain a copy of x to allow + special_case to fix special lanes later. This is only necessary if fenv + exceptions are to be triggered correctly. */ + if (unlikely (v_any_u32 (cmp))) + x = vbslq_f32 (cmp, v_f32 (1), x); +#endif + + /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] + x = n + r, with r in [-1/2, 1/2]. */ + n = vrndaq_f32 (x); + r = vsubq_f32 (x, n); + e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)), 23); + scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); + +#if !WANT_SIMD_EXCEPT + cmp = vcagtq_f32 (n, d->special_bound); +#endif + + r2 = vmulq_f32 (r, r); + p = vfmaq_f32 (C (1), C (0), r); + q = vfmaq_f32 (C (3), C (2), r); + q = vfmaq_f32 (q, p, r2); + p = vmulq_f32 (C (4), r); + poly = vfmaq_f32 (p, q, r2); + + if (unlikely (v_any_u32 (cmp))) +#if WANT_SIMD_EXCEPT + return special_case (xm, vfmaq_f32 (scale, poly, scale), cmp); +#else + return special_case (poly, n, e, cmp, scale, d); +#endif + + return vfmaq_f32 (scale, poly, scale); +} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_exp2f_1u.c b/contrib/arm-optimized-routines/math/aarch64/v_exp2f_1u.c new file mode 100644 index 000000000000..ba6b02fbb4bc --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_exp2f_1u.c @@ -0,0 +1,72 @@ +/* + * Single-precision vector 2^x function. + * + * Copyright (c) 2019-2023, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "mathlib.h" +#include "v_math.h" + +static const float Poly[] = { + /* maxerr: 0.878 ulp. */ + 0x1.416b5ep-13f, 0x1.5f082ep-10f, 0x1.3b2dep-7f, 0x1.c6af7cp-5f, 0x1.ebfbdcp-3f, 0x1.62e43p-1f +}; +#define C0 v_f32 (Poly[0]) +#define C1 v_f32 (Poly[1]) +#define C2 v_f32 (Poly[2]) +#define C3 v_f32 (Poly[3]) +#define C4 v_f32 (Poly[4]) +#define C5 v_f32 (Poly[5]) + +#define Shift v_f32 (0x1.8p23f) +#define InvLn2 v_f32 (0x1.715476p+0f) +#define Ln2hi v_f32 (0x1.62e4p-1f) +#define Ln2lo v_f32 (0x1.7f7d1cp-20f) + +static float32x4_t VPCS_ATTR NOINLINE +specialcase (float32x4_t poly, float32x4_t n, uint32x4_t e, float32x4_t absn) +{ + /* 2^n may overflow, break it up into s1*s2. */ + uint32x4_t b = (n <= v_f32 (0.0f)) & v_u32 (0x83000000); + float32x4_t s1 = vreinterpretq_f32_u32 (v_u32 (0x7f000000) + b); + float32x4_t s2 = vreinterpretq_f32_u32 (e - b); + uint32x4_t cmp = absn > v_f32 (192.0f); + float32x4_t r1 = s1 * s1; + float32x4_t r0 = poly * s1 * s2; + return vreinterpretq_f32_u32 ((cmp & vreinterpretq_u32_f32 (r1)) + | (~cmp & vreinterpretq_u32_f32 (r0))); +} + +float32x4_t VPCS_ATTR +_ZGVnN4v_exp2f_1u (float32x4_t x) +{ + float32x4_t n, r, scale, poly, absn; + uint32x4_t cmp, e; + + /* exp2(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)] + x = n + r, with r in [-1/2, 1/2]. */ +#if 0 + float32x4_t z; + z = x + Shift; + n = z - Shift; + r = x - n; + e = vreinterpretq_u32_f32 (z) << 23; +#else + n = vrndaq_f32 (x); + r = x - n; + e = vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)) << 23; +#endif + scale = vreinterpretq_f32_u32 (e + v_u32 (0x3f800000)); + absn = vabsq_f32 (n); + cmp = absn > v_f32 (126.0f); + poly = vfmaq_f32 (C1, C0, r); + poly = vfmaq_f32 (C2, poly, r); + poly = vfmaq_f32 (C3, poly, r); + poly = vfmaq_f32 (C4, poly, r); + poly = vfmaq_f32 (C5, poly, r); + poly = vfmaq_f32 (v_f32 (1.0f), poly, r); + if (unlikely (v_any_u32 (cmp))) + return specialcase (poly, n, e, absn); + return scale * poly; +} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_exp_data.c b/contrib/arm-optimized-routines/math/aarch64/v_exp_data.c new file mode 100644 index 000000000000..45f0848cac5b --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_exp_data.c @@ -0,0 +1,146 @@ +/* + * Lookup table for double-precision e^x vector function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" + +# define N (1 << V_EXP_TABLE_BITS) + +/* 2^(j/N), j=0..N. 
*/ +const uint64_t __v_exp_data[] = { +# if N == 128 + 0x3ff0000000000000, 0x3feff63da9fb3335, 0x3fefec9a3e778061, + 0x3fefe315e86e7f85, 0x3fefd9b0d3158574, 0x3fefd06b29ddf6de, + 0x3fefc74518759bc8, 0x3fefbe3ecac6f383, 0x3fefb5586cf9890f, + 0x3fefac922b7247f7, 0x3fefa3ec32d3d1a2, 0x3fef9b66affed31b, + 0x3fef9301d0125b51, 0x3fef8abdc06c31cc, 0x3fef829aaea92de0, + 0x3fef7a98c8a58e51, 0x3fef72b83c7d517b, 0x3fef6af9388c8dea, + 0x3fef635beb6fcb75, 0x3fef5be084045cd4, 0x3fef54873168b9aa, + 0x3fef4d5022fcd91d, 0x3fef463b88628cd6, 0x3fef3f49917ddc96, + 0x3fef387a6e756238, 0x3fef31ce4fb2a63f, 0x3fef2b4565e27cdd, + 0x3fef24dfe1f56381, 0x3fef1e9df51fdee1, 0x3fef187fd0dad990, + 0x3fef1285a6e4030b, 0x3fef0cafa93e2f56, 0x3fef06fe0a31b715, + 0x3fef0170fc4cd831, 0x3feefc08b26416ff, 0x3feef6c55f929ff1, + 0x3feef1a7373aa9cb, 0x3feeecae6d05d866, 0x3feee7db34e59ff7, + 0x3feee32dc313a8e5, 0x3feedea64c123422, 0x3feeda4504ac801c, + 0x3feed60a21f72e2a, 0x3feed1f5d950a897, 0x3feece086061892d, + 0x3feeca41ed1d0057, 0x3feec6a2b5c13cd0, 0x3feec32af0d7d3de, + 0x3feebfdad5362a27, 0x3feebcb299fddd0d, 0x3feeb9b2769d2ca7, + 0x3feeb6daa2cf6642, 0x3feeb42b569d4f82, 0x3feeb1a4ca5d920f, + 0x3feeaf4736b527da, 0x3feead12d497c7fd, 0x3feeab07dd485429, + 0x3feea9268a5946b7, 0x3feea76f15ad2148, 0x3feea5e1b976dc09, + 0x3feea47eb03a5585, 0x3feea34634ccc320, 0x3feea23882552225, + 0x3feea155d44ca973, 0x3feea09e667f3bcd, 0x3feea012750bdabf, + 0x3fee9fb23c651a2f, 0x3fee9f7df9519484, 0x3fee9f75e8ec5f74, + 0x3fee9f9a48a58174, 0x3fee9feb564267c9, 0x3feea0694fde5d3f, + 0x3feea11473eb0187, 0x3feea1ed0130c132, 0x3feea2f336cf4e62, + 0x3feea427543e1a12, 0x3feea589994cce13, 0x3feea71a4623c7ad, + 0x3feea8d99b4492ed, 0x3feeaac7d98a6699, 0x3feeace5422aa0db, + 0x3feeaf3216b5448c, 0x3feeb1ae99157736, 0x3feeb45b0b91ffc6, + 0x3feeb737b0cdc5e5, 0x3feeba44cbc8520f, 0x3feebd829fde4e50, + 0x3feec0f170ca07ba, 0x3feec49182a3f090, 0x3feec86319e32323, + 0x3feecc667b5de565, 0x3feed09bec4a2d33, 0x3feed503b23e255d, + 0x3feed99e1330b358, 0x3feede6b5579fdbf, 0x3feee36bbfd3f37a, + 0x3feee89f995ad3ad, 0x3feeee07298db666, 0x3feef3a2b84f15fb, + 0x3feef9728de5593a, 0x3feeff76f2fb5e47, 0x3fef05b030a1064a, + 0x3fef0c1e904bc1d2, 0x3fef12c25bd71e09, 0x3fef199bdd85529c, + 0x3fef20ab5fffd07a, 0x3fef27f12e57d14b, 0x3fef2f6d9406e7b5, + 0x3fef3720dcef9069, 0x3fef3f0b555dc3fa, 0x3fef472d4a07897c, + 0x3fef4f87080d89f2, 0x3fef5818dcfba487, 0x3fef60e316c98398, + 0x3fef69e603db3285, 0x3fef7321f301b460, 0x3fef7c97337b9b5f, + 0x3fef864614f5a129, 0x3fef902ee78b3ff6, 0x3fef9a51fbc74c83, + 0x3fefa4afa2a490da, 0x3fefaf482d8e67f1, 0x3fefba1bee615a27, + 0x3fefc52b376bba97, 0x3fefd0765b6e4540, 0x3fefdbfdad9cbe14, + 0x3fefe7c1819e90d8, 0x3feff3c22b8f71f1, +# elif N == 256 + 0x3ff0000000000000, 0x3feffb1afa5abcbf, 0x3feff63da9fb3335, + 0x3feff168143b0281, 0x3fefec9a3e778061, 0x3fefe7d42e11bbcc, + 0x3fefe315e86e7f85, 0x3fefde5f72f654b1, 0x3fefd9b0d3158574, + 0x3fefd50a0e3c1f89, 0x3fefd06b29ddf6de, 0x3fefcbd42b72a836, + 0x3fefc74518759bc8, 0x3fefc2bdf66607e0, 0x3fefbe3ecac6f383, + 0x3fefb9c79b1f3919, 0x3fefb5586cf9890f, 0x3fefb0f145e46c85, + 0x3fefac922b7247f7, 0x3fefa83b23395dec, 0x3fefa3ec32d3d1a2, + 0x3fef9fa55fdfa9c5, 0x3fef9b66affed31b, 0x3fef973028d7233e, + 0x3fef9301d0125b51, 0x3fef8edbab5e2ab6, 0x3fef8abdc06c31cc, + 0x3fef86a814f204ab, 0x3fef829aaea92de0, 0x3fef7e95934f312e, + 0x3fef7a98c8a58e51, 0x3fef76a45471c3c2, 0x3fef72b83c7d517b, + 0x3fef6ed48695bbc0, 0x3fef6af9388c8dea, 0x3fef672658375d2f, + 0x3fef635beb6fcb75, 0x3fef5f99f8138a1c, 0x3fef5be084045cd4, + 0x3fef582f95281c6b, 
0x3fef54873168b9aa, 0x3fef50e75eb44027, + 0x3fef4d5022fcd91d, 0x3fef49c18438ce4d, 0x3fef463b88628cd6, + 0x3fef42be3578a819, 0x3fef3f49917ddc96, 0x3fef3bdda27912d1, + 0x3fef387a6e756238, 0x3fef351ffb82140a, 0x3fef31ce4fb2a63f, + 0x3fef2e85711ece75, 0x3fef2b4565e27cdd, 0x3fef280e341ddf29, + 0x3fef24dfe1f56381, 0x3fef21ba7591bb70, 0x3fef1e9df51fdee1, + 0x3fef1b8a66d10f13, 0x3fef187fd0dad990, 0x3fef157e39771b2f, + 0x3fef1285a6e4030b, 0x3fef0f961f641589, 0x3fef0cafa93e2f56, + 0x3fef09d24abd886b, 0x3fef06fe0a31b715, 0x3fef0432edeeb2fd, + 0x3fef0170fc4cd831, 0x3feefeb83ba8ea32, 0x3feefc08b26416ff, + 0x3feef96266e3fa2d, 0x3feef6c55f929ff1, 0x3feef431a2de883b, + 0x3feef1a7373aa9cb, 0x3feeef26231e754a, 0x3feeecae6d05d866, + 0x3feeea401b7140ef, 0x3feee7db34e59ff7, 0x3feee57fbfec6cf4, + 0x3feee32dc313a8e5, 0x3feee0e544ede173, 0x3feedea64c123422, + 0x3feedc70df1c5175, 0x3feeda4504ac801c, 0x3feed822c367a024, + 0x3feed60a21f72e2a, 0x3feed3fb2709468a, 0x3feed1f5d950a897, + 0x3feecffa3f84b9d4, 0x3feece086061892d, 0x3feecc2042a7d232, + 0x3feeca41ed1d0057, 0x3feec86d668b3237, 0x3feec6a2b5c13cd0, + 0x3feec4e1e192aed2, 0x3feec32af0d7d3de, 0x3feec17dea6db7d7, + 0x3feebfdad5362a27, 0x3feebe41b817c114, 0x3feebcb299fddd0d, + 0x3feebb2d81d8abff, 0x3feeb9b2769d2ca7, 0x3feeb8417f4531ee, + 0x3feeb6daa2cf6642, 0x3feeb57de83f4eef, 0x3feeb42b569d4f82, + 0x3feeb2e2f4f6ad27, 0x3feeb1a4ca5d920f, 0x3feeb070dde910d2, + 0x3feeaf4736b527da, 0x3feeae27dbe2c4cf, 0x3feead12d497c7fd, + 0x3feeac0827ff07cc, 0x3feeab07dd485429, 0x3feeaa11fba87a03, + 0x3feea9268a5946b7, 0x3feea84590998b93, 0x3feea76f15ad2148, + 0x3feea6a320dceb71, 0x3feea5e1b976dc09, 0x3feea52ae6cdf6f4, + 0x3feea47eb03a5585, 0x3feea3dd1d1929fd, 0x3feea34634ccc320, + 0x3feea2b9febc8fb7, 0x3feea23882552225, 0x3feea1c1c70833f6, + 0x3feea155d44ca973, 0x3feea0f4b19e9538, 0x3feea09e667f3bcd, + 0x3feea052fa75173e, 0x3feea012750bdabf, 0x3fee9fdcddd47645, + 0x3fee9fb23c651a2f, 0x3fee9f9298593ae5, 0x3fee9f7df9519484, + 0x3fee9f7466f42e87, 0x3fee9f75e8ec5f74, 0x3fee9f8286ead08a, + 0x3fee9f9a48a58174, 0x3fee9fbd35d7cbfd, 0x3fee9feb564267c9, + 0x3feea024b1ab6e09, 0x3feea0694fde5d3f, 0x3feea0b938ac1cf6, + 0x3feea11473eb0187, 0x3feea17b0976cfdb, 0x3feea1ed0130c132, + 0x3feea26a62ff86f0, 0x3feea2f336cf4e62, 0x3feea3878491c491, + 0x3feea427543e1a12, 0x3feea4d2add106d9, 0x3feea589994cce13, + 0x3feea64c1eb941f7, 0x3feea71a4623c7ad, 0x3feea7f4179f5b21, + 0x3feea8d99b4492ed, 0x3feea9cad931a436, 0x3feeaac7d98a6699, + 0x3feeabd0a478580f, 0x3feeace5422aa0db, 0x3feeae05bad61778, + 0x3feeaf3216b5448c, 0x3feeb06a5e0866d9, 0x3feeb1ae99157736, + 0x3feeb2fed0282c8a, 0x3feeb45b0b91ffc6, 0x3feeb5c353aa2fe2, + 0x3feeb737b0cdc5e5, 0x3feeb8b82b5f98e5, 0x3feeba44cbc8520f, + 0x3feebbdd9a7670b3, 0x3feebd829fde4e50, 0x3feebf33e47a22a2, + 0x3feec0f170ca07ba, 0x3feec2bb4d53fe0d, 0x3feec49182a3f090, + 0x3feec674194bb8d5, 0x3feec86319e32323, 0x3feeca5e8d07f29e, + 0x3feecc667b5de565, 0x3feece7aed8eb8bb, 0x3feed09bec4a2d33, + 0x3feed2c980460ad8, 0x3feed503b23e255d, 0x3feed74a8af46052, + 0x3feed99e1330b358, 0x3feedbfe53c12e59, 0x3feede6b5579fdbf, + 0x3feee0e521356eba, 0x3feee36bbfd3f37a, 0x3feee5ff3a3c2774, + 0x3feee89f995ad3ad, 0x3feeeb4ce622f2ff, 0x3feeee07298db666, + 0x3feef0ce6c9a8952, 0x3feef3a2b84f15fb, 0x3feef68415b749b1, + 0x3feef9728de5593a, 0x3feefc6e29f1c52a, 0x3feeff76f2fb5e47, + 0x3fef028cf22749e4, 0x3fef05b030a1064a, 0x3fef08e0b79a6f1f, + 0x3fef0c1e904bc1d2, 0x3fef0f69c3f3a207, 0x3fef12c25bd71e09, + 0x3fef16286141b33d, 0x3fef199bdd85529c, 0x3fef1d1cd9fa652c, + 0x3fef20ab5fffd07a, 0x3fef244778fafb22, 
0x3fef27f12e57d14b, + 0x3fef2ba88988c933, 0x3fef2f6d9406e7b5, 0x3fef33405751c4db, + 0x3fef3720dcef9069, 0x3fef3b0f2e6d1675, 0x3fef3f0b555dc3fa, + 0x3fef43155b5bab74, 0x3fef472d4a07897c, 0x3fef4b532b08c968, + 0x3fef4f87080d89f2, 0x3fef53c8eacaa1d6, 0x3fef5818dcfba487, + 0x3fef5c76e862e6d3, 0x3fef60e316c98398, 0x3fef655d71ff6075, + 0x3fef69e603db3285, 0x3fef6e7cd63a8315, 0x3fef7321f301b460, + 0x3fef77d5641c0658, 0x3fef7c97337b9b5f, 0x3fef81676b197d17, + 0x3fef864614f5a129, 0x3fef8b333b16ee12, 0x3fef902ee78b3ff6, + 0x3fef953924676d76, 0x3fef9a51fbc74c83, 0x3fef9f7977cdb740, + 0x3fefa4afa2a490da, 0x3fefa9f4867cca6e, 0x3fefaf482d8e67f1, + 0x3fefb4aaa2188510, 0x3fefba1bee615a27, 0x3fefbf9c1cb6412a, + 0x3fefc52b376bba97, 0x3fefcac948dd7274, 0x3fefd0765b6e4540, + 0x3fefd632798844f8, 0x3fefdbfdad9cbe14, 0x3fefe1d802243c89, + 0x3fefe7c1819e90d8, 0x3fefedba3692d514, 0x3feff3c22b8f71f1, + 0x3feff9d96b2a23d9, +# endif +}; diff --git a/contrib/arm-optimized-routines/math/aarch64/v_expf.c b/contrib/arm-optimized-routines/math/aarch64/v_expf.c new file mode 100644 index 000000000000..34e8b6081bcd --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_expf.c @@ -0,0 +1,122 @@ +/* + * Single-precision vector e^x function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "mathlib.h" +#include "v_math.h" + +static const struct data +{ + float32x4_t poly[5]; + float32x4_t shift, inv_ln2, ln2_hi, ln2_lo; + uint32x4_t exponent_bias; +#if !WANT_SIMD_EXCEPT + float32x4_t special_bound, scale_thresh; +#endif +} data = { + /* maxerr: 1.45358 +0.5 ulp. */ + .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f), + V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) }, + .shift = V4 (0x1.8p23f), + .inv_ln2 = V4 (0x1.715476p+0f), + .ln2_hi = V4 (0x1.62e4p-1f), + .ln2_lo = V4 (0x1.7f7d1cp-20f), + .exponent_bias = V4 (0x3f800000), +#if !WANT_SIMD_EXCEPT + .special_bound = V4 (126.0f), + .scale_thresh = V4 (192.0f), +#endif +}; + +#define C(i) d->poly[i] + +#if WANT_SIMD_EXCEPT + +# define TinyBound v_u32 (0x20000000) /* asuint (0x1p-63). */ +# define BigBound v_u32 (0x42800000) /* asuint (0x1p6). */ +# define SpecialBound v_u32 (0x22800000) /* BigBound - TinyBound. */ + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp) +{ + /* If fenv exceptions are to be triggered correctly, fall back to the scalar + routine to special lanes. */ + return v_call_f32 (expf, x, y, cmp); +} + +#else + +# define SpecialOffset v_u32 (0x82000000) +# define SpecialBias v_u32 (0x7f000000) + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1, + float32x4_t scale, const struct data *d) +{ + /* 2^n may overflow, break it up into s1*s2. */ + uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset); + float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias)); + float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b)); + uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh); + float32x4_t r2 = vmulq_f32 (s1, s1); + float32x4_t r1 = vmulq_f32 (vfmaq_f32 (s2, poly, s2), s1); + /* Similar to r1 but avoids double rounding in the subnormal range. 
*/ + float32x4_t r0 = vfmaq_f32 (scale, poly, scale); + float32x4_t r = vbslq_f32 (cmp1, r1, r0); + return vbslq_f32 (cmp2, r2, r); +} + +#endif + +float32x4_t VPCS_ATTR V_NAME_F1 (exp) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + float32x4_t n, r, r2, scale, p, q, poly, z; + uint32x4_t cmp, e; + +#if WANT_SIMD_EXCEPT + /* asuint(x) - TinyBound >= BigBound - TinyBound. */ + cmp = vcgeq_u32 ( + vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)), + TinyBound), + SpecialBound); + float32x4_t xm = x; + /* If any lanes are special, mask them with 1 and retain a copy of x to allow + special case handler to fix special lanes later. This is only necessary if + fenv exceptions are to be triggered correctly. */ + if (unlikely (v_any_u32 (cmp))) + x = vbslq_f32 (cmp, v_f32 (1), x); +#endif + + /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] + x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ + z = vfmaq_f32 (d->shift, x, d->inv_ln2); + n = vsubq_f32 (z, d->shift); + r = vfmsq_f32 (x, n, d->ln2_hi); + r = vfmsq_f32 (r, n, d->ln2_lo); + e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); + scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias)); + +#if !WANT_SIMD_EXCEPT + cmp = vcagtq_f32 (n, d->special_bound); +#endif + + r2 = vmulq_f32 (r, r); + p = vfmaq_f32 (C (1), C (0), r); + q = vfmaq_f32 (C (3), C (2), r); + q = vfmaq_f32 (q, p, r2); + p = vmulq_f32 (C (4), r); + poly = vfmaq_f32 (p, q, r2); + + if (unlikely (v_any_u32 (cmp))) +#if WANT_SIMD_EXCEPT + return special_case (xm, vfmaq_f32 (scale, poly, scale), cmp); +#else + return special_case (poly, n, e, cmp, scale, d); +#endif + + return vfmaq_f32 (scale, poly, scale); +} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_expf_1u.c b/contrib/arm-optimized-routines/math/aarch64/v_expf_1u.c new file mode 100644 index 000000000000..43d03fa34efa --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_expf_1u.c @@ -0,0 +1,77 @@ +/* + * Single-precision vector e^x function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "mathlib.h" +#include "v_math.h" + +static const float Poly[] = { + /* maxerr: 0.36565 +0.5 ulp. */ + 0x1.6a6000p-10f, + 0x1.12718ep-7f, + 0x1.555af0p-5f, + 0x1.555430p-3f, + 0x1.fffff4p-2f, +}; +#define C0 v_f32 (Poly[0]) +#define C1 v_f32 (Poly[1]) +#define C2 v_f32 (Poly[2]) +#define C3 v_f32 (Poly[3]) +#define C4 v_f32 (Poly[4]) + +#define Shift v_f32 (0x1.8p23f) +#define InvLn2 v_f32 (0x1.715476p+0f) +#define Ln2hi v_f32 (0x1.62e4p-1f) +#define Ln2lo v_f32 (0x1.7f7d1cp-20f) + +static float32x4_t VPCS_ATTR NOINLINE +specialcase (float32x4_t poly, float32x4_t n, uint32x4_t e, float32x4_t absn) +{ + /* 2^n may overflow, break it up into s1*s2. */ + uint32x4_t b = (n <= v_f32 (0.0f)) & v_u32 (0x83000000); + float32x4_t s1 = vreinterpretq_f32_u32 (v_u32 (0x7f000000) + b); + float32x4_t s2 = vreinterpretq_f32_u32 (e - b); + uint32x4_t cmp = absn > v_f32 (192.0f); + float32x4_t r1 = s1 * s1; + float32x4_t r0 = poly * s1 * s2; + return vreinterpretq_f32_u32 ((cmp & vreinterpretq_u32_f32 (r1)) + | (~cmp & vreinterpretq_u32_f32 (r0))); +} + +float32x4_t VPCS_ATTR +_ZGVnN4v_expf_1u (float32x4_t x) +{ + float32x4_t n, r, scale, poly, absn, z; + uint32x4_t cmp, e; + + /* exp(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)] + x = ln2*n + r, with r in [-ln2/2, ln2/2]. 
*/ +#if 1 + z = vfmaq_f32 (Shift, x, InvLn2); + n = z - Shift; + r = vfmaq_f32 (x, n, -Ln2hi); + r = vfmaq_f32 (r, n, -Ln2lo); + e = vreinterpretq_u32_f32 (z) << 23; +#else + z = x * InvLn2; + n = vrndaq_f32 (z); + r = vfmaq_f32 (x, n, -Ln2hi); + r = vfmaq_f32 (r, n, -Ln2lo); + e = vreinterpretq_u32_s32 (vcvtaq_s32_f32 (z)) << 23; +#endif + scale = vreinterpretq_f32_u32 (e + v_u32 (0x3f800000)); + absn = vabsq_f32 (n); + cmp = absn > v_f32 (126.0f); + poly = vfmaq_f32 (C1, C0, r); + poly = vfmaq_f32 (C2, poly, r); + poly = vfmaq_f32 (C3, poly, r); + poly = vfmaq_f32 (C4, poly, r); + poly = vfmaq_f32 (v_f32 (1.0f), poly, r); + poly = vfmaq_f32 (v_f32 (1.0f), poly, r); + if (unlikely (v_any_u32 (cmp))) + return specialcase (poly, n, e, absn); + return scale * poly; +} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_log.c b/contrib/arm-optimized-routines/math/aarch64/v_log.c new file mode 100644 index 000000000000..1d1c1fa62c04 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_log.c @@ -0,0 +1,100 @@ +/* + * Double-precision vector log(x) function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "mathlib.h" +#include "v_math.h" + +static const struct data +{ + uint64x2_t min_norm; + uint32x4_t special_bound; + float64x2_t poly[5]; + float64x2_t ln2; + uint64x2_t sign_exp_mask; +} data = { + /* Worst-case error: 1.17 + 0.5 ulp. + Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ]. */ + .poly = { V2 (-0x1.ffffffffffff7p-2), V2 (0x1.55555555170d4p-2), + V2 (-0x1.0000000399c27p-2), V2 (0x1.999b2e90e94cap-3), + V2 (-0x1.554e550bd501ep-3) }, + .ln2 = V2 (0x1.62e42fefa39efp-1), + .min_norm = V2 (0x0010000000000000), + .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm. */ + .sign_exp_mask = V2 (0xfff0000000000000) +}; + +#define A(i) d->poly[i] +#define N (1 << V_LOG_TABLE_BITS) +#define IndexMask (N - 1) +#define Off v_u64 (0x3fe6900900000000) + +struct entry +{ + float64x2_t invc; + float64x2_t logc; +}; + +static inline struct entry +lookup (uint64x2_t i) +{ + /* Since N is a power of 2, n % N = n & (N - 1). */ + struct entry e; + uint64_t i0 = (i[0] >> (52 - V_LOG_TABLE_BITS)) & IndexMask; + uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask; + float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc); + float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc); + e.invc = vuzp1q_f64 (e0, e1); + e.logc = vuzp2q_f64 (e0, e1); + return e; +} + +static float64x2_t VPCS_ATTR NOINLINE +special_case (float64x2_t x, float64x2_t y, float64x2_t hi, float64x2_t r2, + uint32x2_t cmp) +{ + return v_call_f64 (log, x, vfmaq_f64 (hi, y, r2), vmovl_u32 (cmp)); +} + +float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + float64x2_t z, r, r2, p, y, kd, hi; + uint64x2_t ix, iz, tmp; + uint32x2_t cmp; + int64x2_t k; + struct entry e; + + ix = vreinterpretq_u64_f64 (x); + cmp = vcge_u32 (vsubhn_u64 (ix, d->min_norm), + vget_low_u32 (d->special_bound)); + + /* x = 2^k z; where z is in range [Off,2*Off) and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + tmp = vsubq_u64 (ix, Off); + k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift. */ + iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask)); + z = vreinterpretq_f64_u64 (iz); + e = lookup (tmp); + + /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. 
*/ + r = vfmaq_f64 (v_f64 (-1.0), z, e.invc); + kd = vcvtq_f64_s64 (k); + + /* hi = r + log(c) + k*Ln2. */ + hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2); + /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ + r2 = vmulq_f64 (r, r); + y = vfmaq_f64 (A (2), A (3), r); + p = vfmaq_f64 (A (0), A (1), r); + y = vfmaq_f64 (y, A (4), r2); + y = vfmaq_f64 (p, y, r2); + + if (unlikely (v_any_u32h (cmp))) + return special_case (x, y, hi, r2, cmp); + return vfmaq_f64 (hi, y, r2); +} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_log_data.c b/contrib/arm-optimized-routines/math/aarch64/v_log_data.c new file mode 100644 index 000000000000..82351bb14766 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_log_data.c @@ -0,0 +1,156 @@ +/* + * Lookup table for double-precision log(x) vector function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" + +#define N (1 << V_LOG_TABLE_BITS) + +const struct v_log_data __v_log_data = { + /* Algorithm: + + x = 2^k z + log(x) = k ln2 + log(c) + poly(z/c - 1) + + where z is in [a;2a) which is split into N subintervals (a=0x1.69009p-1, + N=128) and log(c) and 1/c for the ith subinterval comes from lookup tables: + + table[i].invc = 1/c + table[i].logc = (double)log(c) + + where c is near the center of the subinterval and is chosen by trying several + floating point invc candidates around 1/center and selecting one for which + the error in (double)log(c) is minimized (< 0x1p-74), except the subinterval + that contains 1 and the previous one got tweaked to avoid cancellation. */ + .table = { { 0x1.6a133d0dec120p+0, -0x1.62fe995eb963ap-2 }, + { 0x1.6815f2f3e42edp+0, -0x1.5d5a48dad6b67p-2 }, + { 0x1.661e39be1ac9ep+0, -0x1.57bde257d2769p-2 }, + { 0x1.642bfa30ac371p+0, -0x1.52294fbf2af55p-2 }, + { 0x1.623f1d916f323p+0, -0x1.4c9c7b598aa38p-2 }, + { 0x1.60578da220f65p+0, -0x1.47174fc5ff560p-2 }, + { 0x1.5e75349dea571p+0, -0x1.4199b7fa7b5cap-2 }, + { 0x1.5c97fd387a75ap+0, -0x1.3c239f48cfb99p-2 }, + { 0x1.5abfd2981f200p+0, -0x1.36b4f154d2aebp-2 }, + { 0x1.58eca051dc99cp+0, -0x1.314d9a0ff32fbp-2 }, + { 0x1.571e526d9df12p+0, -0x1.2bed85cca3cffp-2 }, + { 0x1.5554d555b3fcbp+0, -0x1.2694a11421af9p-2 }, + { 0x1.539015e2a20cdp+0, -0x1.2142d8d014fb2p-2 }, + { 0x1.51d0014ee0164p+0, -0x1.1bf81a2c77776p-2 }, + { 0x1.50148538cd9eep+0, -0x1.16b452a39c6a4p-2 }, + { 0x1.4e5d8f9f698a1p+0, -0x1.11776ffa6c67ep-2 }, + { 0x1.4cab0edca66bep+0, -0x1.0c416035020e0p-2 }, + { 0x1.4afcf1a9db874p+0, -0x1.071211aa10fdap-2 }, + { 0x1.495327136e16fp+0, -0x1.01e972e293b1bp-2 }, + { 0x1.47ad9e84af28fp+0, -0x1.f98ee587fd434p-3 }, + { 0x1.460c47b39ae15p+0, -0x1.ef5800ad716fbp-3 }, + { 0x1.446f12b278001p+0, -0x1.e52e160484698p-3 }, + { 0x1.42d5efdd720ecp+0, -0x1.db1104b19352ep-3 }, + { 0x1.4140cfe001a0fp+0, -0x1.d100ac59e0bd6p-3 }, + { 0x1.3fafa3b421f69p+0, -0x1.c6fced287c3bdp-3 }, + { 0x1.3e225c9c8ece5p+0, -0x1.bd05a7b317c29p-3 }, + { 0x1.3c98ec29a211ap+0, -0x1.b31abd229164fp-3 }, + { 0x1.3b13442a413fep+0, -0x1.a93c0edadb0a3p-3 }, + { 0x1.399156baa3c54p+0, -0x1.9f697ee30d7ddp-3 }, + { 0x1.38131639b4cdbp+0, -0x1.95a2efa9aa40ap-3 }, + { 0x1.36987540fbf53p+0, -0x1.8be843d796044p-3 }, + { 0x1.352166b648f61p+0, -0x1.82395ecc477edp-3 }, + { 0x1.33adddb3eb575p+0, -0x1.7896240966422p-3 }, + { 0x1.323dcd99fc1d3p+0, -0x1.6efe77aca8c55p-3 }, + { 0x1.30d129fefc7d2p+0, -0x1.65723e117ec5cp-3 }, + { 0x1.2f67e6b72fe7dp+0, -0x1.5bf15c0955706p-3 }, + { 0x1.2e01f7cf8b187p+0, -0x1.527bb6c111da1p-3 }, + 
{ 0x1.2c9f518ddc86ep+0, -0x1.491133c939f8fp-3 }, + { 0x1.2b3fe86e5f413p+0, -0x1.3fb1b90c7fc58p-3 }, + { 0x1.29e3b1211b25cp+0, -0x1.365d2cc485f8dp-3 }, + { 0x1.288aa08b373cfp+0, -0x1.2d13758970de7p-3 }, + { 0x1.2734abcaa8467p+0, -0x1.23d47a721fd47p-3 }, + { 0x1.25e1c82459b81p+0, -0x1.1aa0229f25ec2p-3 }, + { 0x1.2491eb1ad59c5p+0, -0x1.117655ddebc3bp-3 }, + { 0x1.23450a54048b5p+0, -0x1.0856fbf83ab6bp-3 }, + { 0x1.21fb1bb09e578p+0, -0x1.fe83fabbaa106p-4 }, + { 0x1.20b415346d8f7p+0, -0x1.ec6e8507a56cdp-4 }, + { 0x1.1f6fed179a1acp+0, -0x1.da6d68c7cc2eap-4 }, + { 0x1.1e2e99b93c7b3p+0, -0x1.c88078462be0cp-4 }, + { 0x1.1cf011a7a882ap+0, -0x1.b6a786a423565p-4 }, + { 0x1.1bb44b97dba5ap+0, -0x1.a4e2676ac7f85p-4 }, + { 0x1.1a7b3e66cdd4fp+0, -0x1.9330eea777e76p-4 }, + { 0x1.1944e11dc56cdp+0, -0x1.8192f134d5ad9p-4 }, + { 0x1.18112aebb1a6ep+0, -0x1.70084464f0538p-4 }, + { 0x1.16e013231b7e9p+0, -0x1.5e90bdec5cb1fp-4 }, + { 0x1.15b1913f156cfp+0, -0x1.4d2c3433c5536p-4 }, + { 0x1.14859cdedde13p+0, -0x1.3bda7e219879ap-4 }, + { 0x1.135c2dc68cfa4p+0, -0x1.2a9b732d27194p-4 }, + { 0x1.12353bdb01684p+0, -0x1.196eeb2b10807p-4 }, + { 0x1.1110bf25b85b4p+0, -0x1.0854be8ef8a7ep-4 }, + { 0x1.0feeafd2f8577p+0, -0x1.ee998cb277432p-5 }, + { 0x1.0ecf062c51c3bp+0, -0x1.ccadb79919fb9p-5 }, + { 0x1.0db1baa076c8bp+0, -0x1.aae5b1d8618b0p-5 }, + { 0x1.0c96c5bb3048ep+0, -0x1.89413015d7442p-5 }, + { 0x1.0b7e20263e070p+0, -0x1.67bfe7bf158dep-5 }, + { 0x1.0a67c2acd0ce3p+0, -0x1.46618f83941bep-5 }, + { 0x1.0953a6391e982p+0, -0x1.2525df1b0618ap-5 }, + { 0x1.0841c3caea380p+0, -0x1.040c8e2f77c6ap-5 }, + { 0x1.07321489b13eap+0, -0x1.c62aad39f738ap-6 }, + { 0x1.062491aee9904p+0, -0x1.847fe3bdead9cp-6 }, + { 0x1.05193497a7cc5p+0, -0x1.43183683400acp-6 }, + { 0x1.040ff6b5f5e9fp+0, -0x1.01f31c4e1d544p-6 }, + { 0x1.0308d19aa6127p+0, -0x1.82201d1e6b69ap-7 }, + { 0x1.0203beedb0c67p+0, -0x1.00dd0f3e1bfd6p-7 }, + { 0x1.010037d38bcc2p+0, -0x1.ff6fe1feb4e53p-9 }, + { 1.0, 0.0 }, + { 0x1.fc06d493cca10p-1, 0x1.fe91885ec8e20p-8 }, + { 0x1.f81e6ac3b918fp-1, 0x1.fc516f716296dp-7 }, + { 0x1.f44546ef18996p-1, 0x1.7bb4dd70a015bp-6 }, + { 0x1.f07b10382c84bp-1, 0x1.f84c99b34b674p-6 }, + { 0x1.ecbf7070e59d4p-1, 0x1.39f9ce4fb2d71p-5 }, + { 0x1.e91213f715939p-1, 0x1.7756c0fd22e78p-5 }, + { 0x1.e572a9a75f7b7p-1, 0x1.b43ee82db8f3ap-5 }, + { 0x1.e1e0e2c530207p-1, 0x1.f0b3fced60034p-5 }, + { 0x1.de5c72d8a8be3p-1, 0x1.165bd78d4878ep-4 }, + { 0x1.dae50fa5658ccp-1, 0x1.3425d2715ebe6p-4 }, + { 0x1.d77a71145a2dap-1, 0x1.51b8bd91b7915p-4 }, + { 0x1.d41c51166623ep-1, 0x1.6f15632c76a47p-4 }, + { 0x1.d0ca6ba0bb29fp-1, 0x1.8c3c88ecbe503p-4 }, + { 0x1.cd847e8e59681p-1, 0x1.a92ef077625dap-4 }, + { 0x1.ca4a499693e00p-1, 0x1.c5ed5745fa006p-4 }, + { 0x1.c71b8e399e821p-1, 0x1.e27876de1c993p-4 }, + { 0x1.c3f80faf19077p-1, 0x1.fed104fce4cdcp-4 }, + { 0x1.c0df92dc2b0ecp-1, 0x1.0d7bd9c17d78bp-3 }, + { 0x1.bdd1de3cbb542p-1, 0x1.1b76986cef97bp-3 }, + { 0x1.baceb9e1007a3p-1, 0x1.295913d24f750p-3 }, + { 0x1.b7d5ef543e55ep-1, 0x1.37239fa295d17p-3 }, + { 0x1.b4e749977d953p-1, 0x1.44d68dd78714bp-3 }, + { 0x1.b20295155478ep-1, 0x1.52722ebe5d780p-3 }, + { 0x1.af279f8e82be2p-1, 0x1.5ff6d12671f98p-3 }, + { 0x1.ac5638197fdf3p-1, 0x1.6d64c2389484bp-3 }, + { 0x1.a98e2f102e087p-1, 0x1.7abc4da40fddap-3 }, + { 0x1.a6cf5606d05c1p-1, 0x1.87fdbda1e8452p-3 }, + { 0x1.a4197fc04d746p-1, 0x1.95295b06a5f37p-3 }, + { 0x1.a16c80293dc01p-1, 0x1.a23f6d34abbc5p-3 }, + { 0x1.9ec82c4dc5bc9p-1, 0x1.af403a28e04f2p-3 }, + { 0x1.9c2c5a491f534p-1, 0x1.bc2c06a85721ap-3 }, + { 0x1.9998e1480b618p-1, 0x1.c903161240163p-3 }, + 
{ 0x1.970d9977c6c2dp-1, 0x1.d5c5aa93287ebp-3 }, + { 0x1.948a5c023d212p-1, 0x1.e274051823fa9p-3 }, + { 0x1.920f0303d6809p-1, 0x1.ef0e656300c16p-3 }, + { 0x1.8f9b698a98b45p-1, 0x1.fb9509f05aa2ap-3 }, + { 0x1.8d2f6b81726f6p-1, 0x1.04041821f37afp-2 }, + { 0x1.8acae5bb55badp-1, 0x1.0a340a49b3029p-2 }, + { 0x1.886db5d9275b8p-1, 0x1.105a7918a126dp-2 }, + { 0x1.8617ba567c13cp-1, 0x1.1677819812b84p-2 }, + { 0x1.83c8d27487800p-1, 0x1.1c8b405b40c0ep-2 }, + { 0x1.8180de3c5dbe7p-1, 0x1.2295d16cfa6b1p-2 }, + { 0x1.7f3fbe71cdb71p-1, 0x1.28975066318a2p-2 }, + { 0x1.7d055498071c1p-1, 0x1.2e8fd855d86fcp-2 }, + { 0x1.7ad182e54f65ap-1, 0x1.347f83d605e59p-2 }, + { 0x1.78a42c3c90125p-1, 0x1.3a666d1244588p-2 }, + { 0x1.767d342f76944p-1, 0x1.4044adb6f8ec4p-2 }, + { 0x1.745c7ef26b00ap-1, 0x1.461a5f077558cp-2 }, + { 0x1.7241f15769d0fp-1, 0x1.4be799e20b9c8p-2 }, + { 0x1.702d70d396e41p-1, 0x1.51ac76a6b79dfp-2 }, + { 0x1.6e1ee3700cd11p-1, 0x1.57690d5744a45p-2 }, + { 0x1.6c162fc9cbe02p-1, 0x1.5d1d758e45217p-2 } } +}; diff --git a/contrib/arm-optimized-routines/math/aarch64/v_logf.c b/contrib/arm-optimized-routines/math/aarch64/v_logf.c new file mode 100644 index 000000000000..66ebbbcd2b5a --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_logf.c @@ -0,0 +1,74 @@ +/* + * Single-precision vector log function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "mathlib.h" +#include "v_math.h" + +static const struct data +{ + uint32x4_t min_norm; + uint16x8_t special_bound; + float32x4_t poly[7]; + float32x4_t ln2, tiny_bound; + uint32x4_t off, mantissa_mask; +} data = { + /* 3.34 ulp error. */ + .poly = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f), + V4 (0x1.961348p-3f), V4 (-0x1.00187cp-2f), V4 (0x1.555d7cp-2f), + V4 (-0x1.ffffc8p-2f) }, + .ln2 = V4 (0x1.62e43p-1f), + .tiny_bound = V4 (0x1p-126), + .min_norm = V4 (0x00800000), + .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */ + .off = V4 (0x3f2aaaab), /* 0.666667. */ + .mantissa_mask = V4 (0x007fffff) +}; + +#define P(i) d->poly[7 - i] + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t x, float32x4_t y, float32x4_t r2, float32x4_t p, + uint16x4_t cmp) +{ + /* Fall back to scalar code. */ + return v_call_f32 (logf, x, vfmaq_f32 (p, y, r2), vmovl_u16 (cmp)); +} + +float32x4_t VPCS_ATTR V_NAME_F1 (log) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + float32x4_t n, p, q, r, r2, y; + uint32x4_t u; + uint16x4_t cmp; + + u = vreinterpretq_u32_f32 (x); + cmp = vcge_u16 (vsubhn_u32 (u, d->min_norm), + vget_low_u16 (d->special_bound)); + + /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */ + u = vsubq_u32 (u, d->off); + n = vcvtq_f32_s32 ( + vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */ + u = vandq_u32 (u, d->mantissa_mask); + u = vaddq_u32 (u, d->off); + r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f)); + + /* y = log(1+r) + n*ln2. */ + r2 = vmulq_f32 (r, r); + /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))). 
*/ + p = vfmaq_f32 (P (5), P (6), r); + q = vfmaq_f32 (P (3), P (4), r); + y = vfmaq_f32 (P (1), P (2), r); + p = vfmaq_f32 (p, P (7), r2); + q = vfmaq_f32 (q, p, r2); + y = vfmaq_f32 (y, q, r2); + p = vfmaq_f32 (r, d->ln2, n); + + if (unlikely (v_any_u16h (cmp))) + return special_case (x, y, r2, p, cmp); + return vfmaq_f32 (p, y, r2); +} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_math.h b/contrib/arm-optimized-routines/math/aarch64/v_math.h new file mode 100644 index 000000000000..1dc9916c6fb0 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_math.h @@ -0,0 +1,135 @@ +/* + * Vector math abstractions. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#ifndef _V_MATH_H +#define _V_MATH_H + +#if !__aarch64__ +# error "Cannot build without AArch64" +#endif + +#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs)) + +#define V_NAME_F1(fun) _ZGVnN4v_##fun##f +#define V_NAME_D1(fun) _ZGVnN2v_##fun +#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f +#define V_NAME_D2(fun) _ZGVnN2vv_##fun + +#include <stdint.h> +#include "../math_config.h" +#include <arm_neon.h> + +/* Shorthand helpers for declaring constants. */ +# define V2(X) { X, X } +# define V4(X) { X, X, X, X } +# define V8(X) { X, X, X, X, X, X, X, X } + +static inline int +v_any_u16h (uint16x4_t x) +{ + return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0; +} + +static inline int +v_lanes32 (void) +{ + return 4; +} + +static inline float32x4_t +v_f32 (float x) +{ + return (float32x4_t) V4 (x); +} +static inline uint32x4_t +v_u32 (uint32_t x) +{ + return (uint32x4_t) V4 (x); +} +/* true if any elements of a v_cond result is non-zero. */ +static inline int +v_any_u32 (uint32x4_t x) +{ + /* assume elements in x are either 0 or -1u. */ + return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0; +} +static inline int +v_any_u32h (uint32x2_t x) +{ + return vget_lane_u64 (vreinterpret_u64_u32 (x), 0) != 0; +} +static inline float32x4_t +v_lookup_f32 (const float *tab, uint32x4_t idx) +{ + return (float32x4_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]}; +} +static inline uint32x4_t +v_lookup_u32 (const uint32_t *tab, uint32x4_t idx) +{ + return (uint32x4_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]}; +} +static inline float32x4_t +v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p) +{ + return (float32x4_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1], + p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3]}; +} +static inline float32x4_t +v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2, + float32x4_t y, uint32x4_t p) +{ + return (float32x4_t){p[0] ? f (x1[0], x2[0]) : y[0], + p[1] ? f (x1[1], x2[1]) : y[1], + p[2] ? f (x1[2], x2[2]) : y[2], + p[3] ? f (x1[3], x2[3]) : y[3]}; +} + +static inline int +v_lanes64 (void) +{ + return 2; +} +static inline float64x2_t +v_f64 (double x) +{ + return (float64x2_t) V2 (x); +} +static inline uint64x2_t +v_u64 (uint64_t x) +{ + return (uint64x2_t) V2 (x); +} +/* true if any elements of a v_cond result is non-zero. */ +static inline int +v_any_u64 (uint64x2_t x) +{ + /* assume elements in x are either 0 or -1u. 
*/ + return vpaddd_u64 (x) != 0; +} +static inline float64x2_t +v_lookup_f64 (const double *tab, uint64x2_t idx) +{ + return (float64x2_t){tab[idx[0]], tab[idx[1]]}; +} +static inline uint64x2_t +v_lookup_u64 (const uint64_t *tab, uint64x2_t idx) +{ + return (uint64x2_t){tab[idx[0]], tab[idx[1]]}; +} +static inline float64x2_t +v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p) +{ + double p1 = p[1]; + double x1 = x[1]; + if (likely (p[0])) + y[0] = f (x[0]); + if (likely (p1)) + y[1] = f (x1); + return y; +} + +#endif diff --git a/contrib/arm-optimized-routines/math/aarch64/v_pow.c b/contrib/arm-optimized-routines/math/aarch64/v_pow.c new file mode 100644 index 000000000000..734f1663a283 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_pow.c @@ -0,0 +1,22 @@ +/* + * Double-precision vector pow function. + * + * Copyright (c) 2020-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "mathlib.h" +#include "v_math.h" + +float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y) +{ + float64x2_t z; + for (int lane = 0; lane < v_lanes64 (); lane++) + { + double sx = x[lane]; + double sy = y[lane]; + double sz = pow (sx, sy); + z[lane] = sz; + } + return z; +} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_powf.c b/contrib/arm-optimized-routines/math/aarch64/v_powf.c new file mode 100644 index 000000000000..3a4163ab0558 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_powf.c @@ -0,0 +1,148 @@ +/* + * Single-precision vector powf function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" + +#define Min v_u32 (0x00800000) +#define Max v_u32 (0x7f800000) +#define Thresh v_u32 (0x7f000000) /* Max - Min. */ +#define MantissaMask v_u32 (0x007fffff) + +#define A data.log2_poly +#define C data.exp2f_poly + +/* 2.6 ulp ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2). 
*/ +#define Off v_u32 (0x3f35d000) + +#define V_POWF_LOG2_TABLE_BITS 5 +#define V_EXP2F_TABLE_BITS 5 +#define Log2IdxMask v_u32 ((1 << V_POWF_LOG2_TABLE_BITS) - 1) +#define Scale ((double) (1 << V_EXP2F_TABLE_BITS)) + +static const struct +{ + struct + { + double invc, logc; + } log2_tab[1 << V_POWF_LOG2_TABLE_BITS]; + double log2_poly[4]; + uint64_t exp2f_tab[1 << V_EXP2F_TABLE_BITS]; + double exp2f_poly[3]; +} data = { + .log2_tab = {{0x1.6489890582816p+0, -0x1.e960f97b22702p-2 * Scale}, + {0x1.5cf19b35e3472p+0, -0x1.c993406cd4db6p-2 * Scale}, + {0x1.55aac0e956d65p+0, -0x1.aa711d9a7d0f3p-2 * Scale}, + {0x1.4eb0022977e01p+0, -0x1.8bf37bacdce9bp-2 * Scale}, + {0x1.47fcccda1dd1fp+0, -0x1.6e13b3519946ep-2 * Scale}, + {0x1.418ceabab68c1p+0, -0x1.50cb8281e4089p-2 * Scale}, + {0x1.3b5c788f1edb3p+0, -0x1.341504a237e2bp-2 * Scale}, + {0x1.3567de48e9c9ap+0, -0x1.17eaab624ffbbp-2 * Scale}, + {0x1.2fabc80fd19bap+0, -0x1.f88e708f8c853p-3 * Scale}, + {0x1.2a25200ce536bp+0, -0x1.c24b6da113914p-3 * Scale}, + {0x1.24d108e0152e3p+0, -0x1.8d02ee397cb1dp-3 * Scale}, + {0x1.1facd8ab2fbe1p+0, -0x1.58ac1223408b3p-3 * Scale}, + {0x1.1ab614a03efdfp+0, -0x1.253e6fd190e89p-3 * Scale}, + {0x1.15ea6d03af9ffp+0, -0x1.e5641882c12ffp-4 * Scale}, + {0x1.1147b994bb776p+0, -0x1.81fea712926f7p-4 * Scale}, + {0x1.0ccbf650593aap+0, -0x1.203e240de64a3p-4 * Scale}, + {0x1.0875408477302p+0, -0x1.8029b86a78281p-5 * Scale}, + {0x1.0441d42a93328p+0, -0x1.85d713190fb9p-6 * Scale}, + {0x1p+0, 0x0p+0 * Scale}, + {0x1.f1d006c855e86p-1, 0x1.4c1cc07312997p-5 * Scale}, + {0x1.e28c3341aa301p-1, 0x1.5e1848ccec948p-4 * Scale}, + {0x1.d4bdf9aa64747p-1, 0x1.04cfcb7f1196fp-3 * Scale}, + {0x1.c7b45a24e5803p-1, 0x1.582813d463c21p-3 * Scale}, + {0x1.bb5f5eb2ed60ap-1, 0x1.a936fa68760ccp-3 * Scale}, + {0x1.afb0bff8fe6b4p-1, 0x1.f81bc31d6cc4ep-3 * Scale}, + {0x1.a49badf7ab1f5p-1, 0x1.2279a09fae6b1p-2 * Scale}, + {0x1.9a14a111fc4c9p-1, 0x1.47ec0b6df5526p-2 * Scale}, + {0x1.901131f5b2fdcp-1, 0x1.6c71762280f1p-2 * Scale}, + {0x1.8687f73f6d865p-1, 0x1.90155070798dap-2 * Scale}, + {0x1.7d7067eb77986p-1, 0x1.b2e23b1d3068cp-2 * Scale}, + {0x1.74c2c1cf97b65p-1, 0x1.d4e21b0daa86ap-2 * Scale}, + {0x1.6c77f37cff2a1p-1, 0x1.f61e2a2f67f3fp-2 * Scale},}, + .log2_poly = { /* rel err: 1.5 * 2^-30. */ + -0x1.6ff5daa3b3d7cp-2 * Scale, 0x1.ec81d03c01aebp-2 * Scale, + -0x1.71547bb43f101p-1 * Scale, 0x1.7154764a815cbp0 * Scale,}, + .exp2f_tab = {0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, + 0x3fef9301d0125b51, 0x3fef72b83c7d517b, 0x3fef54873168b9aa, + 0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef06fe0a31b715, + 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, + 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, + 0x3feea47eb03a5585, 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, + 0x3feea11473eb0187, 0x3feea589994cce13, 0x3feeace5422aa0db, + 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, + 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, + 0x3fef3720dcef9069, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, + 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,}, + .exp2f_poly = { /* rel err: 1.69 * 2^-34. 
*/ + 0x1.c6af84b912394p-5 / Scale / Scale / Scale, + 0x1.ebfce50fac4f3p-3 / Scale / Scale, + 0x1.62e42ff0c52d6p-1 / Scale}}; + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t x, float32x4_t y, float32x4_t ret, uint32x4_t cmp) +{ + return v_call2_f32 (powf, x, y, ret, cmp); +} + +float32x4_t VPCS_ATTR V_NAME_F2 (pow) (float32x4_t x, float32x4_t y) +{ + uint32x4_t u = vreinterpretq_u32_f32 (x); + uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (u, Min), Thresh); + uint32x4_t tmp = vsubq_u32 (u, Off); + uint32x4_t i = vandq_u32 (vshrq_n_u32 (tmp, (23 - V_POWF_LOG2_TABLE_BITS)), + Log2IdxMask); + uint32x4_t top = vbicq_u32 (tmp, MantissaMask); + uint32x4_t iz = vsubq_u32 (u, top); + int32x4_t k = vshrq_n_s32 (vreinterpretq_s32_u32 (top), + 23 - V_EXP2F_TABLE_BITS); /* arithmetic shift. */ + + float32x4_t ret; + for (int lane = 0; lane < 4; lane++) + { + /* Use double precision for each lane. */ + double invc = data.log2_tab[i[lane]].invc; + double logc = data.log2_tab[i[lane]].logc; + double z = (double) asfloat (iz[lane]); + + /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k. */ + double r = __builtin_fma (z, invc, -1.0); + double y0 = logc + (double) k[lane]; + + /* Polynomial to approximate log1p(r)/ln2. */ + double logx = A[0]; + logx = r * logx + A[1]; + logx = r * logx + A[2]; + logx = r * logx + A[3]; + logx = r * logx + y0; + double ylogx = y[lane] * logx; + cmp[lane] = (asuint64 (ylogx) >> 47 & 0xffff) + >= asuint64 (126.0 * (1 << V_EXP2F_TABLE_BITS)) >> 47 + ? 1 + : cmp[lane]; + + /* N*x = k + r with r in [-1/2, 1/2]. */ + double kd = round (ylogx); + uint64_t ki = lround (ylogx); + r = ylogx - kd; + + /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */ + uint64_t t = data.exp2f_tab[ki % (1 << V_EXP2F_TABLE_BITS)]; + t += ki << (52 - V_EXP2F_TABLE_BITS); + double s = asdouble (t); + double p = C[0]; + p = __builtin_fma (p, r, C[1]); + p = __builtin_fma (p, r, C[2]); + p = __builtin_fma (p, s * r, s); + + ret[lane] = p; + } + if (unlikely (v_any_u32 (cmp))) + return special_case (x, y, ret, cmp); + return ret; +} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_sin.c b/contrib/arm-optimized-routines/math/aarch64/v_sin.c new file mode 100644 index 000000000000..04129c31133d --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_sin.c @@ -0,0 +1,97 @@ +/* + * Double-precision vector sin function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "mathlib.h" +#include "v_math.h" + +static const struct data +{ + float64x2_t poly[7]; + float64x2_t range_val, inv_pi, shift, pi_1, pi_2, pi_3; +} data = { + .poly = { V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7), + V2 (-0x1.a01a019936f27p-13), V2 (0x1.71de37a97d93ep-19), + V2 (-0x1.ae633919987c6p-26), V2 (0x1.60e277ae07cecp-33), + V2 (-0x1.9e9540300a1p-41) }, + + .range_val = V2 (0x1p23), + .inv_pi = V2 (0x1.45f306dc9c883p-2), + .pi_1 = V2 (0x1.921fb54442d18p+1), + .pi_2 = V2 (0x1.1a62633145c06p-53), + .pi_3 = V2 (0x1.c1cd129024e09p-106), + .shift = V2 (0x1.8p52), +}; + +#if WANT_SIMD_EXCEPT +# define TinyBound v_u64 (0x3000000000000000) /* asuint64 (0x1p-255). */ +# define Thresh v_u64 (0x1160000000000000) /* RangeVal - TinyBound. 
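   A single unsigned compare, (ir - TinyBound) >= Thresh, flags both tails
   at once: inputs below TinyBound wrap around to huge values, and inputs
   at or above RangeVal (TinyBound + Thresh = asuint64 (0x1p23)) exceed
   Thresh directly.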
*/ +#endif + +#define C(i) d->poly[i] + +static float64x2_t VPCS_ATTR NOINLINE +special_case (float64x2_t x, float64x2_t y, uint64x2_t odd, uint64x2_t cmp) +{ + y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd)); + return v_call_f64 (sin, x, y, cmp); +} + +/* Vector (AdvSIMD) sin approximation. + Maximum observed error in [-pi/2, pi/2], where argument is not reduced, + is 2.87 ULP: + _ZGVnN2v_sin (0x1.921d5c6a07142p+0) got 0x1.fffffffa7dc02p-1 + want 0x1.fffffffa7dc05p-1 + Maximum observed error in the entire non-special domain ([-2^23, 2^23]) + is 3.22 ULP: + _ZGVnN2v_sin (0x1.5702447b6f17bp+22) got 0x1.ffdcd125c84fbp-3 + want 0x1.ffdcd125c84f8p-3. */ +float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + float64x2_t n, r, r2, r3, r4, y, t1, t2, t3; + uint64x2_t odd, cmp; + +#if WANT_SIMD_EXCEPT + /* Detect |x| <= TinyBound or |x| >= RangeVal. If fenv exceptions are to be + triggered correctly, set any special lanes to 1 (which is neutral w.r.t. + fenv). These lanes will be fixed by special-case handler later. */ + uint64x2_t ir = vreinterpretq_u64_f64 (vabsq_f64 (x)); + cmp = vcgeq_u64 (vsubq_u64 (ir, TinyBound), Thresh); + r = vbslq_f64 (cmp, vreinterpretq_f64_u64 (cmp), x); +#else + r = x; + cmp = vcageq_f64 (x, d->range_val); +#endif + + /* n = rint(|x|/pi). */ + n = vfmaq_f64 (d->shift, d->inv_pi, r); + odd = vshlq_n_u64 (vreinterpretq_u64_f64 (n), 63); + n = vsubq_f64 (n, d->shift); + + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ + r = vfmsq_f64 (r, d->pi_1, n); + r = vfmsq_f64 (r, d->pi_2, n); + r = vfmsq_f64 (r, d->pi_3, n); + + /* sin(r) poly approx. */ + r2 = vmulq_f64 (r, r); + r3 = vmulq_f64 (r2, r); + r4 = vmulq_f64 (r2, r2); + + t1 = vfmaq_f64 (C (4), C (5), r2); + t2 = vfmaq_f64 (C (2), C (3), r2); + t3 = vfmaq_f64 (C (0), C (1), r2); + + y = vfmaq_f64 (t1, C (6), r4); + y = vfmaq_f64 (t2, y, r4); + y = vfmaq_f64 (t3, y, r4); + y = vfmaq_f64 (r, y, r3); + + if (unlikely (v_any_u64 (cmp))) + return special_case (x, y, odd, cmp); + return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd)); +} diff --git a/contrib/arm-optimized-routines/math/aarch64/v_sinf.c b/contrib/arm-optimized-routines/math/aarch64/v_sinf.c new file mode 100644 index 000000000000..336879844459 --- /dev/null +++ b/contrib/arm-optimized-routines/math/aarch64/v_sinf.c @@ -0,0 +1,82 @@ +/* + * Single-precision vector sin function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "mathlib.h" +#include "v_math.h" + +static const struct data +{ + float32x4_t poly[4]; + float32x4_t range_val, inv_pi, shift, pi_1, pi_2, pi_3; +} data = { + /* 1.886 ulp error. */ + .poly = { V4 (-0x1.555548p-3f), V4 (0x1.110df4p-7f), V4 (-0x1.9f42eap-13f), + V4 (0x1.5b2e76p-19f) }, + + .pi_1 = V4 (0x1.921fb6p+1f), + .pi_2 = V4 (-0x1.777a5cp-24f), + .pi_3 = V4 (-0x1.ee59dap-49f), + + .inv_pi = V4 (0x1.45f306p-2f), + .shift = V4 (0x1.8p+23f), + .range_val = V4 (0x1p20f) +}; + +#if WANT_SIMD_EXCEPT +# define TinyBound v_u32 (0x21000000) /* asuint32(0x1p-61f). */ +# define Thresh v_u32 (0x28800000) /* RangeVal - TinyBound. */ +#endif + +#define C(i) d->poly[i] + +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp) +{ + /* Fall back to scalar code. 
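   v_call_f32 applies the scalar routine lane by lane, only where cmp is
   set -- conceptually (a sketch mirroring the v_call_f64 helper above,
   not the library's exact definition):

     for (int i = 0; i < 4; i++)
       if (cmp[i])
         y[i] = f (x[i]);

   The quadrant sign is folded back into y via the XOR with odd first, so
   lanes that are not special keep their already-correct result.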
*/ + y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd)); + return v_call_f32 (sinf, x, y, cmp); +} + +float32x4_t VPCS_ATTR V_NAME_F1 (sin) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + float32x4_t n, r, r2, y; + uint32x4_t odd, cmp; + +#if WANT_SIMD_EXCEPT + uint32x4_t ir = vreinterpretq_u32_f32 (vabsq_f32 (x)); + cmp = vcgeq_u32 (vsubq_u32 (ir, TinyBound), Thresh); + /* If fenv exceptions are to be triggered correctly, set any special lanes + to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by + special-case handler later. */ + r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x); +#else + r = x; + cmp = vcageq_f32 (x, d->range_val); +#endif + + /* n = rint(|x|/pi) */ + n = vfmaq_f32 (d->shift, d->inv_pi, r); + odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), 31); + n = vsubq_f32 (n, d->shift); + + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2) */ + r = vfmsq_f32 (r, d->pi_1, n); + r = vfmsq_f32 (r, d->pi_2, n); + r = vfmsq_f32 (r, d->pi_3, n); + + /* y = sin(r) */ + r2 = vmulq_f32 (r, r); + y = vfmaq_f32 (C (2), C (3), r2); + y = vfmaq_f32 (C (1), y, r2); + y = vfmaq_f32 (C (0), y, r2); + y = vfmaq_f32 (r, vmulq_f32 (y, r2), r); + + if (unlikely (v_any_u32 (cmp))) + return special_case (x, y, odd, cmp); + return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd)); +} diff --git a/contrib/arm-optimized-routines/math/cosf.c b/contrib/arm-optimized-routines/math/cosf.c index f29f19474e23..6293ce8f1b7d 100644 --- a/contrib/arm-optimized-routines/math/cosf.c +++ b/contrib/arm-optimized-routines/math/cosf.c @@ -1,8 +1,8 @@ /* * Single-precision cos function. * - * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2018-2021, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <stdint.h> @@ -22,7 +22,7 @@ cosf (float y) int n; const sincos_t *p = &__sincosf_table[0]; - if (abstop12 (y) < abstop12 (pio4)) + if (abstop12 (y) < abstop12 (pio4f)) { double x2 = x * x; diff --git a/contrib/arm-optimized-routines/math/erf.c b/contrib/arm-optimized-routines/math/erf.c index 12d7e5160df7..5f9f40dda264 100644 --- a/contrib/arm-optimized-routines/math/erf.c +++ b/contrib/arm-optimized-routines/math/erf.c @@ -2,7 +2,7 @@ * Double-precision erf(x) function. * * Copyright (c) 2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/contrib/arm-optimized-routines/math/erf_data.c b/contrib/arm-optimized-routines/math/erf_data.c index 807875bdd7f5..10cf1fae93e0 100644 --- a/contrib/arm-optimized-routines/math/erf_data.c +++ b/contrib/arm-optimized-routines/math/erf_data.c @@ -2,7 +2,7 @@ * Shared data between erf and erfc. * * Copyright (c) 2019-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/contrib/arm-optimized-routines/math/erff.c b/contrib/arm-optimized-routines/math/erff.c index a58e82565dc3..9fa476dbbab2 100644 --- a/contrib/arm-optimized-routines/math/erff.c +++ b/contrib/arm-optimized-routines/math/erff.c @@ -2,7 +2,7 @@ * Single-precision erf(x) function. * * Copyright (c) 2020, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <stdint.h> diff --git a/contrib/arm-optimized-routines/math/erff_data.c b/contrib/arm-optimized-routines/math/erff_data.c index fa6b1ef4dedb..f822788d0dd8 100644 --- a/contrib/arm-optimized-routines/math/erff_data.c +++ b/contrib/arm-optimized-routines/math/erff_data.c @@ -2,7 +2,7 @@ * Data for approximation of erff. * * Copyright (c) 2019-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/contrib/arm-optimized-routines/math/exp.c b/contrib/arm-optimized-routines/math/exp.c index 7f5024cd8792..1de500c31f3e 100644 --- a/contrib/arm-optimized-routines/math/exp.c +++ b/contrib/arm-optimized-routines/math/exp.c @@ -2,7 +2,7 @@ * Double-precision e^x function. * * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <float.h> diff --git a/contrib/arm-optimized-routines/math/exp10.c b/contrib/arm-optimized-routines/math/exp10.c new file mode 100644 index 000000000000..0fbec4c694ca --- /dev/null +++ b/contrib/arm-optimized-routines/math/exp10.c @@ -0,0 +1,129 @@ +/* + * Double-precision 10^x function. + * + * Copyright (c) 2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" + +#define N (1 << EXP_TABLE_BITS) +#define IndexMask (N - 1) +#define OFlowBound 0x1.34413509f79ffp8 /* log10(DBL_MAX). */ +#define UFlowBound -0x1.5ep+8 /* -350. */ +#define SmallTop 0x3c6 /* top12(0x1p-57). */ +#define BigTop 0x407 /* top12(0x1p8). */ +#define Thresh 0x41 /* BigTop - SmallTop. */ +#define Shift __exp_data.shift +#define C(i) __exp_data.exp10_poly[i] + +static double +special_case (uint64_t sbits, double_t tmp, uint64_t ki) +{ + double_t scale, y; + + if (ki - (1ull << 16) < 0x80000000) + { + /* The exponent of scale might have overflowed by 1. */ + sbits -= 1ull << 52; + scale = asdouble (sbits); + y = 2 * (scale + scale * tmp); + return check_oflow (eval_as_double (y)); + } + + /* n < 0, need special care in the subnormal range. */ + sbits += 1022ull << 52; + scale = asdouble (sbits); + y = scale + scale * tmp; + + if (y < 1.0) + { + /* Round y to the right precision before scaling it into the subnormal + range to avoid double rounding that can cause 0.5+E/2 ulp error where + E is the worst-case ulp error outside the subnormal range. So this + is only useful if the goal is better than 1 ulp worst-case error. */ + double_t lo = scale - y + scale * tmp; + double_t hi = 1.0 + y; + lo = 1.0 - hi + y + lo; + y = eval_as_double (hi + lo) - 1.0; + /* Avoid -0.0 with downward rounding. */ + if (WANT_ROUNDING && y == 0.0) + y = 0.0; + /* The underflow exception needs to be signaled explicitly. */ + force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); + } + y = 0x1p-1022 * y; + + return check_uflow (y); +} + +/* Double-precision 10^x approximation. Largest observed error is ~0.513 ULP. */ +double +exp10 (double x) +{ + uint64_t ix = asuint64 (x); + uint32_t abstop = (ix >> 52) & 0x7ff; + + if (unlikely (abstop - SmallTop >= Thresh)) + { + if (abstop - SmallTop >= 0x80000000) + /* Avoid spurious underflow for tiny x. + Note: 0 is common input. */ + return x + 1; + if (abstop == 0x7ff) + return ix == asuint64 (-INFINITY) ? 
0.0 : x + 1.0; + if (x >= OFlowBound) + return __math_oflow (0); + if (x < UFlowBound) + return __math_uflow (0); + + /* Large x is special-cased below. */ + abstop = 0; + } + + /* Reduce x: z = x * N / log10(2), k = round(z). */ + double_t z = __exp_data.invlog10_2N * x; + double_t kd; + int64_t ki; +#if TOINT_INTRINSICS + kd = roundtoint (z); + ki = converttoint (z); +#else + kd = eval_as_double (z + Shift); + kd -= Shift; + ki = kd; +#endif + + /* r = x - k * log10(2), r in [-0.5, 0.5]. */ + double_t r = x; + r = __exp_data.neglog10_2hiN * kd + r; + r = __exp_data.neglog10_2loN * kd + r; + + /* exp10(x) = 2^(k/N) * 2^(r/N). + Approximate the two components separately. */ + + /* s = 2^(k/N), using lookup table. */ + uint64_t e = ki << (52 - EXP_TABLE_BITS); + uint64_t i = (ki & IndexMask) * 2; + uint64_t u = __exp_data.tab[i + 1]; + uint64_t sbits = u + e; + + double_t tail = asdouble (__exp_data.tab[i]); + + /* 2^(r/N) ~= 1 + r * Poly(r). */ + double_t r2 = r * r; + double_t p = C (0) + r * C (1); + double_t y = C (2) + r * C (3); + y = y + r2 * C (4); + y = p + r2 * y; + y = tail + y * r; + + if (unlikely (abstop == 0)) + return special_case (sbits, y, ki); + + /* Assemble components: + y = 2^(r/N) * 2^(k/N) + ~= (y + 1) * s. */ + double_t s = asdouble (sbits); + return eval_as_double (s * y + s); +} diff --git a/contrib/arm-optimized-routines/math/exp2.c b/contrib/arm-optimized-routines/math/exp2.c index 35ab39f22ed5..a1eee44f1f48 100644 --- a/contrib/arm-optimized-routines/math/exp2.c +++ b/contrib/arm-optimized-routines/math/exp2.c @@ -2,7 +2,7 @@ * Double-precision 2^x function. * * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <float.h> diff --git a/contrib/arm-optimized-routines/math/exp2f.c b/contrib/arm-optimized-routines/math/exp2f.c index 94b32538aa0d..776c3ddf7663 100644 --- a/contrib/arm-optimized-routines/math/exp2f.c +++ b/contrib/arm-optimized-routines/math/exp2f.c @@ -2,7 +2,7 @@ * Single-precision 2^x function. * * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <math.h> diff --git a/contrib/arm-optimized-routines/math/exp2f_data.c b/contrib/arm-optimized-routines/math/exp2f_data.c index 3fb0ad11b15a..f0cb7fccacd1 100644 --- a/contrib/arm-optimized-routines/math/exp2f_data.c +++ b/contrib/arm-optimized-routines/math/exp2f_data.c @@ -2,7 +2,7 @@ * Shared data between expf, exp2f and powf. * * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/contrib/arm-optimized-routines/math/exp_data.c b/contrib/arm-optimized-routines/math/exp_data.c index cba76832566f..c20b1b2d3e06 100644 --- a/contrib/arm-optimized-routines/math/exp_data.c +++ b/contrib/arm-optimized-routines/math/exp_data.c @@ -1,8 +1,8 @@ /* * Shared data between exp, exp2 and pow. * - * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2018-2023, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" @@ -12,6 +12,7 @@ const struct exp_data __exp_data = { // N/ln2 .invln2N = 0x1.71547652b82fep0 * N, +.invlog10_2N = 0x1.a934f0979a371p1 * N, // -ln2/N #if N == 64 .negln2hiN = -0x1.62e42fefa0000p-7, @@ -26,6 +27,8 @@ const struct exp_data __exp_data = { .negln2hiN = -0x1.62e42fef80000p-10, .negln2loN = -0x1.1cf79abc9e3b4p-45, #endif +.neglog10_2hiN = -0x1.3441350ap-2 / N, +.neglog10_2loN = 0x1.0c0219dc1da99p-39 / N, // Used for rounding when !TOINT_INTRINSICS #if EXP_USE_TOINT_NARROW .shift = 0x1800000000.8p0, @@ -147,6 +150,24 @@ const struct exp_data __exp_data = { 0x1.3b2ab786ee1dap-7, #endif }, +.exp10_poly = { +#if EXP10_POLY_WIDE +/* Range is wider if using shift-based reduction: coeffs generated + using Remez in [-log10(2)/128, log10(2)/128 ]. */ +0x1.26bb1bbb55515p1, +0x1.53524c73cd32bp1, +0x1.0470591e1a108p1, +0x1.2bd77b12fe9a8p0, +0x1.14289fef24b78p-1 +#else +/* Coeffs generated using Remez in [-log10(2)/256, log10(2)/256 ]. */ +0x1.26bb1bbb55516p1, +0x1.53524c73ce9fep1, +0x1.0470591ce4b26p1, +0x1.2bd76577fe684p0, +0x1.1446eeccd0efbp-1 +#endif +}, // 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N) // tab[2*k] = asuint64(T[k]) // tab[2*k+1] = asuint64(H[k]) - (k << 52)/N diff --git a/contrib/arm-optimized-routines/math/expf.c b/contrib/arm-optimized-routines/math/expf.c index 9b2f0c3d8c56..08a20d59e491 100644 --- a/contrib/arm-optimized-routines/math/expf.c +++ b/contrib/arm-optimized-routines/math/expf.c @@ -2,7 +2,7 @@ * Single-precision e^x function. * * Copyright (c) 2017-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <math.h> diff --git a/contrib/arm-optimized-routines/math/include/mathlib.h b/contrib/arm-optimized-routines/math/include/mathlib.h index 279d829d8ea1..64cbb9c1f850 100644 --- a/contrib/arm-optimized-routines/math/include/mathlib.h +++ b/contrib/arm-optimized-routines/math/include/mathlib.h @@ -1,8 +1,8 @@ /* * Public API. * - * Copyright (c) 2015-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2015-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef _MATHLIB_H @@ -18,74 +18,33 @@ float cosf (float); void sincosf (float, float*, float*); double exp (double); +double exp10 (double); double exp2 (double); double log (double); double log2 (double); double pow (double, double); -/* Scalar functions using the vector algorithm with identical result. */ -float __s_sinf (float); -float __s_cosf (float); -float __s_expf (float); -float __s_expf_1u (float); -float __s_exp2f (float); -float __s_exp2f_1u (float); -float __s_logf (float); -float __s_powf (float, float); -double __s_sin (double); -double __s_cos (double); -double __s_exp (double); -double __s_log (double); -double __s_pow (double, double); - #if __aarch64__ -#if __GNUC__ >= 5 +# if __GNUC__ >= 5 typedef __Float32x4_t __f32x4_t; typedef __Float64x2_t __f64x2_t; -#elif __clang_major__*100+__clang_minor__ >= 305 +# elif __clang_major__*100+__clang_minor__ >= 305 typedef __attribute__((__neon_vector_type__(4))) float __f32x4_t; typedef __attribute__((__neon_vector_type__(2))) double __f64x2_t; -#else -#error Unsupported compiler -#endif - -/* Vector functions following the base PCS. 
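   (The base PCS preserves only the low 64 bits of v8-v15 across calls;
   the aarch64_vector_pcs attribute used for the _ZGVnN* names below
   additionally preserves q8-q23 in full, so callers can keep vector
   state live across the call.)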
*/ -__f32x4_t __v_sinf (__f32x4_t); -__f32x4_t __v_cosf (__f32x4_t); -__f32x4_t __v_expf (__f32x4_t); -__f32x4_t __v_expf_1u (__f32x4_t); -__f32x4_t __v_exp2f (__f32x4_t); -__f32x4_t __v_exp2f_1u (__f32x4_t); -__f32x4_t __v_logf (__f32x4_t); -__f32x4_t __v_powf (__f32x4_t, __f32x4_t); -__f64x2_t __v_sin (__f64x2_t); -__f64x2_t __v_cos (__f64x2_t); -__f64x2_t __v_exp (__f64x2_t); -__f64x2_t __v_log (__f64x2_t); -__f64x2_t __v_pow (__f64x2_t, __f64x2_t); +# else +# error Unsupported compiler +# endif -#if __GNUC__ >= 9 || __clang_major__ >= 8 -#define __vpcs __attribute__((__aarch64_vector_pcs__)) - -/* Vector functions following the vector PCS. */ -__vpcs __f32x4_t __vn_sinf (__f32x4_t); -__vpcs __f32x4_t __vn_cosf (__f32x4_t); -__vpcs __f32x4_t __vn_expf (__f32x4_t); -__vpcs __f32x4_t __vn_expf_1u (__f32x4_t); -__vpcs __f32x4_t __vn_exp2f (__f32x4_t); -__vpcs __f32x4_t __vn_exp2f_1u (__f32x4_t); -__vpcs __f32x4_t __vn_logf (__f32x4_t); -__vpcs __f32x4_t __vn_powf (__f32x4_t, __f32x4_t); -__vpcs __f64x2_t __vn_sin (__f64x2_t); -__vpcs __f64x2_t __vn_cos (__f64x2_t); -__vpcs __f64x2_t __vn_exp (__f64x2_t); -__vpcs __f64x2_t __vn_log (__f64x2_t); -__vpcs __f64x2_t __vn_pow (__f64x2_t, __f64x2_t); +# if __GNUC__ >= 9 || __clang_major__ >= 8 +# undef __vpcs +# define __vpcs __attribute__((__aarch64_vector_pcs__)) /* Vector functions following the vector PCS using ABI names. */ __vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t); +__vpcs __f32x4_t _ZGVnN4v_expf_1u (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t); +__vpcs __f32x4_t _ZGVnN4v_exp2f_1u (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_exp2f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t); @@ -94,7 +53,7 @@ __vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t); __vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t); -#endif +# endif #endif #endif diff --git a/contrib/arm-optimized-routines/math/log.c b/contrib/arm-optimized-routines/math/log.c index d3b7bc60747c..43dfc2a744f0 100644 --- a/contrib/arm-optimized-routines/math/log.c +++ b/contrib/arm-optimized-routines/math/log.c @@ -2,7 +2,7 @@ * Double-precision log(x) function. * * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <float.h> diff --git a/contrib/arm-optimized-routines/math/log2.c b/contrib/arm-optimized-routines/math/log2.c index 55102b772969..3f9c21b03962 100644 --- a/contrib/arm-optimized-routines/math/log2.c +++ b/contrib/arm-optimized-routines/math/log2.c @@ -2,7 +2,7 @@ * Double-precision log2(x) function. * * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <float.h> diff --git a/contrib/arm-optimized-routines/math/log2_data.c b/contrib/arm-optimized-routines/math/log2_data.c index 3fc9b47c1f03..293bd7df4118 100644 --- a/contrib/arm-optimized-routines/math/log2_data.c +++ b/contrib/arm-optimized-routines/math/log2_data.c @@ -2,7 +2,7 @@ * Data for log2. * * Copyright (c) 2018, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/contrib/arm-optimized-routines/math/log2f.c b/contrib/arm-optimized-routines/math/log2f.c index acb629e6846c..0a44fa2024f6 100644 --- a/contrib/arm-optimized-routines/math/log2f.c +++ b/contrib/arm-optimized-routines/math/log2f.c @@ -2,7 +2,7 @@ * Single-precision log2 function. * * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <math.h> diff --git a/contrib/arm-optimized-routines/math/log2f_data.c b/contrib/arm-optimized-routines/math/log2f_data.c index f3546d730aba..4866ef7f8171 100644 --- a/contrib/arm-optimized-routines/math/log2f_data.c +++ b/contrib/arm-optimized-routines/math/log2f_data.c @@ -2,7 +2,7 @@ * Data definition for log2f. * * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/contrib/arm-optimized-routines/math/log_data.c b/contrib/arm-optimized-routines/math/log_data.c index 96a098d42c16..3ecc1f40a822 100644 --- a/contrib/arm-optimized-routines/math/log_data.c +++ b/contrib/arm-optimized-routines/math/log_data.c @@ -2,7 +2,7 @@ * Data for log. * * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/contrib/arm-optimized-routines/math/logf.c b/contrib/arm-optimized-routines/math/logf.c index cfbaee12df10..820f74c3e66a 100644 --- a/contrib/arm-optimized-routines/math/logf.c +++ b/contrib/arm-optimized-routines/math/logf.c @@ -1,8 +1,8 @@ /* * Single-precision log function. * - * Copyright (c) 2017-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2017-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <math.h> @@ -57,7 +57,7 @@ logf (float x) tmp = ix - OFF; i = (tmp >> (23 - LOGF_TABLE_BITS)) % N; k = (int32_t) tmp >> 23; /* arithmetic shift */ - iz = ix - (tmp & 0x1ff << 23); + iz = ix - (tmp & 0xff800000); invc = T[i].invc; logc = T[i].logc; z = (double_t) asfloat (iz); diff --git a/contrib/arm-optimized-routines/math/logf_data.c b/contrib/arm-optimized-routines/math/logf_data.c index e8973ce4fedc..04247684755f 100644 --- a/contrib/arm-optimized-routines/math/logf_data.c +++ b/contrib/arm-optimized-routines/math/logf_data.c @@ -2,7 +2,7 @@ * Data definition for logf. * * Copyright (c) 2017-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/contrib/arm-optimized-routines/math/math_config.h b/contrib/arm-optimized-routines/math/math_config.h index e85104337048..faf77b31fc99 100644 --- a/contrib/arm-optimized-routines/math/math_config.h +++ b/contrib/arm-optimized-routines/math/math_config.h @@ -1,8 +1,8 @@ /* * Configuration for math routines. * - * Copyright (c) 2017-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2017-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef _MATH_CONFIG_H @@ -92,6 +92,46 @@ # define unlikely(x) (x) #endif +/* Return ptr but hide its value from the compiler so accesses through it + cannot be optimized based on the contents. 
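   The empty asm with a "+r" constraint makes the pointer opaque to the
   optimizer: the pointed-to tables cannot be constant-folded, and every
   field is addressed relative to a single base register instead of being
   materialized as a separate literal constant.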
*/ +#define ptr_barrier(ptr) \ + ({ \ + __typeof (ptr) __ptr = (ptr); \ + __asm("" : "+r"(__ptr)); \ + __ptr; \ + }) + +/* Symbol renames to avoid libc conflicts. */ +#define __math_oflowf arm_math_oflowf +#define __math_uflowf arm_math_uflowf +#define __math_may_uflowf arm_math_may_uflowf +#define __math_divzerof arm_math_divzerof +#define __math_oflow arm_math_oflow +#define __math_uflow arm_math_uflow +#define __math_may_uflow arm_math_may_uflow +#define __math_divzero arm_math_divzero +#define __math_invalidf arm_math_invalidf +#define __math_invalid arm_math_invalid +#define __math_check_oflow arm_math_check_oflow +#define __math_check_uflow arm_math_check_uflow +#define __math_check_oflowf arm_math_check_oflowf +#define __math_check_uflowf arm_math_check_uflowf + +#define __sincosf_table arm_math_sincosf_table +#define __inv_pio4 arm_math_inv_pio4 +#define __exp2f_data arm_math_exp2f_data +#define __logf_data arm_math_logf_data +#define __log2f_data arm_math_log2f_data +#define __powf_log2_data arm_math_powf_log2_data +#define __exp_data arm_math_exp_data +#define __log_data arm_math_log_data +#define __log2_data arm_math_log2_data +#define __pow_log_data arm_math_pow_log_data +#define __erff_data arm_math_erff_data +#define __erf_data arm_math_erf_data +#define __v_exp_data arm_math_v_exp_data +#define __v_log_data arm_math_v_log_data + #if HAVE_FAST_ROUND /* When set, the roundtoint and converttoint functions are provided with the semantics documented below. */ @@ -381,15 +421,22 @@ extern const struct powf_log2_data #define EXP_USE_TOINT_NARROW 0 #define EXP2_POLY_ORDER 5 #define EXP2_POLY_WIDE 0 +/* Wider exp10 polynomial necessary for good precision in non-nearest rounding + and !TOINT_INTRINSICS. */ +#define EXP10_POLY_WIDE 0 extern const struct exp_data { double invln2N; + double invlog10_2N; double shift; double negln2hiN; double negln2loN; + double neglog10_2hiN; + double neglog10_2loN; double poly[4]; /* Last four coefficients. */ double exp2_shift; double exp2_poly[EXP2_POLY_ORDER]; + double exp10_poly[5]; uint64_t tab[2*(1 << EXP_TABLE_BITS)]; } __exp_data HIDDEN; @@ -459,4 +506,16 @@ extern const struct erf_data double erfc_poly_F[ERFC_POLY_F_NCOEFFS]; } __erf_data HIDDEN; +#define V_EXP_TABLE_BITS 7 +extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN; + +#define V_LOG_TABLE_BITS 7 +extern const struct v_log_data +{ + struct + { + double invc, logc; + } table[1 << V_LOG_TABLE_BITS]; +} __v_log_data HIDDEN; + #endif diff --git a/contrib/arm-optimized-routines/math/math_err.c b/contrib/arm-optimized-routines/math/math_err.c index 1bf9538a1ab1..cfe072809cf4 100644 --- a/contrib/arm-optimized-routines/math/math_err.c +++ b/contrib/arm-optimized-routines/math/math_err.c @@ -2,7 +2,7 @@ * Double-precision math error handling. * * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/contrib/arm-optimized-routines/math/math_errf.c b/contrib/arm-optimized-routines/math/math_errf.c index d5350b819ab1..4233918b1eae 100644 --- a/contrib/arm-optimized-routines/math/math_errf.c +++ b/contrib/arm-optimized-routines/math/math_errf.c @@ -2,7 +2,7 @@ * Single-precision math error handling. * * Copyright (c) 2017-2020, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/contrib/arm-optimized-routines/math/pow.c b/contrib/arm-optimized-routines/math/pow.c index 86842c6abacd..af719fe5ab10 100644 --- a/contrib/arm-optimized-routines/math/pow.c +++ b/contrib/arm-optimized-routines/math/pow.c @@ -2,7 +2,7 @@ * Double-precision x^y function. * * Copyright (c) 2018-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <float.h> diff --git a/contrib/arm-optimized-routines/math/pow_log_data.c b/contrib/arm-optimized-routines/math/pow_log_data.c index 45569c5cc064..2a4c250d85c3 100644 --- a/contrib/arm-optimized-routines/math/pow_log_data.c +++ b/contrib/arm-optimized-routines/math/pow_log_data.c @@ -2,7 +2,7 @@ * Data for the log part of pow. * * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/contrib/arm-optimized-routines/math/powf.c b/contrib/arm-optimized-routines/math/powf.c index 6ba45d3852a5..05c80bb2eb67 100644 --- a/contrib/arm-optimized-routines/math/powf.c +++ b/contrib/arm-optimized-routines/math/powf.c @@ -2,7 +2,7 @@ * Single-precision pow function. * * Copyright (c) 2017-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <math.h> diff --git a/contrib/arm-optimized-routines/math/powf_log2_data.c b/contrib/arm-optimized-routines/math/powf_log2_data.c index 97e0d98cdbab..243836a549fd 100644 --- a/contrib/arm-optimized-routines/math/powf_log2_data.c +++ b/contrib/arm-optimized-routines/math/powf_log2_data.c @@ -2,7 +2,7 @@ * Data definition for powf. * * Copyright (c) 2017-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" diff --git a/contrib/arm-optimized-routines/math/s_cos.c b/contrib/arm-optimized-routines/math/s_cos.c deleted file mode 100644 index 53a95b0adfde..000000000000 --- a/contrib/arm-optimized-routines/math/s_cos.c +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ -#define SCALAR 1 -#include "v_cos.c" diff --git a/contrib/arm-optimized-routines/math/s_cosf.c b/contrib/arm-optimized-routines/math/s_cosf.c deleted file mode 100644 index 914c02eba651..000000000000 --- a/contrib/arm-optimized-routines/math/s_cosf.c +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ -#define SCALAR 1 -#include "v_cosf.c" diff --git a/contrib/arm-optimized-routines/math/s_exp.c b/contrib/arm-optimized-routines/math/s_exp.c deleted file mode 100644 index ac7246b2c100..000000000000 --- a/contrib/arm-optimized-routines/math/s_exp.c +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ -#define SCALAR 1 -#include "v_exp.c" diff --git a/contrib/arm-optimized-routines/math/s_exp2f.c b/contrib/arm-optimized-routines/math/s_exp2f.c deleted file mode 100644 index df7dfd680ff4..000000000000 --- a/contrib/arm-optimized-routines/math/s_exp2f.c +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Copyright (c) 2019, Arm Limited. 
- * SPDX-License-Identifier: MIT - */ -#define SCALAR 1 -#include "v_exp2f.c" diff --git a/contrib/arm-optimized-routines/math/s_exp2f_1u.c b/contrib/arm-optimized-routines/math/s_exp2f_1u.c deleted file mode 100644 index 5e3852b41d83..000000000000 --- a/contrib/arm-optimized-routines/math/s_exp2f_1u.c +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ -#define SCALAR 1 -#include "v_exp2f_1u.c" diff --git a/contrib/arm-optimized-routines/math/s_expf.c b/contrib/arm-optimized-routines/math/s_expf.c deleted file mode 100644 index 3492c460733d..000000000000 --- a/contrib/arm-optimized-routines/math/s_expf.c +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ -#define SCALAR 1 -#include "v_expf.c" diff --git a/contrib/arm-optimized-routines/math/s_expf_1u.c b/contrib/arm-optimized-routines/math/s_expf_1u.c deleted file mode 100644 index eb7bbcba5566..000000000000 --- a/contrib/arm-optimized-routines/math/s_expf_1u.c +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ -#define SCALAR 1 -#include "v_expf_1u.c" diff --git a/contrib/arm-optimized-routines/math/s_log.c b/contrib/arm-optimized-routines/math/s_log.c deleted file mode 100644 index 23289cf948ec..000000000000 --- a/contrib/arm-optimized-routines/math/s_log.c +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ -#define SCALAR 1 -#include "v_log.c" diff --git a/contrib/arm-optimized-routines/math/s_logf.c b/contrib/arm-optimized-routines/math/s_logf.c deleted file mode 100644 index 9399350fc1ee..000000000000 --- a/contrib/arm-optimized-routines/math/s_logf.c +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ -#define SCALAR 1 -#include "v_logf.c" diff --git a/contrib/arm-optimized-routines/math/s_pow.c b/contrib/arm-optimized-routines/math/s_pow.c deleted file mode 100644 index 2e34c9f896d6..000000000000 --- a/contrib/arm-optimized-routines/math/s_pow.c +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Copyright (c) 2020, Arm Limited. - * SPDX-License-Identifier: MIT - */ -#define SCALAR 1 -#include "v_pow.c" diff --git a/contrib/arm-optimized-routines/math/s_powf.c b/contrib/arm-optimized-routines/math/s_powf.c deleted file mode 100644 index 6d91a4a72b37..000000000000 --- a/contrib/arm-optimized-routines/math/s_powf.c +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ -#define SCALAR 1 -#include "v_powf.c" diff --git a/contrib/arm-optimized-routines/math/s_sin.c b/contrib/arm-optimized-routines/math/s_sin.c deleted file mode 100644 index 06982c2018c6..000000000000 --- a/contrib/arm-optimized-routines/math/s_sin.c +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ -#define SCALAR 1 -#include "v_sin.c" diff --git a/contrib/arm-optimized-routines/math/s_sinf.c b/contrib/arm-optimized-routines/math/s_sinf.c deleted file mode 100644 index 68ca90853736..000000000000 --- a/contrib/arm-optimized-routines/math/s_sinf.c +++ /dev/null @@ -1,6 +0,0 @@ -/* - * Copyright (c) 2019, Arm Limited. 
- * SPDX-License-Identifier: MIT - */ -#define SCALAR 1 -#include "v_sinf.c" diff --git a/contrib/arm-optimized-routines/math/sincosf.c b/contrib/arm-optimized-routines/math/sincosf.c index 9746f1c22e6c..446f21d60faf 100644 --- a/contrib/arm-optimized-routines/math/sincosf.c +++ b/contrib/arm-optimized-routines/math/sincosf.c @@ -1,8 +1,8 @@ /* * Single-precision sin/cos function. * - * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2018-2021, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <stdint.h> @@ -22,7 +22,7 @@ sincosf (float y, float *sinp, float *cosp) int n; const sincos_t *p = &__sincosf_table[0]; - if (abstop12 (y) < abstop12 (pio4)) + if (abstop12 (y) < abstop12 (pio4f)) { double x2 = x * x; diff --git a/contrib/arm-optimized-routines/math/sincosf.h b/contrib/arm-optimized-routines/math/sincosf.h index 1e80fc9ba8e1..ec23ed7aeb26 100644 --- a/contrib/arm-optimized-routines/math/sincosf.h +++ b/contrib/arm-optimized-routines/math/sincosf.h @@ -1,8 +1,8 @@ /* * Header for sinf, cosf and sincosf. * - * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2018-2021, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <stdint.h> @@ -12,7 +12,7 @@ /* 2PI * 2^-64. */ static const double pi63 = 0x1.921FB54442D18p-62; /* PI / 4. */ -static const double pio4 = 0x1.921FB54442D18p-1; +static const float pio4f = 0x1.921FB6p-1f; /* The constants and polynomials for sine and cosine. */ typedef struct diff --git a/contrib/arm-optimized-routines/math/sincosf_data.c b/contrib/arm-optimized-routines/math/sincosf_data.c index ab4ac4710fef..22525290ab08 100644 --- a/contrib/arm-optimized-routines/math/sincosf_data.c +++ b/contrib/arm-optimized-routines/math/sincosf_data.c @@ -2,7 +2,7 @@ * Data definition for sinf, cosf and sincosf. * * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <stdint.h> diff --git a/contrib/arm-optimized-routines/math/sinf.c b/contrib/arm-optimized-routines/math/sinf.c index ddbc1daf74a9..8dd8ae458794 100644 --- a/contrib/arm-optimized-routines/math/sinf.c +++ b/contrib/arm-optimized-routines/math/sinf.c @@ -1,8 +1,8 @@ /* * Single-precision sin function. * - * Copyright (c) 2018-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2018-2021, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <math.h> @@ -21,7 +21,7 @@ sinf (float y) int n; const sincos_t *p = &__sincosf_table[0]; - if (abstop12 (y) < abstop12 (pio4)) + if (abstop12 (y) < abstop12 (pio4f)) { s = x * x; diff --git a/contrib/arm-optimized-routines/math/test/mathbench.c b/contrib/arm-optimized-routines/math/test/mathbench.c index 0c17826e5296..ed7e89bb7710 100644 --- a/contrib/arm-optimized-routines/math/test/mathbench.c +++ b/contrib/arm-optimized-routines/math/test/mathbench.c @@ -1,8 +1,8 @@ /* * Microbenchmark for math functions. * - * Copyright (c) 2018-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2018-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #undef _GNU_SOURCE @@ -15,11 +15,6 @@ #include <math.h> #include "mathlib.h" -#ifndef WANT_VMATH -/* Enable the build of vector math code. */ -# define WANT_VMATH 1 -#endif - /* Number of measurements, best result is reported. */ #define MEASURE 60 /* Array size. 
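   The A/Af arrays below hold the N benchmark inputs, drawn from each
   funtab entry's [lo, hi] interval before the timed runs.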
*/ @@ -34,8 +29,9 @@ static float Af[N]; static long measurecount = MEASURE; static long itercount = ITER; -#if __aarch64__ && WANT_VMATH -typedef __f64x2_t v_double; +#ifdef __vpcs +#include <arm_neon.h> +typedef float64x2_t v_double; #define v_double_len() 2 @@ -51,7 +47,7 @@ v_double_dup (double x) return (v_double){x, x}; } -typedef __f32x4_t v_float; +typedef float32x4_t v_float; #define v_float_len() 4 @@ -76,141 +72,91 @@ typedef float v_float; #define v_float_len(x) 1 #define v_float_load(x) (x)[0] #define v_float_dup(x) (x) -#endif - -static double -dummy (double x) -{ - return x; -} - -static float -dummyf (float x) -{ - return x; -} - -#if WANT_VMATH -#if __aarch64__ -static v_double -__v_dummy (v_double x) -{ - return x; -} -static v_float -__v_dummyf (v_float x) -{ - return x; -} - -#ifdef __vpcs -__vpcs static v_double -__vn_dummy (v_double x) -{ - return x; -} +#endif -__vpcs static v_float -__vn_dummyf (v_float x) -{ - return x; -} +#if WANT_SVE_MATH +#include <arm_sve.h> +typedef svbool_t sv_bool; +typedef svfloat64_t sv_double; -__vpcs static v_float -xy__vn_powf (v_float x) -{ - return __vn_powf (x, x); -} +#define sv_double_len() svcntd() -__vpcs static v_float -xy_Z_powf (v_float x) +static inline sv_double +sv_double_load (const double *p) { - return _ZGVnN4vv_powf (x, x); + svbool_t pg = svptrue_b64(); + return svld1(pg, p); } -__vpcs static v_double -xy__vn_pow (v_double x) +static inline sv_double +sv_double_dup (double x) { - return __vn_pow (x, x); + return svdup_n_f64(x); } -__vpcs static v_double -xy_Z_pow (v_double x) -{ - return _ZGVnN2vv_pow (x, x); -} -#endif +typedef svfloat32_t sv_float; -static v_float -xy__v_powf (v_float x) -{ - return __v_powf (x, x); -} +#define sv_float_len() svcntw() -static v_double -xy__v_pow (v_double x) +static inline sv_float +sv_float_load (const float *p) { - return __v_pow (x, x); + svbool_t pg = svptrue_b32(); + return svld1(pg, p); } -#endif -static float -xy__s_powf (float x) +static inline sv_float +sv_float_dup (float x) { - return __s_powf (x, x); -} - -static double -xy__s_pow (double x) -{ - return __s_pow (x, x); + return svdup_n_f32(x); } +#else +/* dummy definitions to make things compile. 
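   Only the length macros need stubbing: bench1 computes vlen via
   sv_double_len/sv_float_len unconditionally, while the SVE TIMEIT cases
   and the sv_* types themselves are guarded by WANT_SVE_MATH and
   compiled out in this configuration.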
*/ +#define sv_double_len(x) 1 +#define sv_float_len(x) 1 #endif static double -xypow (double x) +dummy (double x) { - return pow (x, x); + return x; } static float -xypowf (float x) +dummyf (float x) { - return powf (x, x); + return x; } - -static double -xpow (double x) +#ifdef __vpcs +__vpcs static v_double +__vn_dummy (v_double x) { - return pow (x, 23.4); + return x; } -static float -xpowf (float x) +__vpcs static v_float +__vn_dummyf (v_float x) { - return powf (x, 23.4f); + return x; } - -static double -ypow (double x) +#endif +#if WANT_SVE_MATH +static sv_double +__sv_dummy (sv_double x, sv_bool pg) { - return pow (2.34, x); + return x; } -static float -ypowf (float x) +static sv_float +__sv_dummyf (sv_float x, sv_bool pg) { - return powf (2.34f, x); + return x; } -static float -sincosf_wrap (float x) -{ - float s, c; - sincosf (x, &s, &c); - return s + c; -} +#endif + +#include "test/mathbench_wrappers.h" static const struct fun { @@ -223,127 +169,40 @@ static const struct fun { double (*d) (double); float (*f) (float); - v_double (*vd) (v_double); - v_float (*vf) (v_float); #ifdef __vpcs __vpcs v_double (*vnd) (v_double); __vpcs v_float (*vnf) (v_float); #endif +#if WANT_SVE_MATH + sv_double (*svd) (sv_double, sv_bool); + sv_float (*svf) (sv_float, sv_bool); +#endif } fun; } funtab[] = { #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}}, #define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}}, -#define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}}, -#define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}}, #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}}, #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}}, +#define SVD(func, lo, hi) {#func, 'd', 's', lo, hi, {.svd = func}}, +#define SVF(func, lo, hi) {#func, 'f', 's', lo, hi, {.svf = func}}, D (dummy, 1.0, 2.0) -D (exp, -9.9, 9.9) -D (exp, 0.5, 1.0) -D (exp2, -9.9, 9.9) -D (log, 0.01, 11.1) -D (log, 0.999, 1.001) -D (log2, 0.01, 11.1) -D (log2, 0.999, 1.001) -{"pow", 'd', 0, 0.01, 11.1, {.d = xypow}}, -D (xpow, 0.01, 11.1) -D (ypow, -9.9, 9.9) -D (erf, -6.0, 6.0) - F (dummyf, 1.0, 2.0) -F (expf, -9.9, 9.9) -F (exp2f, -9.9, 9.9) -F (logf, 0.01, 11.1) -F (log2f, 0.01, 11.1) -{"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}}, -F (xpowf, 0.01, 11.1) -F (ypowf, -9.9, 9.9) -{"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}}, -F (sinf, 0.1, 0.7) -F (sinf, 0.8, 3.1) -F (sinf, -3.1, 3.1) -F (sinf, 3.3, 33.3) -F (sinf, 100, 1000) -F (sinf, 1e6, 1e32) -F (cosf, 0.1, 0.7) -F (cosf, 0.8, 3.1) -F (cosf, -3.1, 3.1) -F (cosf, 3.3, 33.3) -F (cosf, 100, 1000) -F (cosf, 1e6, 1e32) -F (erff, -4.0, 4.0) -#if WANT_VMATH -D (__s_sin, -3.1, 3.1) -D (__s_cos, -3.1, 3.1) -D (__s_exp, -9.9, 9.9) -D (__s_log, 0.01, 11.1) -{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}}, -F (__s_expf, -9.9, 9.9) -F (__s_expf_1u, -9.9, 9.9) -F (__s_exp2f, -9.9, 9.9) -F (__s_exp2f_1u, -9.9, 9.9) -F (__s_logf, 0.01, 11.1) -{"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}}, -F (__s_sinf, -3.1, 3.1) -F (__s_cosf, -3.1, 3.1) -#if __aarch64__ -VD (__v_dummy, 1.0, 2.0) -VD (__v_sin, -3.1, 3.1) -VD (__v_cos, -3.1, 3.1) -VD (__v_exp, -9.9, 9.9) -VD (__v_log, 0.01, 11.1) -{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}}, -VF (__v_dummyf, 1.0, 2.0) -VF 
(__v_expf, -9.9, 9.9) -VF (__v_expf_1u, -9.9, 9.9) -VF (__v_exp2f, -9.9, 9.9) -VF (__v_exp2f_1u, -9.9, 9.9) -VF (__v_logf, 0.01, 11.1) -{"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}}, -VF (__v_sinf, -3.1, 3.1) -VF (__v_cosf, -3.1, 3.1) #ifdef __vpcs VND (__vn_dummy, 1.0, 2.0) -VND (__vn_exp, -9.9, 9.9) -VND (_ZGVnN2v_exp, -9.9, 9.9) -VND (__vn_log, 0.01, 11.1) -VND (_ZGVnN2v_log, 0.01, 11.1) -{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}}, -{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}}, -VND (__vn_sin, -3.1, 3.1) -VND (_ZGVnN2v_sin, -3.1, 3.1) -VND (__vn_cos, -3.1, 3.1) -VND (_ZGVnN2v_cos, -3.1, 3.1) VNF (__vn_dummyf, 1.0, 2.0) -VNF (__vn_expf, -9.9, 9.9) -VNF (_ZGVnN4v_expf, -9.9, 9.9) -VNF (__vn_expf_1u, -9.9, 9.9) -VNF (__vn_exp2f, -9.9, 9.9) -VNF (_ZGVnN4v_exp2f, -9.9, 9.9) -VNF (__vn_exp2f_1u, -9.9, 9.9) -VNF (__vn_logf, 0.01, 11.1) -VNF (_ZGVnN4v_logf, 0.01, 11.1) -{"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}}, -{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}}, -VNF (__vn_sinf, -3.1, 3.1) -VNF (_ZGVnN4v_sinf, -3.1, 3.1) -VNF (__vn_cosf, -3.1, 3.1) -VNF (_ZGVnN4v_cosf, -3.1, 3.1) -#endif #endif +#if WANT_SVE_MATH +SVD (__sv_dummy, 1.0, 2.0) +SVF (__sv_dummyf, 1.0, 2.0) #endif +#include "test/mathbench_funcs.h" {0}, #undef F #undef D -#undef VF -#undef VD #undef VNF #undef VND +#undef SVF +#undef SVD }; static void @@ -442,69 +301,75 @@ runf_latency (float f (float)) prev = f (Af[i] + prev * z); } +#ifdef __vpcs static void -run_v_thruput (v_double f (v_double)) +run_vn_thruput (__vpcs v_double f (v_double)) { for (int i = 0; i < N; i += v_double_len ()) f (v_double_load (A+i)); } static void -runf_v_thruput (v_float f (v_float)) +runf_vn_thruput (__vpcs v_float f (v_float)) { for (int i = 0; i < N; i += v_float_len ()) f (v_float_load (Af+i)); } static void -run_v_latency (v_double f (v_double)) +run_vn_latency (__vpcs v_double f (v_double)) { - v_double z = v_double_dup (zero); - v_double prev = z; + volatile uint64x2_t vsel = (uint64x2_t) { 0, 0 }; + uint64x2_t sel = vsel; + v_double prev = v_double_dup (0); for (int i = 0; i < N; i += v_double_len ()) - prev = f (v_double_load (A+i) + prev * z); + prev = f (vbslq_f64 (sel, prev, v_double_load (A+i))); } static void -runf_v_latency (v_float f (v_float)) +runf_vn_latency (__vpcs v_float f (v_float)) { - v_float z = v_float_dup (zero); - v_float prev = z; + volatile uint32x4_t vsel = (uint32x4_t) { 0, 0, 0, 0 }; + uint32x4_t sel = vsel; + v_float prev = v_float_dup (0); for (int i = 0; i < N; i += v_float_len ()) - prev = f (v_float_load (Af+i) + prev * z); + prev = f (vbslq_f32 (sel, prev, v_float_load (Af+i))); } +#endif -#ifdef __vpcs +#if WANT_SVE_MATH static void -run_vn_thruput (__vpcs v_double f (v_double)) +run_sv_thruput (sv_double f (sv_double, sv_bool)) { - for (int i = 0; i < N; i += v_double_len ()) - f (v_double_load (A+i)); + for (int i = 0; i < N; i += sv_double_len ()) + f (sv_double_load (A+i), svptrue_b64 ()); } static void -runf_vn_thruput (__vpcs v_float f (v_float)) +runf_sv_thruput (sv_float f (sv_float, sv_bool)) { - for (int i = 0; i < N; i += v_float_len ()) - f (v_float_load (Af+i)); + for (int i = 0; i < N; i += sv_float_len ()) + f (sv_float_load (Af+i), svptrue_b32 ()); } static void -run_vn_latency (__vpcs v_double f (v_double)) +run_sv_latency (sv_double f (sv_double, sv_bool)) { - v_double z = v_double_dup (zero); - v_double prev = z; - for (int i = 0; i < N; i += v_double_len ()) - prev = f (v_double_load (A+i) + prev * z); + volatile sv_bool vsel 
= svptrue_b64 (); + sv_bool sel = vsel; + sv_double prev = sv_double_dup (0); + for (int i = 0; i < N; i += sv_double_len ()) + prev = f (svsel_f64 (sel, sv_double_load (A+i), prev), svptrue_b64 ()); } static void -runf_vn_latency (__vpcs v_float f (v_float)) +runf_sv_latency (sv_float f (sv_float, sv_bool)) { - v_float z = v_float_dup (zero); - v_float prev = z; - for (int i = 0; i < N; i += v_float_len ()) - prev = f (v_float_load (Af+i) + prev * z); + volatile sv_bool vsel = svptrue_b32 (); + sv_bool sel = vsel; + sv_float prev = sv_float_dup (0); + for (int i = 0; i < N; i += sv_float_len ()) + prev = f (svsel_f32 (sel, sv_float_load (Af+i), prev), svptrue_b32 ()); } #endif @@ -539,10 +404,10 @@ bench1 (const struct fun *f, int type, double lo, double hi) const char *s = type == 't' ? "rthruput" : "latency"; int vlen = 1; - if (f->vec && f->prec == 'd') - vlen = v_double_len(); - else if (f->vec && f->prec == 'f') - vlen = v_float_len(); + if (f->vec == 'n') + vlen = f->prec == 'd' ? v_double_len() : v_float_len(); + else if (f->vec == 's') + vlen = f->prec == 'd' ? sv_double_len() : sv_float_len(); if (f->prec == 'd' && type == 't' && f->vec == 0) TIMEIT (run_thruput, f->fun.d); @@ -552,14 +417,6 @@ bench1 (const struct fun *f, int type, double lo, double hi) TIMEIT (runf_thruput, f->fun.f); else if (f->prec == 'f' && type == 'l' && f->vec == 0) TIMEIT (runf_latency, f->fun.f); - else if (f->prec == 'd' && type == 't' && f->vec == 'v') - TIMEIT (run_v_thruput, f->fun.vd); - else if (f->prec == 'd' && type == 'l' && f->vec == 'v') - TIMEIT (run_v_latency, f->fun.vd); - else if (f->prec == 'f' && type == 't' && f->vec == 'v') - TIMEIT (runf_v_thruput, f->fun.vf); - else if (f->prec == 'f' && type == 'l' && f->vec == 'v') - TIMEIT (runf_v_latency, f->fun.vf); #ifdef __vpcs else if (f->prec == 'd' && type == 't' && f->vec == 'n') TIMEIT (run_vn_thruput, f->fun.vnd); @@ -570,20 +427,32 @@ bench1 (const struct fun *f, int type, double lo, double hi) else if (f->prec == 'f' && type == 'l' && f->vec == 'n') TIMEIT (runf_vn_latency, f->fun.vnf); #endif +#if WANT_SVE_MATH + else if (f->prec == 'd' && type == 't' && f->vec == 's') + TIMEIT (run_sv_thruput, f->fun.svd); + else if (f->prec == 'd' && type == 'l' && f->vec == 's') + TIMEIT (run_sv_latency, f->fun.svd); + else if (f->prec == 'f' && type == 't' && f->vec == 's') + TIMEIT (runf_sv_thruput, f->fun.svf); + else if (f->prec == 'f' && type == 'l' && f->vec == 's') + TIMEIT (runf_sv_latency, f->fun.svf); +#endif if (type == 't') { ns100 = (100 * dt + itercount * N / 2) / (itercount * N); - printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s, + printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g] vlen %d\n", + f->name, s, (unsigned) (ns100 / 100), (unsigned) (ns100 % 100), - (unsigned long long) dt, lo, hi); + (unsigned long long) dt, lo, hi, vlen); } else if (type == 'l') { ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen); - printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f->name, s, + printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g] vlen %d\n", + f->name, s, (unsigned) (ns100 / 100), (unsigned) (ns100 % 100), - (unsigned long long) dt, lo, hi); + (unsigned long long) dt, lo, hi, vlen); } fflush (stdout); } diff --git a/contrib/arm-optimized-routines/math/test/mathbench_funcs.h b/contrib/arm-optimized-routines/math/test/mathbench_funcs.h new file mode 100644 index 000000000000..84c4e68650ac --- /dev/null +++ b/contrib/arm-optimized-routines/math/test/mathbench_funcs.h @@ -0,0 
+1,62 @@ +/* + * Function entries for mathbench. + * + * Copyright (c) 2022-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +/* clang-format off */ +D (exp, -9.9, 9.9) +D (exp, 0.5, 1.0) +D (exp10, -9.9, 9.9) +D (exp2, -9.9, 9.9) +D (log, 0.01, 11.1) +D (log, 0.999, 1.001) +D (log2, 0.01, 11.1) +D (log2, 0.999, 1.001) +{"pow", 'd', 0, 0.01, 11.1, {.d = xypow}}, +D (xpow, 0.01, 11.1) +D (ypow, -9.9, 9.9) +D (erf, -6.0, 6.0) + +F (expf, -9.9, 9.9) +F (exp2f, -9.9, 9.9) +F (logf, 0.01, 11.1) +F (log2f, 0.01, 11.1) +{"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}}, +F (xpowf, 0.01, 11.1) +F (ypowf, -9.9, 9.9) +{"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}}, +{"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}}, +{"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}}, +{"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}}, +{"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}}, +{"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}}, +F (sinf, 0.1, 0.7) +F (sinf, 0.8, 3.1) +F (sinf, -3.1, 3.1) +F (sinf, 3.3, 33.3) +F (sinf, 100, 1000) +F (sinf, 1e6, 1e32) +F (cosf, 0.1, 0.7) +F (cosf, 0.8, 3.1) +F (cosf, -3.1, 3.1) +F (cosf, 3.3, 33.3) +F (cosf, 100, 1000) +F (cosf, 1e6, 1e32) +F (erff, -4.0, 4.0) +#ifdef __vpcs +VND (_ZGVnN2v_exp, -9.9, 9.9) +VND (_ZGVnN2v_log, 0.01, 11.1) +{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}}, +VND (_ZGVnN2v_sin, -3.1, 3.1) +VND (_ZGVnN2v_cos, -3.1, 3.1) +VNF (_ZGVnN4v_expf, -9.9, 9.9) +VNF (_ZGVnN4v_expf_1u, -9.9, 9.9) +VNF (_ZGVnN4v_exp2f, -9.9, 9.9) +VNF (_ZGVnN4v_exp2f_1u, -9.9, 9.9) +VNF (_ZGVnN4v_logf, 0.01, 11.1) +{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}}, +VNF (_ZGVnN4v_sinf, -3.1, 3.1) +VNF (_ZGVnN4v_cosf, -3.1, 3.1) +#endif + /* clang-format on */ diff --git a/contrib/arm-optimized-routines/math/test/mathbench_wrappers.h b/contrib/arm-optimized-routines/math/test/mathbench_wrappers.h new file mode 100644 index 000000000000..062b9db56de5 --- /dev/null +++ b/contrib/arm-optimized-routines/math/test/mathbench_wrappers.h @@ -0,0 +1,66 @@ +/* + * Function wrappers for mathbench. + * + * Copyright (c) 2022-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#ifdef __vpcs + +__vpcs static v_float +xy_Z_powf (v_float x) +{ + return _ZGVnN4vv_powf (x, x); +} + +__vpcs static v_double +xy_Z_pow (v_double x) +{ + return _ZGVnN2vv_pow (x, x); +} + +#endif + +static double +xypow (double x) +{ + return pow (x, x); +} + +static float +xypowf (float x) +{ + return powf (x, x); +} + +static double +xpow (double x) +{ + return pow (x, 23.4); +} + +static float +xpowf (float x) +{ + return powf (x, 23.4f); +} + +static double +ypow (double x) +{ + return pow (2.34, x); +} + +static float +ypowf (float x) +{ + return powf (2.34f, x); +} + +static float +sincosf_wrap (float x) +{ + float s, c; + sincosf (x, &s, &c); + return s + c; +} diff --git a/contrib/arm-optimized-routines/math/test/mathtest.c b/contrib/arm-optimized-routines/math/test/mathtest.c index 310896738e47..834233fdde9d 100644 --- a/contrib/arm-optimized-routines/math/test/mathtest.c +++ b/contrib/arm-optimized-routines/math/test/mathtest.c @@ -1,8 +1,8 @@ /* * mathtest.c - test rig for mathlib * - * Copyright (c) 1998-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 1998-2023, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <assert.h> @@ -196,9 +196,11 @@ int is_complex_rettype(int rettype) { #define TFUNCARM(arg,ret,name,tolerance) { t_func, arg, ret, (void*)& ARM_PREFIX(name), m_none, tolerance, #name } #define MFUNC(arg,ret,name,tolerance) { t_macro, arg, ret, NULL, m_##name, tolerance, #name } +#ifndef PL /* sincosf wrappers for easier testing. */ static float sincosf_sinf(float x) { float s,c; sincosf(x, &s, &c); return s; } static float sincosf_cosf(float x) { float s,c; sincosf(x, &s, &c); return c; } +#endif test_func tfuncs[] = { /* trigonometric */ @@ -218,9 +220,10 @@ test_func tfuncs[] = { TFUNCARM(at_s,rt_s, tanf, 4*ULPUNIT), TFUNCARM(at_s,rt_s, sinf, 3*ULPUNIT/4), TFUNCARM(at_s,rt_s, cosf, 3*ULPUNIT/4), +#ifndef PL TFUNCARM(at_s,rt_s, sincosf_sinf, 3*ULPUNIT/4), TFUNCARM(at_s,rt_s, sincosf_cosf, 3*ULPUNIT/4), - +#endif /* hyperbolic */ TFUNC(at_d, rt_d, atanh, 4*ULPUNIT), TFUNC(at_d, rt_d, asinh, 4*ULPUNIT), @@ -251,6 +254,7 @@ test_func tfuncs[] = { TFUNCARM(at_s,rt_s, expf, 3*ULPUNIT/4), TFUNCARM(at_s,rt_s, exp2f, 3*ULPUNIT/4), TFUNC(at_s,rt_s, expm1f, ULPUNIT), + TFUNC(at_d,rt_d, exp10, ULPUNIT), /* power */ TFUNC(at_d2,rt_d, pow, 3*ULPUNIT/4), @@ -1018,6 +1022,7 @@ int runtest(testdetail t) { DO_DOP(d_arg1,op1r); DO_DOP(d_arg2,op2r); s_arg1.i = t.op1r[0]; s_arg2.i = t.op2r[0]; + s_res.i = 0; /* * Detect NaNs, infinities and denormals on input, and set a @@ -1152,22 +1157,25 @@ int runtest(testdetail t) { tresultr[0] = t.resultr[0]; tresultr[1] = t.resultr[1]; resultr[0] = d_res.i[dmsd]; resultr[1] = d_res.i[dlsd]; + resulti[0] = resulti[1] = 0; wres = 2; break; case rt_i: tresultr[0] = t.resultr[0]; resultr[0] = intres; + resulti[0] = 0; wres = 1; break; case rt_s: case rt_s2: tresultr[0] = t.resultr[0]; resultr[0] = s_res.i; + resulti[0] = 0; wres = 1; break; default: puts("unhandled rettype in runtest"); - wres = 0; + abort (); } if(t.resultc != rc_none) { int err = 0; diff --git a/contrib/arm-optimized-routines/math/test/rtest/dotest.c b/contrib/arm-optimized-routines/math/test/rtest/dotest.c index 6be79e1df0d1..5b3e9b4f18e4 100644 --- a/contrib/arm-optimized-routines/math/test/rtest/dotest.c +++ b/contrib/arm-optimized-routines/math/test/rtest/dotest.c @@ -2,7 +2,7 @@ * dotest.c - actually generate mathlib test cases * * Copyright (c) 1999-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <stdio.h> diff --git a/contrib/arm-optimized-routines/math/test/rtest/intern.h b/contrib/arm-optimized-routines/math/test/rtest/intern.h index 12a9c749e18e..3ebd7ddaf85d 100644 --- a/contrib/arm-optimized-routines/math/test/rtest/intern.h +++ b/contrib/arm-optimized-routines/math/test/rtest/intern.h @@ -2,7 +2,7 @@ * intern.h * * Copyright (c) 1999-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef mathtest_intern_h diff --git a/contrib/arm-optimized-routines/math/test/rtest/main.c b/contrib/arm-optimized-routines/math/test/rtest/main.c index 0d8ead891320..3d533c946f79 100644 --- a/contrib/arm-optimized-routines/math/test/rtest/main.c +++ b/contrib/arm-optimized-routines/math/test/rtest/main.c @@ -2,7 +2,7 @@ * main.c * * Copyright (c) 1999-2019, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <assert.h> diff --git a/contrib/arm-optimized-routines/math/test/rtest/random.c b/contrib/arm-optimized-routines/math/test/rtest/random.c index 56123966b8c4..1de32580b733 100644 --- a/contrib/arm-optimized-routines/math/test/rtest/random.c +++ b/contrib/arm-optimized-routines/math/test/rtest/random.c @@ -2,7 +2,7 @@ * random.c - random number generator for producing mathlib test cases * * Copyright (c) 1998-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "types.h" diff --git a/contrib/arm-optimized-routines/math/test/rtest/random.h b/contrib/arm-optimized-routines/math/test/rtest/random.h index b4b22df82a3d..0b477d72b234 100644 --- a/contrib/arm-optimized-routines/math/test/rtest/random.h +++ b/contrib/arm-optimized-routines/math/test/rtest/random.h @@ -2,7 +2,7 @@ * random.h - header for random.c * * Copyright (c) 2009-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "types.h" diff --git a/contrib/arm-optimized-routines/math/test/rtest/semi.c b/contrib/arm-optimized-routines/math/test/rtest/semi.c index c9f0daf76508..70a7844a48d6 100644 --- a/contrib/arm-optimized-routines/math/test/rtest/semi.c +++ b/contrib/arm-optimized-routines/math/test/rtest/semi.c @@ -2,7 +2,7 @@ * semi.c: test implementations of mathlib seminumerical functions * * Copyright (c) 1999-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <stdio.h> diff --git a/contrib/arm-optimized-routines/math/test/rtest/semi.h b/contrib/arm-optimized-routines/math/test/rtest/semi.h index 17dc4158fb51..7a1444e55d28 100644 --- a/contrib/arm-optimized-routines/math/test/rtest/semi.h +++ b/contrib/arm-optimized-routines/math/test/rtest/semi.h @@ -2,7 +2,7 @@ * semi.h: header for semi.c * * Copyright (c) 1999-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef test_semi_h diff --git a/contrib/arm-optimized-routines/math/test/rtest/types.h b/contrib/arm-optimized-routines/math/test/rtest/types.h index 53cd557fa4cf..e15b4e06a0d4 100644 --- a/contrib/arm-optimized-routines/math/test/rtest/types.h +++ b/contrib/arm-optimized-routines/math/test/rtest/types.h @@ -2,7 +2,7 @@ * types.h * * Copyright (c) 2005-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef mathtest_types_h diff --git a/contrib/arm-optimized-routines/math/test/rtest/wrappers.c b/contrib/arm-optimized-routines/math/test/rtest/wrappers.c index de45ac5768d0..441017192ab4 100644 --- a/contrib/arm-optimized-routines/math/test/rtest/wrappers.c +++ b/contrib/arm-optimized-routines/math/test/rtest/wrappers.c @@ -2,7 +2,7 @@ * wrappers.c - wrappers to modify output of MPFR/MPC test functions * * Copyright (c) 2014-2019, Arm Limited. 
- * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include <assert.h> diff --git a/contrib/arm-optimized-routines/math/test/rtest/wrappers.h b/contrib/arm-optimized-routines/math/test/rtest/wrappers.h index 7b09c85a59f1..0a8a58777d8a 100644 --- a/contrib/arm-optimized-routines/math/test/rtest/wrappers.h +++ b/contrib/arm-optimized-routines/math/test/rtest/wrappers.h @@ -2,7 +2,7 @@ * wrappers.h - wrappers to modify output of MPFR/MPC test functions * * Copyright (c) 2014-2019, Arm Limited. - * SPDX-License-Identifier: MIT + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ typedef struct { diff --git a/contrib/arm-optimized-routines/math/test/runulp.sh b/contrib/arm-optimized-routines/math/test/runulp.sh index 0190d9ab27fb..e2e03e3ae761 100755 --- a/contrib/arm-optimized-routines/math/test/runulp.sh +++ b/contrib/arm-optimized-routines/math/test/runulp.sh @@ -2,8 +2,8 @@ # ULP error check script. # -# Copyright (c) 2019-2020, Arm Limited. -# SPDX-License-Identifier: MIT +# Copyright (c) 2019-2023, Arm Limited. +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception #set -x set -eu @@ -72,6 +72,16 @@ t pow 0x1.ffffffffffff0p-1 0x1.0000000000008p0 x 0x1p60 0x1p68 50000 t pow 0x1.ffffffffff000p-1 0x1p0 x 0x1p50 0x1p52 50000 t pow -0x1.ffffffffff000p-1 -0x1p0 x 0x1p50 0x1p52 50000 +L=0.02 +t exp10 0 0x1p-47 5000 +t exp10 -0 -0x1p-47 5000 +t exp10 0x1p-47 1 50000 +t exp10 -0x1p-47 -1 50000 +t exp10 1 0x1.34413509f79ffp8 50000 +t exp10 -1 -0x1.434e6420f4374p8 50000 +t exp10 0x1.34413509f79ffp8 inf 5000 +t exp10 -0x1.434e6420f4374p8 -inf 5000 + L=1.0 Ldir=0.9 t erf 0 0xffff000000000000 10000 @@ -143,15 +153,10 @@ Ldir=0.5 done # vector functions + Ldir=0.5 r='n' -flags="${ULPFLAGS:--q} -f" -runs= -check __s_exp 1 && runs=1 -runv= -check __v_exp 1 && runv=1 -runvn= -check __vn_exp 1 && runvn=1 +flags="${ULPFLAGS:--q}" range_exp=' 0 0xffff000000000000 10000 @@ -177,9 +182,10 @@ range_pow=' ' range_sin=' - 0 0xffff000000000000 10000 - 0x1p-4 0x1p4 400000 - -0x1p-23 0x1p23 400000 + 0 0x1p23 500000 + -0 -0x1p23 500000 + 0x1p23 inf 10000 + -0x1p23 -inf 10000 ' range_cos="$range_sin" @@ -199,9 +205,10 @@ range_logf=' ' range_sinf=' - 0 0xffff0000 10000 - 0x1p-4 0x1p4 300000 --0x1p-9 -0x1p9 300000 + 0 0x1p20 500000 + -0 -0x1p20 500000 + 0x1p20 inf 10000 + -0x1p20 -inf 10000 ' range_cosf="$range_sinf" @@ -229,9 +236,8 @@ L_sinf=1.4 L_cosf=1.4 L_powf=2.1 -while read G F R +while read G F D do - [ "$R" = 1 ] || continue case "$G" in \#*) continue ;; esac eval range="\${range_$G}" eval L="\${L_$G}" @@ -239,74 +245,35 @@ do do [ -n "$X" ] || continue case "$X" in \#*) continue ;; esac - t $F $X + disable_fenv="" + if [ -z "$WANT_SIMD_EXCEPT" ] || [ $WANT_SIMD_EXCEPT -eq 0 ]; then + # If library was built with SIMD exceptions + # disabled, disable fenv checking in ulp + # tool. Otherwise, fenv checking may still be + # disabled by adding -f to the end of the run + # line. 
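+			# (So with WANT_SIMD_EXCEPT unset or 0, every run in
+			# the table below gets -f.)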
+ disable_fenv="-f" + fi + t $D $disable_fenv $F $X done << EOF $range + EOF done << EOF # group symbol run -exp __s_exp $runs -exp __v_exp $runv -exp __vn_exp $runvn -exp _ZGVnN2v_exp $runvn - -log __s_log $runs -log __v_log $runv -log __vn_log $runvn -log _ZGVnN2v_log $runvn - -pow __s_pow $runs -pow __v_pow $runv -pow __vn_pow $runvn -pow _ZGVnN2vv_pow $runvn - -sin __s_sin $runs -sin __v_sin $runv -sin __vn_sin $runvn -sin _ZGVnN2v_sin $runvn - -cos __s_cos $runs -cos __v_cos $runv -cos __vn_cos $runvn -cos _ZGVnN2v_cos $runvn - -expf __s_expf $runs -expf __v_expf $runv -expf __vn_expf $runvn -expf _ZGVnN4v_expf $runvn - -expf_1u __s_expf_1u $runs -expf_1u __v_expf_1u $runv -expf_1u __vn_expf_1u $runvn - -exp2f __s_exp2f $runs -exp2f __v_exp2f $runv -exp2f __vn_exp2f $runvn -exp2f _ZGVnN4v_exp2f $runvn - -exp2f_1u __s_exp2f_1u $runs -exp2f_1u __v_exp2f_1u $runv -exp2f_1u __vn_exp2f_1u $runvn - -logf __s_logf $runs -logf __v_logf $runv -logf __vn_logf $runvn -logf _ZGVnN4v_logf $runvn - -sinf __s_sinf $runs -sinf __v_sinf $runv -sinf __vn_sinf $runvn -sinf _ZGVnN4v_sinf $runvn - -cosf __s_cosf $runs -cosf __v_cosf $runv -cosf __vn_cosf $runvn -cosf _ZGVnN4v_cosf $runvn - -powf __s_powf $runs -powf __v_powf $runv -powf __vn_powf $runvn -powf _ZGVnN4vv_powf $runvn +exp _ZGVnN2v_exp +log _ZGVnN2v_log +pow _ZGVnN2vv_pow -f +sin _ZGVnN2v_sin -z +cos _ZGVnN2v_cos +expf _ZGVnN4v_expf +expf_1u _ZGVnN4v_expf_1u -f +exp2f _ZGVnN4v_exp2f +exp2f_1u _ZGVnN4v_exp2f_1u -f +logf _ZGVnN4v_logf +sinf _ZGVnN4v_sinf -z +cosf _ZGVnN4v_cosf +powf _ZGVnN4vv_powf -f EOF [ 0 -eq $FAIL ] || { diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/cosf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/cosf.tst index 79160443f099..7ea0d45795a3 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/cosf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/cosf.tst @@ -1,7 +1,7 @@ ; cosf.tst - Directed test cases for SP cosine ; ; Copyright (c) 2007-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=cosf op1=7fc00001 result=7fc00001 errno=0 func=cosf op1=ffc00001 result=7fc00001 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/erf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/erf.tst index 7fa4d1868c0e..12384cef0dd9 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/erf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/erf.tst @@ -1,7 +1,7 @@ ; erf.tst - Directed test cases for erf ; ; Copyright (c) 2007-2020, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=erf op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=erf op1=fff80000.00000001 result=7ff80000.00000001 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/erff.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/erff.tst index d05b7b1119c4..28f8fa37f5aa 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/erff.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/erff.tst @@ -1,7 +1,7 @@ ; erff.tst ; ; Copyright (c) 2007-2020, Arm Limited. 
-; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=erff op1=7fc00001 result=7fc00001 errno=0 func=erff op1=ffc00001 result=7fc00001 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/exp.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/exp.tst index 85d556cd1e00..0bb2ef4579cc 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/exp.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/exp.tst @@ -1,7 +1,7 @@ ; Directed test cases for exp ; ; Copyright (c) 2018-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=exp op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=exp op1=fff80000.00000001 result=7ff80000.00000001 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/exp10.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/exp10.tst new file mode 100644 index 000000000000..2cf4273bd1d7 --- /dev/null +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/exp10.tst @@ -0,0 +1,15 @@ +; Directed test cases for exp10 +; +; Copyright (c) 2023, Arm Limited. +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +func=exp10 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 +func=exp10 op1=fff80000.00000001 result=7ff80000.00000001 errno=0 +func=exp10 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i +func=exp10 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i +func=exp10 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 +func=exp10 op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox +func=exp10 op1=fff00000.00000000 result=00000000.00000000 errno=0 +func=exp10 op1=ffefffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux +func=exp10 op1=00000000.00000000 result=3ff00000.00000000 errno=0 +func=exp10 op1=80000000.00000000 result=3ff00000.00000000 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/exp2.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/exp2.tst index fa56c9f8be4b..7069f9010c8c 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/exp2.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/exp2.tst @@ -1,7 +1,7 @@ ; Directed test cases for exp2 ; ; Copyright (c) 2018-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=exp2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=exp2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/exp2f.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/exp2f.tst index 38cfc3f78ac6..6ca2eeab4e12 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/exp2f.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/exp2f.tst @@ -1,7 +1,7 @@ ; exp2f.tst - Directed test cases for exp2f ; ; Copyright (c) 2017-2019, Arm Limited. 
-; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=exp2f op1=7fc00001 result=7fc00001 errno=0 func=exp2f op1=ffc00001 result=7fc00001 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/expf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/expf.tst index ff0f671c2656..89ae8fe78e6c 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/expf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/expf.tst @@ -1,7 +1,7 @@ ; expf.tst - Directed test cases for expf ; ; Copyright (c) 2007-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=expf op1=7fc00001 result=7fc00001 errno=0 func=expf op1=ffc00001 result=7fc00001 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/log.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/log.tst index a0aa398cbf73..686ea835645b 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/log.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/log.tst @@ -1,7 +1,7 @@ ; Directed test cases for log ; ; Copyright (c) 2018-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=log op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=log op1=fff80000.00000001 result=7ff80000.00000001 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/log2.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/log2.tst index ff1286cbd53e..361bddec374b 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/log2.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/log2.tst @@ -1,7 +1,7 @@ ; Directed test cases for log2 ; ; Copyright (c) 2018-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=log2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 func=log2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/log2f.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/log2f.tst index 5832c4f08f1e..5fce051cddba 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/log2f.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/log2f.tst @@ -1,7 +1,7 @@ ; log2f.tst - Directed test cases for log2f ; ; Copyright (c) 2017-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=log2f op1=7fc00001 result=7fc00001 errno=0 func=log2f op1=ffc00001 result=7fc00001 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/logf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/logf.tst index 6e68a36e0f6a..a6d1b9d5c51f 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/logf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/logf.tst @@ -1,7 +1,7 @@ ; logf.tst - Directed test cases for logf ; ; Copyright (c) 2007-2019, Arm Limited. 
-; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=logf op1=7fc00001 result=7fc00001 errno=0 func=logf op1=ffc00001 result=7fc00001 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/pow.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/pow.tst index 19665817153d..879d12864afe 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/pow.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/pow.tst @@ -1,7 +1,7 @@ ; Directed test cases for pow ; ; Copyright (c) 2018-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=pow op1=00000000.00000000 op2=00000000.00000000 result=3ff00000.00000000 errno=0 func=pow op1=00000000.00000000 op2=00000000.00000001 result=00000000.00000000 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/powf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/powf.tst index 3fa8b110f8bc..46d522400871 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/powf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/powf.tst @@ -1,7 +1,7 @@ ; powf.tst - Directed test cases for powf ; ; Copyright (c) 2007-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=powf op1=7f800001 op2=7f800001 result=7fc00001 errno=0 status=i func=powf op1=7f800001 op2=ff800001 result=7fc00001 errno=0 status=i diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/sincosf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/sincosf.tst index 4b33d2291c66..cddb346558ea 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/sincosf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/sincosf.tst @@ -1,7 +1,7 @@ ; Directed test cases for SP sincos ; ; Copyright (c) 2007-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=sincosf_sinf op1=7fc00001 result=7fc00001 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/directed/sinf.tst b/contrib/arm-optimized-routines/math/test/testcases/directed/sinf.tst index ded80b1598c6..041b13d5d6cb 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/directed/sinf.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/directed/sinf.tst @@ -1,7 +1,7 @@ ; sinf.tst - Directed test cases for SP sine ; ; Copyright (c) 2007-2019, Arm Limited. -; SPDX-License-Identifier: MIT +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception func=sinf op1=7fc00001 result=7fc00001 errno=0 diff --git a/contrib/arm-optimized-routines/math/test/testcases/random/double.tst b/contrib/arm-optimized-routines/math/test/testcases/random/double.tst index c24ff80d5d95..8e885d61722a 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/random/double.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/random/double.tst @@ -1,7 +1,7 @@ !! double.tst - Random test case specification for DP functions !! !! Copyright (c) 1999-2019, Arm Limited. -!! SPDX-License-Identifier: MIT +!! 
SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception test exp 10000 test exp2 10000 diff --git a/contrib/arm-optimized-routines/math/test/testcases/random/float.tst b/contrib/arm-optimized-routines/math/test/testcases/random/float.tst index d02a22750abe..ea4a5a015214 100644 --- a/contrib/arm-optimized-routines/math/test/testcases/random/float.tst +++ b/contrib/arm-optimized-routines/math/test/testcases/random/float.tst @@ -1,7 +1,7 @@ !! single.tst - Random test case specification for SP functions !! !! Copyright (c) 1999-2019, Arm Limited. -!! SPDX-License-Identifier: MIT +!! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception test sinf 10000 test cosf 10000 diff --git a/contrib/arm-optimized-routines/math/test/ulp.c b/contrib/arm-optimized-routines/math/test/ulp.c index 51479b87a0fd..5ff29972e50e 100644 --- a/contrib/arm-optimized-routines/math/test/ulp.c +++ b/contrib/arm-optimized-routines/math/test/ulp.c @@ -1,10 +1,11 @@ /* * ULP error checking tool for math functions. * - * Copyright (c) 2019-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ +#define _GNU_SOURCE #include <ctype.h> #include <fenv.h> #include <float.h> @@ -23,11 +24,6 @@ # include <mpfr.h> #endif -#ifndef WANT_VMATH -/* Enable the build of vector math code. */ -# define WANT_VMATH 1 -#endif - static inline uint64_t asuint64 (double f) { @@ -212,73 +208,61 @@ struct conf unsigned long long n; double softlim; double errlim; + int ignore_zero_sign; }; -/* Wrappers for sincos. */ -static float sincosf_sinf(float x) {(void)cosf(x); return sinf(x);} -static float sincosf_cosf(float x) {(void)sinf(x); return cosf(x);} -static double sincos_sin(double x) {(void)cos(x); return sin(x);} -static double sincos_cos(double x) {(void)sin(x); return cos(x);} -#if USE_MPFR -static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_cos(y,x,r); return mpfr_sin(y,x,r); } -static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_sin(y,x,r); return mpfr_cos(y,x,r); } -#endif - /* A bit of a hack: call vector functions twice with the same input in lane 0 but a different value in other lanes: once with an in-range value and then with a special case value. */ static int secondcall; /* Wrappers for vector functions. */ -#if __aarch64__ && WANT_VMATH +#ifdef __vpcs typedef __f32x4_t v_float; typedef __f64x2_t v_double; -static const float fv[2] = {1.0f, -INFINITY}; -static const double dv[2] = {1.0, -INFINITY}; +/* First element of fv and dv may be changed by -c argument. 
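+   On the first call the last lane holds the in-range fv[0]/dv[0]; on the
+   second call it holds the special fv[1]/dv[1], i.e. -INFINITY.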
*/ +static float fv[2] = {1.0f, -INFINITY}; +static double dv[2] = {1.0, -INFINITY}; static inline v_float argf(float x) { return (v_float){x,x,x,fv[secondcall]}; } static inline v_double argd(double x) { return (v_double){x,dv[secondcall]}; } - -static float v_sinf(float x) { return __v_sinf(argf(x))[0]; } -static float v_cosf(float x) { return __v_cosf(argf(x))[0]; } -static float v_expf_1u(float x) { return __v_expf_1u(argf(x))[0]; } -static float v_expf(float x) { return __v_expf(argf(x))[0]; } -static float v_exp2f_1u(float x) { return __v_exp2f_1u(argf(x))[0]; } -static float v_exp2f(float x) { return __v_exp2f(argf(x))[0]; } -static float v_logf(float x) { return __v_logf(argf(x))[0]; } -static float v_powf(float x, float y) { return __v_powf(argf(x),argf(y))[0]; } -static double v_sin(double x) { return __v_sin(argd(x))[0]; } -static double v_cos(double x) { return __v_cos(argd(x))[0]; } -static double v_exp(double x) { return __v_exp(argd(x))[0]; } -static double v_log(double x) { return __v_log(argd(x))[0]; } -static double v_pow(double x, double y) { return __v_pow(argd(x),argd(y))[0]; } -#ifdef __vpcs -static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; } -static float vn_cosf(float x) { return __vn_cosf(argf(x))[0]; } -static float vn_expf_1u(float x) { return __vn_expf_1u(argf(x))[0]; } -static float vn_expf(float x) { return __vn_expf(argf(x))[0]; } -static float vn_exp2f_1u(float x) { return __vn_exp2f_1u(argf(x))[0]; } -static float vn_exp2f(float x) { return __vn_exp2f(argf(x))[0]; } -static float vn_logf(float x) { return __vn_logf(argf(x))[0]; } -static float vn_powf(float x, float y) { return __vn_powf(argf(x),argf(y))[0]; } -static double vn_sin(double x) { return __vn_sin(argd(x))[0]; } -static double vn_cos(double x) { return __vn_cos(argd(x))[0]; } -static double vn_exp(double x) { return __vn_exp(argd(x))[0]; } -static double vn_log(double x) { return __vn_log(argd(x))[0]; } -static double vn_pow(double x, double y) { return __vn_pow(argd(x),argd(y))[0]; } -static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; } -static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; } -static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; } -static float Z_exp2f(float x) { return _ZGVnN4v_exp2f(argf(x))[0]; } -static float Z_logf(float x) { return _ZGVnN4v_logf(argf(x))[0]; } -static float Z_powf(float x, float y) { return _ZGVnN4vv_powf(argf(x),argf(y))[0]; } -static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; } -static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; } -static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; } -static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; } -static double Z_pow(double x, double y) { return _ZGVnN2vv_pow(argd(x),argd(y))[0]; } +#if WANT_SVE_MATH +#include <arm_sve.h> +typedef __SVFloat32_t sv_float; +typedef __SVFloat64_t sv_double; + +static inline sv_float svargf(float x) { + int n = svcntw(); + float base[n]; + for (int i=0; i<n; i++) + base[i] = (float)x; + base[n-1] = (float) fv[secondcall]; + return svld1(svptrue_b32(), base); +} +static inline sv_double svargd(double x) { + int n = svcntd(); + double base[n]; + for (int i=0; i<n; i++) + base[i] = x; + base[n-1] = dv[secondcall]; + return svld1(svptrue_b64(), base); +} +static inline float svretf(sv_float vec) { + int n = svcntw(); + float res[n]; + svst1(svptrue_b32(), res, vec); + return res[0]; +} +static inline double svretd(sv_double vec) { + int n = svcntd(); + double res[n]; + 
svst1(svptrue_b64(), res, vec); + return res[0]; +} #endif #endif +#include "test/ulp_wrappers.h" + struct fun { const char *name; @@ -322,83 +306,44 @@ static const struct fun fun[] = { #define F2(x) F (x##f, x##f, x, mpfr_##x, 2, 1, f2, 0) #define D1(x) F (x, x, x##l, mpfr_##x, 1, 0, d1, 0) #define D2(x) F (x, x, x##l, mpfr_##x, 2, 0, d2, 0) - F1 (sin) - F1 (cos) - F (sincosf_sinf, sincosf_sinf, sincos_sin, sincos_mpfr_sin, 1, 1, f1, 0) - F (sincosf_cosf, sincosf_cosf, sincos_cos, sincos_mpfr_cos, 1, 1, f1, 0) - F1 (exp) - F1 (exp2) - F1 (log) - F1 (log2) - F2 (pow) - F1 (erf) - D1 (exp) - D1 (exp2) - D1 (log) - D1 (log2) - D2 (pow) - D1 (erf) -#if WANT_VMATH - F (__s_sinf, __s_sinf, sin, mpfr_sin, 1, 1, f1, 0) - F (__s_cosf, __s_cosf, cos, mpfr_cos, 1, 1, f1, 0) - F (__s_expf_1u, __s_expf_1u, exp, mpfr_exp, 1, 1, f1, 0) - F (__s_expf, __s_expf, exp, mpfr_exp, 1, 1, f1, 0) - F (__s_exp2f_1u, __s_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 0) - F (__s_exp2f, __s_exp2f, exp2, mpfr_exp2, 1, 1, f1, 0) - F (__s_powf, __s_powf, pow, mpfr_pow, 2, 1, f2, 0) - F (__s_logf, __s_logf, log, mpfr_log, 1, 1, f1, 0) - F (__s_sin, __s_sin, sinl, mpfr_sin, 1, 0, d1, 0) - F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0) - F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0) - F (__s_log, __s_log, logl, mpfr_log, 1, 0, d1, 0) - F (__s_pow, __s_pow, powl, mpfr_pow, 2, 0, d2, 0) -#if __aarch64__ - F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1) - F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1) - F (__v_expf_1u, v_expf_1u, exp, mpfr_exp, 1, 1, f1, 1) - F (__v_expf, v_expf, exp, mpfr_exp, 1, 1, f1, 1) - F (__v_exp2f_1u, v_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1) - F (__v_exp2f, v_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1) - F (__v_logf, v_logf, log, mpfr_log, 1, 1, f1, 1) - F (__v_powf, v_powf, pow, mpfr_pow, 2, 1, f2, 1) - F (__v_sin, v_sin, sinl, mpfr_sin, 1, 0, d1, 1) - F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1) - F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1) - F (__v_log, v_log, logl, mpfr_log, 1, 0, d1, 1) - F (__v_pow, v_pow, powl, mpfr_pow, 2, 0, d2, 1) -#ifdef __vpcs - F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1) - F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1) - F (__vn_expf_1u, vn_expf_1u, exp, mpfr_exp, 1, 1, f1, 1) - F (__vn_expf, vn_expf, exp, mpfr_exp, 1, 1, f1, 1) - F (__vn_exp2f_1u, vn_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1) - F (__vn_exp2f, vn_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1) - F (__vn_logf, vn_logf, log, mpfr_log, 1, 1, f1, 1) - F (__vn_powf, vn_powf, pow, mpfr_pow, 2, 1, f2, 1) - F (__vn_sin, vn_sin, sinl, mpfr_sin, 1, 0, d1, 1) - F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1) - F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1) - F (__vn_log, vn_log, logl, mpfr_log, 1, 0, d1, 1) - F (__vn_pow, vn_pow, powl, mpfr_pow, 2, 0, d2, 1) - F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1) - F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1) - F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1) - F (_ZGVnN4v_exp2f, Z_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1) - F (_ZGVnN4v_logf, Z_logf, log, mpfr_log, 1, 1, f1, 1) - F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1) - F (_ZGVnN2v_sin, Z_sin, sinl, mpfr_sin, 1, 0, d1, 1) - F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1) - F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1) - F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1) - F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1) -#endif -#endif -#endif +/* Neon routines. 
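+   Each macro expands to one F (...) row of the fun table below, e.g.
+   VNF1 (sin) becomes
+     F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 0)
+   and the ZVNF*/ZVND* forms emit both the __vn_* row and the matching
+   _ZGVnN* vector-ABI row.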
 */
+#define VF1(x) F (__v_##x##f, v_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define VF2(x) F (__v_##x##f, v_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define VD1(x) F (__v_##x, v_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define VD2(x) F (__v_##x, v_##x, x##l, mpfr_##x, 2, 0, d2, 0)
+#define VNF1(x) F (__vn_##x##f, vn_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define VNF2(x) F (__vn_##x##f, vn_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define VND1(x) F (__vn_##x, vn_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define VND2(x) F (__vn_##x, vn_##x, x##l, mpfr_##x, 2, 0, d2, 0)
+#define ZVF1(x) F (_ZGVnN4v_##x##f, Z_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define ZVF2(x) F (_ZGVnN4vv_##x##f, Z_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define ZVD1(x) F (_ZGVnN2v_##x, Z_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define ZVD2(x) F (_ZGVnN2vv_##x, Z_##x, x##l, mpfr_##x, 2, 0, d2, 0)
+#define ZVNF1(x) VNF1 (x) ZVF1 (x)
+#define ZVNF2(x) VNF2 (x) ZVF2 (x)
+#define ZVND1(x) VND1 (x) ZVD1 (x)
+#define ZVND2(x) VND2 (x) ZVD2 (x)
+/* SVE routines. */
+#define SVF1(x) F (__sv_##x##f, sv_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define SVF2(x) F (__sv_##x##f, sv_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define SVD1(x) F (__sv_##x, sv_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define SVD2(x) F (__sv_##x, sv_##x, x##l, mpfr_##x, 2, 0, d2, 0)
+#define ZSVF1(x) F (_ZGVsMxv_##x##f, Z_sv_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define ZSVF2(x) F (_ZGVsMxvv_##x##f, Z_sv_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define ZSVD1(x) F (_ZGVsMxv_##x, Z_sv_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define ZSVD2(x) F (_ZGVsMxvv_##x, Z_sv_##x, x##l, mpfr_##x, 2, 0, d2, 0)
+
+#include "test/ulp_funcs.h"
+
 #undef F
 #undef F1
 #undef F2
 #undef D1
 #undef D2
+#undef SVF1
+#undef SVF2
+#undef SVD1
+#undef SVD2
 {0}};

 /* Boilerplate for generic calls. */
@@ -639,12 +584,18 @@ call_mpfr_d2 (mpfr_t y, const struct fun *f, struct args_d2 a, mpfr_rnd_t r)
 static void
 usage (void)
 {
-  puts ("./ulp [-q] [-m] [-f] [-r nudz] [-l soft-ulplimit] [-e ulplimit] func "
+  puts ("./ulp [-q] [-m] [-f] [-r {n|u|d|z}] [-l soft-ulplimit] [-e ulplimit] func "
	"lo [hi [x lo2 hi2] [count]]");
   puts ("Compares func against a higher precision implementation in [lo; hi].");
   puts ("-q: quiet.");
   puts ("-m: use mpfr even if faster method is available.");
-  puts ("-f: disable fenv testing (rounding modes and exceptions).");
+  puts ("-f: disable fenv exceptions testing.");
+#ifdef __vpcs
+  puts ("-c: neutral 'control value' to test behaviour when one lane can affect another. \n"
+	"    This should be different from tested input in other lanes, and non-special \n"
+	"    (i.e. should not trigger fenv exceptions). Default is 1.");
+#endif
+  puts ("-z: ignore sign of 0.");
   puts ("Supported func:");
   for (const struct fun *f = fun; f->name; f++)
     printf ("\t%s\n", f->name);
@@ -768,6 +719,7 @@ main (int argc, char *argv[])
   conf.fenv = 1;
   conf.softlim = 0;
   conf.errlim = INFINITY;
+  conf.ignore_zero_sign = 0;
   for (;;)
     {
       argc--;
@@ -807,11 +759,22 @@ main (int argc, char *argv[])
	{
	  argc--;
	  argv++;
-	  if (argc < 1)
+	  if (argc < 1 || argv[0][1] != '\0')
	    usage ();
	  conf.rc = argv[0][0];
	}
	break;
+      case 'z':
+	conf.ignore_zero_sign = 1;
+	break;
+#ifdef __vpcs
+      case 'c':
+	argc--;
+	argv++;
+	fv[0] = strtof(argv[0], 0);
+	dv[0] = strtod(argv[0], 0);
+	break;
+#endif
       default:
	usage ();
      }
@@ -837,7 +800,19 @@ main (int argc, char *argv[])
     if (strcmp (argv[0], f->name) == 0)
       break;
   if (!f->name)
-    usage ();
+    {
+#ifndef __vpcs
+      /* Ignore vector math functions if vector math is not supported.
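+	 runulp.sh passes every name in its table unconditionally, so names
+	 for unsupported extensions can be skipped (exit 0) rather than
+	 reported as failures.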
 */
+      if (strncmp (argv[0], "_ZGVnN", 6) == 0)
+	exit (0);
+#endif
+#if !WANT_SVE_MATH
+      if (strncmp (argv[0], "_ZGVsMxv", 8) == 0)
+	exit (0);
+#endif
+      printf ("math function %s not supported\n", argv[0]);
+      exit (1);
+    }
   if (!f->singleprec && LDBL_MANT_DIG == DBL_MANT_DIG)
     conf.mpfr = 1; /* Use mpfr if long double has no extra precision. */
   if (!USE_MPFR && conf.mpfr)
diff --git a/contrib/arm-optimized-routines/math/test/ulp.h b/contrib/arm-optimized-routines/math/test/ulp.h
index a0c301664321..b0bc59aeef8d 100644
--- a/contrib/arm-optimized-routines/math/test/ulp.h
+++ b/contrib/arm-optimized-routines/math/test/ulp.h
@@ -1,8 +1,8 @@
 /*
  * Generic functions for ULP error estimation.
  *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */

 /* For each different math function type,
@@ -37,7 +37,8 @@ static int RT(ulpscale_mpfr) (mpfr_t x, int t)
 /* Difference between exact result and closest real number that
    gets rounded to got, i.e. error before rounding, for a correctly
    rounded result the difference is 0.  */
-static double RT(ulperr) (RT(float) got, const struct RT(ret) * p, int r)
+static double RT (ulperr) (RT (float) got, const struct RT (ret) * p, int r,
+			   int ignore_zero_sign)
 {
   RT(float) want = p->y;
   RT(float) d;
@@ -45,10 +46,18 @@ static double RT(ulperr) (RT(float) got, const struct RT(ret) * p, int r)
   if (RT(asuint) (got) == RT(asuint) (want))
     return 0.0;
+  if (isnan (got) && isnan (want))
+    /* Ignore sign of NaN.  */
+    return RT (issignaling) (got) == RT (issignaling) (want) ? 0 : INFINITY;
   if (signbit (got) != signbit (want))
-    /* May have false positives with NaN.  */
-    //return isnan(got) && isnan(want) ? 0 : INFINITY;
-    return INFINITY;
+    {
+      /* Fall through to ULP calculation if ignoring sign of zero and
+	 exactly one of want and got is non-zero.  */
+      if (ignore_zero_sign && want == got)
+	return 0.0;
+      if (!ignore_zero_sign || (want != 0 && got != 0))
+	return INFINITY;
+    }
   if (!isfinite (want) || !isfinite (got))
     {
       if (isnan (got) != isnan (want))
@@ -114,8 +123,12 @@ static inline void T(call_fenv) (const struct fun *f, struct T(args) a, int r,
 static inline void T(call_nofenv) (const struct fun *f, struct T(args) a,
				    int r, RT(float) * y, int *ex)
 {
+  if (r != FE_TONEAREST)
+    fesetround (r);
   *y = T(call) (f, a);
   *ex = 0;
+  if (r != FE_TONEAREST)
+    fesetround (FE_TONEAREST);
 }

 static inline int T(call_long_fenv) (const struct fun *f, struct T(args) a,
@@ -155,8 +168,12 @@ static inline int T(call_long_nofenv) (const struct fun *f, struct T(args) a,
					int r, struct RT(ret) * p,
					RT(float) ygot, int exgot)
 {
+  if (r != FE_TONEAREST)
+    fesetround (r);
   RT(double) yl = T(call_long) (f, a);
   p->y = (RT(float)) yl;
+  if (r != FE_TONEAREST)
+    fesetround (FE_TONEAREST);
   if (RT(isok_nofenv) (ygot, p->y))
     return 1;
   p->ulpexp = RT(ulpscale) (p->y);
@@ -288,7 +305,7 @@ static int T(cmp) (const struct fun *f, struct gen *gen,
   if (!ok)
     {
       int print = 0;
-      double err = RT(ulperr) (ygot, &want, r);
+      double err = RT (ulperr) (ygot, &want, r, conf->ignore_zero_sign);
       double abserr = fabs (err);
       // TODO: count errors below accuracy limit.
       if (abserr > 0)
diff --git a/contrib/arm-optimized-routines/math/test/ulp_funcs.h b/contrib/arm-optimized-routines/math/test/ulp_funcs.h
new file mode 100644
index 000000000000..84f7927d3935
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/test/ulp_funcs.h
@@ -0,0 +1,40 @@
+/*
+ * Function entries for ulp.
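+ * Each line expands, via the F/F1/F2/D1/D2 macros defined in ulp.c, into
+ * one row of its fun[] table; e.g. D1 (exp) becomes
+ *   F (exp, exp, expl, mpfr_exp, 1, 0, d1, 0).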
+ * + * Copyright (c) 2022-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +/* clang-format off */ + F1 (sin) + F1 (cos) + F (sincosf_sinf, sincosf_sinf, sincos_sin, sincos_mpfr_sin, 1, 1, f1, 0) + F (sincosf_cosf, sincosf_cosf, sincos_cos, sincos_mpfr_cos, 1, 1, f1, 0) + F1 (exp) + F1 (exp2) + F1 (log) + F1 (log2) + F2 (pow) + F1 (erf) + D1 (exp) + D1 (exp10) + D1 (exp2) + D1 (log) + D1 (log2) + D2 (pow) + D1 (erf) +#ifdef __vpcs + F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1) + F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1) + F (_ZGVnN4v_expf_1u, Z_expf_1u, exp, mpfr_exp, 1, 1, f1, 1) + F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1) + F (_ZGVnN4v_exp2f_1u, Z_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1) + F (_ZGVnN4v_exp2f, Z_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1) + F (_ZGVnN4v_logf, Z_logf, log, mpfr_log, 1, 1, f1, 1) + F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1) + F (_ZGVnN2v_sin, Z_sin, sinl, mpfr_sin, 1, 0, d1, 1) + F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1) + F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1) + F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1) + F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1) +#endif +/* clang-format on */ diff --git a/contrib/arm-optimized-routines/math/test/ulp_wrappers.h b/contrib/arm-optimized-routines/math/test/ulp_wrappers.h new file mode 100644 index 000000000000..60dc3d6dd652 --- /dev/null +++ b/contrib/arm-optimized-routines/math/test/ulp_wrappers.h @@ -0,0 +1,37 @@ +/* + * Function wrappers for ulp. + * + * Copyright (c) 2022-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +/* clang-format off */ + +/* Wrappers for sincos. */ +static float sincosf_sinf(float x) {(void)cosf(x); return sinf(x);} +static float sincosf_cosf(float x) {(void)sinf(x); return cosf(x);} +static double sincos_sin(double x) {(void)cos(x); return sin(x);} +static double sincos_cos(double x) {(void)sin(x); return cos(x);} +#if USE_MPFR +static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_cos(y,x,r); return mpfr_sin(y,x,r); } +static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) { mpfr_sin(y,x,r); return mpfr_cos(y,x,r); } +#endif + +/* Wrappers for vector functions. 
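+   Each wrapper broadcasts its scalar argument with argf/argd and returns
+   lane 0 of the vector result, so ulp's scalar test loop can drive the
+   vector routines directly.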
*/ +#ifdef __vpcs +static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; } +static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; } +static float Z_expf_1u(float x) { return _ZGVnN4v_expf_1u(argf(x))[0]; } +static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; } +static float Z_exp2f_1u(float x) { return _ZGVnN4v_exp2f_1u(argf(x))[0]; } +static float Z_exp2f(float x) { return _ZGVnN4v_exp2f(argf(x))[0]; } +static float Z_logf(float x) { return _ZGVnN4v_logf(argf(x))[0]; } +static float Z_powf(float x, float y) { return _ZGVnN4vv_powf(argf(x),argf(y))[0]; } +static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; } +static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; } +static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; } +static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; } +static double Z_pow(double x, double y) { return _ZGVnN2vv_pow(argd(x),argd(y))[0]; } +#endif + +/* clang-format on */ diff --git a/contrib/arm-optimized-routines/math/tgamma128.c b/contrib/arm-optimized-routines/math/tgamma128.c new file mode 100644 index 000000000000..65deacc49d99 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tgamma128.c @@ -0,0 +1,356 @@ +/* + * Implementation of the true gamma function (as opposed to lgamma) + * for 128-bit long double. + * + * Copyright (c) 2006-2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +/* + * This module implements the float128 gamma function under the name + * tgamma128. It's expected to be suitable for integration into system + * maths libraries under the standard name tgammal, if long double is + * 128-bit. Such a library will probably want to check the error + * handling and optimize the initial process of extracting the + * exponent, which is done here by simple and portable (but + * potentially slower) methods. + */ + +#include <float.h> +#include <math.h> +#include <stdbool.h> +#include <stddef.h> + +/* Only binary128 format is supported. */ +#if LDBL_MANT_DIG == 113 + +#include "tgamma128.h" + +#define lenof(x) (sizeof(x)/sizeof(*(x))) + +/* + * Helper routine to evaluate a polynomial via Horner's rule + */ +static long double poly(const long double *coeffs, size_t n, long double x) +{ + long double result = coeffs[--n]; + + while (n > 0) + result = (result * x) + coeffs[--n]; + + return result; +} + +/* + * Compute sin(pi*x) / pi, for use in the reflection formula that + * relates gamma(-x) and gamma(x). + */ +static long double sin_pi_x_over_pi(long double x) +{ + int quo; + long double fracpart = remquol(x, 0.5L, &quo); + + long double sign = 1.0L; + if (quo & 2) + sign = -sign; + quo &= 1; + + if (quo == 0 && fabsl(fracpart) < 0x1.p-58L) { + /* For numbers this size, sin(pi*x) is so close to pi*x that + * sin(pi*x)/pi is indistinguishable from x in float128 */ + return sign * fracpart; + } + + if (quo == 0) { + return sign * sinl(pi*fracpart) / pi; + } else { + return sign * cosl(pi*fracpart) / pi; + } +} + +/* Return tgamma(x) on the assumption that x >= 8. */ +static long double tgamma_large(long double x, + bool negative, long double negadjust) +{ + /* + * In this range we compute gamma(x) as x^(x-1/2) * e^-x * K, + * where K is a correction factor computed as a polynomial in 1/x. + * + * (Vaguely inspired by the form of the Lanczos approximation, but + * I tried the Lanczos approximation itself and it suffers badly + * from big cancellation leading to loss of significance.) 
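+     *
+     * (For reference, the constant term of the fitted polynomial,
+     * coeffs_large[0], is sqrt(2*pi/e) ~= 1.5203; see the Remez fit in
+     * tools/tgamma128_gen.jl.)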
+ */ + long double t = 1/x; + long double p = poly(coeffs_large, lenof(coeffs_large), t); + + /* + * To avoid overflow in cases where x^(x-0.5) does overflow + * but gamma(x) does not, we split x^(x-0.5) in half and + * multiply back up _after_ multiplying the shrinking factor + * of exp(-(x-0.5)). + * + * Note that computing x-0.5 and (x-0.5)/2 is exact for the + * relevant range of x, so the only sources of error are pow + * and exp themselves, plus the multiplications. + */ + long double powhalf = powl(x, (x-0.5L)/2.0L); + long double expret = expl(-(x-0.5L)); + + if (!negative) { + return (expret * powhalf) * powhalf * p; + } else { + /* + * Apply the reflection formula as commented below, but + * carefully: negadjust has magnitude less than 1, so it can + * turn a case where gamma(+x) would overflow into a case + * where gamma(-x) doesn't underflow. Not only that, but the + * FP format has greater range in the tiny domain due to + * denormals. For both reasons, it's not good enough to + * compute the positive result and then adjust it. + */ + long double ret = 1 / ((expret * powhalf) * (x * negadjust) * p); + return ret / powhalf; + } +} + +/* Return tgamma(x) on the assumption that 0 <= x < 1/32. */ +static long double tgamma_tiny(long double x, + bool negative, long double negadjust) +{ + /* + * For x near zero, we use a polynomial approximation to + * g = 1/(x*gamma(x)), and then return 1/(g*x). + */ + long double g = poly(coeffs_tiny, lenof(coeffs_tiny), x); + if (!negative) + return 1.0L / (g*x); + else + return g / negadjust; +} + +/* Return tgamma(x) on the assumption that 0 <= x < 2^-113. */ +static long double tgamma_ultratiny(long double x, bool negative, + long double negadjust) +{ + /* On this interval, gamma can't even be distinguished from 1/x, + * so we skip the polynomial evaluation in tgamma_tiny, partly to + * save time and partly to avoid the tiny intermediate values + * setting the underflow exception flag. */ + if (!negative) + return 1.0L / x; + else + return 1.0L / negadjust; +} + +/* Return tgamma(x) on the assumption that 1 <= x <= 2. */ +static long double tgamma_central(long double x) +{ + /* + * In this central interval, our strategy is to finding the + * difference between x and the point where gamma has a minimum, + * and approximate based on that. + */ + + /* The difference between the input x and the minimum x. The first + * subtraction is expected to be exact, since x and min_hi have + * the same exponent (unless x=2, in which case it will still be + * exact). */ + long double t = (x - min_x_hi) - min_x_lo; + + /* + * Now use two different polynomials for the intervals [1,m] and + * [m,2]. + */ + long double p; + if (t < 0) + p = poly(coeffs_central_neg, lenof(coeffs_central_neg), -t); + else + p = poly(coeffs_central_pos, lenof(coeffs_central_pos), t); + + return (min_y_lo + p * (t*t)) + min_y_hi; +} + +long double tgamma128(long double x) +{ + /* + * Start by extracting the number's sign and exponent, and ruling + * out cases of non-normalized numbers. + * + * For an implementation integrated into a system libm, it would + * almost certainly be quicker to do this by direct bitwise access + * to the input float128 value, using whatever is the local idiom + * for knowing its endianness. + * + * Integration into a system libc may also need to worry about + * setting errno, if that's the locally preferred way to report + * math.h errors. 
+ */ + int sign = signbit(x); + int exponent; + switch (fpclassify(x)) { + case FP_NAN: + return x+x; /* propagate QNaN, make SNaN throw an exception */ + case FP_ZERO: + return 1/x; /* divide by zero on purpose to indicate a pole */ + case FP_INFINITE: + if (sign) { + return x-x; /* gamma(-inf) has indeterminate sign, so provoke an + * IEEE invalid operation exception to indicate that */ + } + return x; /* but gamma(+inf) is just +inf with no error */ + case FP_SUBNORMAL: + exponent = -16384; + break; + default: + frexpl(x, &exponent); + exponent--; + break; + } + + bool negative = false; + long double negadjust = 0.0L; + + if (sign) { + /* + * Euler's reflection formula is + * + * gamma(1-x) gamma(x) = pi/sin(pi*x) + * + * pi + * => gamma(x) = -------------------- + * gamma(1-x) sin(pi*x) + * + * But computing 1-x is going to lose a lot of accuracy when x + * is very small, so instead we transform using the recurrence + * gamma(t+1)=t gamma(t). Setting t=-x, this gives us + * gamma(1-x) = -x gamma(-x), so we now have + * + * pi + * gamma(x) = ---------------------- + * -x gamma(-x) sin(pi*x) + * + * which relates gamma(x) to gamma(-x), which is much nicer, + * since x can be turned into -x without rounding. + */ + negadjust = sin_pi_x_over_pi(x); + negative = true; + x = -x; + + /* + * Now the ultimate answer we want is + * + * 1 / (gamma(x) * x * negadjust) + * + * where x is the positive value we've just turned it into. + * + * For some of the cases below, we'll compute gamma(x) + * normally and then compute this adjusted value afterwards. + * But for others, we can implement the reciprocal operation + * in this formula by _avoiding_ an inversion that the + * sub-case was going to do anyway. + */ + + if (negadjust == 0) { + /* + * Special case for negative integers. Applying the + * reflection formula would cause division by zero, but + * standards would prefer we treat this error case as an + * invalid operation and return NaN instead. (Possibly + * because otherwise you'd have to decide which sign of + * infinity to return, and unlike the x=0 case, there's no + * sign of zero available to disambiguate.) + */ + return negadjust / negadjust; + } + } + + /* + * Split the positive domain into various cases. For cases where + * we do the negative-number adjustment the usual way, we'll leave + * the answer in 'g' and drop out of the if statement. + */ + long double g; + + if (exponent >= 11) { + /* + * gamma of any positive value this large overflows, and gamma + * of any negative value underflows. + */ + if (!negative) { + long double huge = 0x1p+12288L; + return huge * huge; /* provoke an overflow */ + } else { + long double tiny = 0x1p-12288L; + return tiny * tiny * negadjust; /* underflow, of the right sign */ + } + } else if (exponent >= 3) { + /* Negative-number adjustment happens inside here */ + return tgamma_large(x, negative, negadjust); + } else if (exponent < -113) { + /* Negative-number adjustment happens inside here */ + return tgamma_ultratiny(x, negative, negadjust); + } else if (exponent < -5) { + /* Negative-number adjustment happens inside here */ + return tgamma_tiny(x, negative, negadjust); + } else if (exponent == 0) { + g = tgamma_central(x); + } else if (exponent < 0) { + /* + * For x in [1/32,1) we range-reduce upwards to the interval + * [1,2), using the inverse of the normal recurrence formula: + * gamma(x) = gamma(x+1)/x. 
+ */ + g = tgamma_central(1+x) / x; + } else { + /* + * For x in [2,8) we range-reduce downwards to the interval + * [1,2) by repeated application of the recurrence formula. + * + * Actually multiplying (x-1) by (x-2) by (x-3) and so on + * would introduce multiple ULPs of rounding error. We can get + * better accuracy by writing x = (k+1/2) + t, where k is an + * integer and |t|<1/2, and expanding out the obvious factor + * (x-1)(x-2)...(x-k+1) as a polynomial in t. + */ + long double mult; + int i = x; + if (i == 2) { /* x in [2,3) */ + mult = (x-1); + } else { + long double t = x - (i + 0.5L); + switch (i) { + /* E.g. for x=3.5+t, we want + * (x-1)(x-2) = (2.5+t)(1.5+t) = 3.75 + 4t + t^2 */ + case 3: + mult = 3.75L+t*(4.0L+t); + break; + case 4: + mult = 13.125L+t*(17.75L+t*(7.5L+t)); + break; + case 5: + mult = 59.0625L+t*(93.0L+t*(51.50L+t*(12.0L+t))); + break; + case 6: + mult = 324.84375L+t*(570.5625L+t*(376.250L+t*( + 117.5L+t*(17.5L+t)))); + break; + case 7: + mult = 2111.484375L+t*(4033.5L+t*(3016.1875L+t*( + 1140.0L+t*(231.25L+t*(24.0L+t))))); + break; + } + } + + g = tgamma_central(x - (i-1)) * mult; + } + + if (!negative) { + /* Positive domain: return g unmodified */ + return g; + } else { + /* Negative domain: apply the reflection formula as commented above */ + return 1.0L / (g * x * negadjust); + } +} + +#endif diff --git a/contrib/arm-optimized-routines/math/tgamma128.h b/contrib/arm-optimized-routines/math/tgamma128.h new file mode 100644 index 000000000000..90875a22dce4 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tgamma128.h @@ -0,0 +1,141 @@ +/* + * Polynomial coefficients and other constants for tgamma128.c. + * + * Copyright (c) 2006-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +/* The largest positive value for which 128-bit tgamma does not overflow. 
*/ +static const long double max_x = 0x1.b6e3180cd66a5c4206f128ba77f4p+10L; + +/* Coefficients of the polynomial used in the tgamma_large() subroutine */ +static const long double coeffs_large[] = { + 0x1.8535745aa79569579b9eec0f3bbcp+0L, + 0x1.0378f83c6fb8f0e51269f2b4a973p-3L, + 0x1.59f6a05094f69686c3380f4e2783p-8L, + -0x1.0b291dee952a82764a4859b081a6p-8L, + -0x1.6dd301b2205bf936b5a3eaad0dbbp-12L, + 0x1.387a8b5f38dd77e7f139b1021e86p-10L, + 0x1.bca46637f65b13750c728cc29e40p-14L, + -0x1.d80401c00aef998c9e303151a51cp-11L, + -0x1.49cb6bb09f935a2053ccc2cf3711p-14L, + 0x1.4e950204437dcaf2be77f73a6f45p-10L, + 0x1.cb711a2d65f188bf60110934d6bep-14L, + -0x1.7d7ff4bc95dc7faefc5e767f70f1p-9L, + -0x1.0305ab9760cddb0d833e73766836p-12L, + 0x1.3ef6c84bf1cd5c3f65ac2693bb5bp-7L, + 0x1.bb4144740ad9290123fdcea684aap-11L, + -0x1.72ab4e88272a229bfafd192450f0p-5L, + 0x1.80c70ac6eb3b7a698983d25a62b8p-12L, + 0x1.e222791c6743ce3e3cae220fb236p-3L, + 0x1.1a2dca1c82a9326c52b465f7cb7ap-2L, + -0x1.9d204fa235a42cd901b123d2ad47p+1L, + 0x1.55b56d1158f77ddb1c95fc44ab02p+0L, + 0x1.37f900a11dbd892abd7dde533e2dp+5L, + -0x1.2da49f4188dd89cb958369ef2401p+7L, + 0x1.fdae5ec3ec6eb7dffc09edbe6c14p+7L, + -0x1.61433cebe649098c9611c4c7774ap+7L, +}; + +/* Coefficients of the polynomial used in the tgamma_tiny() subroutine */ +static const long double coeffs_tiny[] = { + 0x1.0000000000000000000000000000p+0L, + 0x1.2788cfc6fb618f49a37c7f0201fep-1L, + -0x1.4fcf4026afa2dceb8490ade22796p-1L, + -0x1.5815e8fa27047c8f42b5d9217244p-5L, + 0x1.5512320b43fbe5dfa771333518f7p-3L, + -0x1.59af103c340927bffdd44f954bfcp-5L, + -0x1.3b4af28483e210479657e5543366p-7L, + 0x1.d919c527f6070bfce9b29c2ace9cp-8L, + -0x1.317112ce35337def3556a18aa178p-10L, + -0x1.c364fe77a6f27677b985b1fa2e1dp-13L, + 0x1.0c8a7a19a3fd40fe1f7e867efe7bp-13L, + -0x1.51cf9f090b5dc398ba86305e3634p-16L, + -0x1.4e80f64c04a339740de06ca9fa4ap-20L, + 0x1.241ddc2aef2ec20e58b08f2fda17p-20L, +}; + +/* The location within the interval [1,2] where gamma has a minimum. + * Specified as the sum of two 128-bit values, for extra precision. */ +static const long double min_x_hi = 0x1.762d86356be3f6e1a9c8865e0a4fp+0L; +static const long double min_x_lo = 0x1.ac54d7d218de21303a7c60f08840p-118L; + +/* The actual minimum value that gamma takes at that location. + * Again specified as the sum of two 128-bit values. 
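+ * The minimum itself is about 0.885603, attained near x = 1.461632.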
*/ +static const long double min_y_hi = 0x1.c56dc82a74aee8d8851566d40f32p-1L; +static const long double min_y_lo = 0x1.8ed98685742c353ce55e5794686fp-114L; + +/* Coefficients of the polynomial used in the tgamma_central() subroutine + * for computing gamma on the interval [1,min_x] */ +static const long double coeffs_central_neg[] = { + 0x1.b6c53f7377b83839c8a292e43b69p-2L, + 0x1.0bae9f40c7d09ed76e732045850ap-3L, + 0x1.4981175e14d04c3530e51d01c5fep-3L, + 0x1.79f77aaf032c948af3a9edbd2061p-4L, + 0x1.1e97bd10821095a5b79fbfdfa1a3p-4L, + 0x1.8071ce0935e4dcf0b33b0fbec7c1p-5L, + 0x1.0b44c2f92982f887b55ec36dfdb0p-5L, + 0x1.6df1de1e178ef72ca7bd63d40870p-6L, + 0x1.f63f502bde27e81c0f5e13479b43p-7L, + 0x1.57fd67d901f40ea011353ad89a0ap-7L, + 0x1.d7151376eed187eb753e2273cafcp-8L, + 0x1.427162b5c6ff1d904c71ef53e37cp-8L, + 0x1.b954b8c3a56cf93e49ef6538928ap-9L, + 0x1.2dff2ec26a3ae5cd3aaccae7a09ep-9L, + 0x1.9d35250d9b9378d9b59df734537ap-10L, + 0x1.1b2c0c48b9855a28f6dbd6fdff3cp-10L, + 0x1.7e0db39bb99cdb52b028d9359380p-11L, + 0x1.2164b5e1d364a0b5eaf97c436aa7p-11L, + 0x1.27521cf5fd24dcdf43524e6add11p-13L, + 0x1.06461d62243bf9a826b42349672fp-10L, + -0x1.2b852abead28209b4e0c756dc46ep-9L, + 0x1.be673c11a72c826115ec6d286c14p-8L, + -0x1.fd9ce330c215c31fcd3cb53c42ebp-7L, + 0x1.fa362bd2dc68f41abef2d8600acdp-6L, + -0x1.a21585b2f52f8b23855de8e452edp-5L, + 0x1.1f234431ed032052fc92e64e0493p-4L, + -0x1.40d332476ca0199c60cdae3f9132p-4L, + 0x1.1d45dc665d86012eba2eea199cefp-4L, + -0x1.8491016cdd08dc9be7ade9b5fef3p-5L, + 0x1.7e7e2fbc6d49ad484300d6add324p-6L, + -0x1.e63fe3f874a37276a8d7d8b705ecp-8L, + 0x1.30a2a73944f8c84998314d69c23fp-10L, +}; + +/* Coefficients of the polynomial used in the tgamma_central() subroutine + * for computing gamma on the interval [min_x,2] */ +static const long double coeffs_central_pos[] = { + 0x1.b6c53f7377b83839c8a292e22aa2p-2L, + -0x1.0bae9f40c7d09ed76e72e1c955dep-3L, + 0x1.4981175e14d04c3530ee5e1ecebcp-3L, + -0x1.79f77aaf032c948ac983d77f3e07p-4L, + 0x1.1e97bd10821095ab7dc94936cc11p-4L, + -0x1.8071ce0935e4d7edef8cbf2a1cf1p-5L, + 0x1.0b44c2f929837fafef7b5d9e80f1p-5L, + -0x1.6df1de1e175fe2a51faa25cddbb4p-6L, + 0x1.f63f502be57d11aed2cfe90843ffp-7L, + -0x1.57fd67d852f230015b9f64770273p-7L, + 0x1.d715138adc07e5fce81077070357p-8L, + -0x1.4271618e9fda8992a667adb15f4fp-8L, + 0x1.b954d15d9eb772e80fdd760672d7p-9L, + -0x1.2dfe391241d3cb79c8c15182843dp-9L, + 0x1.9d44396fcd48451c3ba924cee814p-10L, + -0x1.1ac195fb99739e341589e39803e6p-10L, + 0x1.82e46127b68f002770826e25f146p-11L, + -0x1.089dacd90d9f41493119ac178359p-11L, + 0x1.6993c007b20394a057d21f3d37f8p-12L, + -0x1.ec43a709f4446560c099dec8e31bp-13L, + 0x1.4ba36322f4074e9add9450f003cap-13L, + -0x1.b3f83a977965ca1b7937bf5b34cap-14L, + 0x1.10af346abc09cb25a6d9fe810b6ep-14L, + -0x1.38d8ea1188f242f50203edc395bdp-15L, + 0x1.39add987a948ec56f62b721a4475p-16L, + -0x1.02a4e141f286c8a967e2df9bc9adp-17L, + 0x1.433b50af22425f546e87113062d7p-19L, + -0x1.0c7b73cb0013f00aafc103e8e382p-21L, + 0x1.b852de313ec38da2297f6deaa6b4p-25L, +}; + +/* 128-bit float value of pi, used by the sin_pi_x_over_pi subroutine + */ +static const long double pi = 0x1.921fb54442d18469898cc51701b8p+1L; diff --git a/contrib/arm-optimized-routines/math/tools/cos.sollya b/contrib/arm-optimized-routines/math/tools/cos.sollya index bd72d6b74820..6690adfcbb9b 100644 --- a/contrib/arm-optimized-routines/math/tools/cos.sollya +++ b/contrib/arm-optimized-routines/math/tools/cos.sollya @@ -1,7 +1,7 @@ // polynomial for approximating cos(x) // // Copyright (c) 2019, Arm Limited. 
-// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 8; // polynomial degree a = -pi/4; // interval diff --git a/contrib/arm-optimized-routines/math/tools/exp.sollya b/contrib/arm-optimized-routines/math/tools/exp.sollya index b7a462cda5a4..0668bdb5b3d3 100644 --- a/contrib/arm-optimized-routines/math/tools/exp.sollya +++ b/contrib/arm-optimized-routines/math/tools/exp.sollya @@ -1,7 +1,7 @@ // polynomial for approximating e^x // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 5; // poly degree N = 128; // table entries diff --git a/contrib/arm-optimized-routines/math/tools/exp2.sollya b/contrib/arm-optimized-routines/math/tools/exp2.sollya index e760769601d4..bd0a42d6bbcb 100644 --- a/contrib/arm-optimized-routines/math/tools/exp2.sollya +++ b/contrib/arm-optimized-routines/math/tools/exp2.sollya @@ -1,7 +1,7 @@ // polynomial for approximating 2^x // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception // exp2f parameters deg = 3; // poly degree diff --git a/contrib/arm-optimized-routines/math/tools/log.sollya b/contrib/arm-optimized-routines/math/tools/log.sollya index 6df4db44b6f3..5288f5572925 100644 --- a/contrib/arm-optimized-routines/math/tools/log.sollya +++ b/contrib/arm-optimized-routines/math/tools/log.sollya @@ -1,7 +1,7 @@ // polynomial for approximating log(1+x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 12; // poly degree // |log(1+x)| > 0x1p-4 outside the interval diff --git a/contrib/arm-optimized-routines/math/tools/log2.sollya b/contrib/arm-optimized-routines/math/tools/log2.sollya index 4a364c0f111f..85811be5d90c 100644 --- a/contrib/arm-optimized-routines/math/tools/log2.sollya +++ b/contrib/arm-optimized-routines/math/tools/log2.sollya @@ -1,7 +1,7 @@ // polynomial for approximating log2(1+x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 11; // poly degree // |log2(1+x)| > 0x1p-4 outside the interval diff --git a/contrib/arm-optimized-routines/math/tools/log2_abs.sollya b/contrib/arm-optimized-routines/math/tools/log2_abs.sollya index 82c4dac26fa1..d018ba0145d2 100644 --- a/contrib/arm-optimized-routines/math/tools/log2_abs.sollya +++ b/contrib/arm-optimized-routines/math/tools/log2_abs.sollya @@ -1,7 +1,7 @@ // polynomial for approximating log2(1+x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 7; // poly degree // interval ~= 1/(2*N), where N is the table entries diff --git a/contrib/arm-optimized-routines/math/tools/log_abs.sollya b/contrib/arm-optimized-routines/math/tools/log_abs.sollya index a2ac190fc497..5f9bfe41a683 100644 --- a/contrib/arm-optimized-routines/math/tools/log_abs.sollya +++ b/contrib/arm-optimized-routines/math/tools/log_abs.sollya @@ -1,7 +1,7 @@ // polynomial for approximating log(1+x) // // Copyright (c) 2019, Arm Limited. 
-// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 6; // poly degree // interval ~= 1/(2*N), where N is the table entries diff --git a/contrib/arm-optimized-routines/math/tools/plot.py b/contrib/arm-optimized-routines/math/tools/plot.py index 6c8b89ff284b..a0fa02322560 100755 --- a/contrib/arm-optimized-routines/math/tools/plot.py +++ b/contrib/arm-optimized-routines/math/tools/plot.py @@ -3,7 +3,7 @@ # ULP error plot tool. # # Copyright (c) 2019, Arm Limited. -# SPDX-License-Identifier: MIT +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception import numpy as np import matplotlib.pyplot as plt diff --git a/contrib/arm-optimized-routines/math/tools/remez.jl b/contrib/arm-optimized-routines/math/tools/remez.jl index 2ff436f5287f..1deab67d0660 100755 --- a/contrib/arm-optimized-routines/math/tools/remez.jl +++ b/contrib/arm-optimized-routines/math/tools/remez.jl @@ -4,7 +4,7 @@ # remez.jl - implementation of the Remez algorithm for polynomial approximation # # Copyright (c) 2015-2019, Arm Limited. -# SPDX-License-Identifier: MIT +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception import Base.\ diff --git a/contrib/arm-optimized-routines/math/tools/sin.sollya b/contrib/arm-optimized-routines/math/tools/sin.sollya index a6e851145c11..a19300019867 100644 --- a/contrib/arm-optimized-routines/math/tools/sin.sollya +++ b/contrib/arm-optimized-routines/math/tools/sin.sollya @@ -1,7 +1,7 @@ // polynomial for approximating sin(x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 7; // polynomial degree a = -pi/4; // interval diff --git a/contrib/arm-optimized-routines/math/tools/tgamma128_gen.jl b/contrib/arm-optimized-routines/math/tools/tgamma128_gen.jl new file mode 100644 index 000000000000..ecec174110ea --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/tgamma128_gen.jl @@ -0,0 +1,212 @@ +# -*- julia -*- +# +# Generate tgamma128.h, containing polynomials and constants used by +# tgamma128.c. +# +# Copyright (c) 2006-2023, Arm Limited. +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +# This Julia program depends on the 'Remez' and 'SpecialFunctions' +# library packages. To install them, run this at the interactive Julia +# prompt: +# +# import Pkg; Pkg.add(["Remez", "SpecialFunctions"]) +# +# Tested on Julia 1.4.1 (Ubuntu 20.04) and 1.9.0 (22.04). + +import Printf +import Remez +import SpecialFunctions + +# Round a BigFloat to 128-bit long double and format it as a C99 hex +# float literal. +function quadhex(x) + sign = " " + if x < 0 + sign = "-" + x = -x + end + + exponent = BigInt(floor(log2(x))) + exponent = max(exponent, -16382) + @assert(exponent <= 16383) # else overflow + + x /= BigFloat(2)^exponent + @assert(1 <= x < 2) + x *= BigFloat(2)^112 + mantissa = BigInt(round(x)) + + mantstr = string(mantissa, base=16, pad=29) + return Printf.@sprintf("%s0x%s.%sp%+dL", sign, mantstr[1], mantstr[2:end], + exponent) +end + +# Round a BigFloat to 128-bit long double and return it still as a +# BigFloat. 
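+# The round argument selects how the 113-bit significand is rounded:
+# round < 0 rounds its magnitude down, round > 0 rounds it up, and 0
+# rounds to nearest. The script always passes -1, so that a constant c
+# can be split as c = hi + lo, with hi = quadval(c, -1) and a small
+# non-negative remainder lo = c - hi.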
+function quadval(x, round=0)
+    sign = +1
+    if x.sign < 0
+        sign = -1
+        x = -x
+    end
+
+    exponent = BigInt(floor(log2(x)))
+    exponent = max(exponent, -16382)
+    @assert(exponent <= 16383) # else overflow
+
+    x /= BigFloat(2)^exponent
+    @assert(1 <= x < 2)
+    x *= BigFloat(2)^112
+    if round < 0
+        mantissa = floor(x)
+    elseif round > 0
+        mantissa = ceil(x)
+    else
+        mantissa = Base.round(x) # qualified: the round parameter shadows Base.round
+    end
+
+    return sign * mantissa * BigFloat(2)^(exponent - 112)
+end
+
+# Output an array of BigFloats as a C array declaration.
+function dumparray(a, name)
+    println("static const long double ", name, "[] = {")
+    for x in a
+        println("    ", quadhex(x), ",")
+    end
+    println("};")
+end
+
+print("/*
+ * Polynomial coefficients and other constants for tgamma128.c.
+ *
+ * Copyright (c) 2006,2009,2023 Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+")
+
+Base.MPFR.setprecision(512)
+
+e = exp(BigFloat(1))
+
+print("
+/* The largest positive value for which 128-bit tgamma does not overflow. */
+")
+lo = BigFloat("1000")
+hi = BigFloat("2000")
+while true
+    global lo
+    global hi
+    global max_x
+
+    mid = (lo + hi) / 2
+    if mid == lo || mid == hi
+        max_x = mid
+        break
+    end
+    if SpecialFunctions.logabsgamma(mid)[1] < 16384 * log(BigFloat(2))
+        lo = mid
+    else
+        hi = mid
+    end
+end
+max_x = quadval(max_x, -1)
+println("static const long double max_x = ", quadhex(max_x), ";")
+
+print("
+/* Coefficients of the polynomial used in the tgamma_large() subroutine */
+")
+N, D, E, X = Remez.ratfn_minimax(
+    x -> x==0 ? sqrt(BigFloat(2)*pi/e) :
+         exp(SpecialFunctions.logabsgamma(1/x)[1] +
+             (1/x-0.5)*(1+log(x))),
+    (0, 1/BigFloat(8)),
+    24, 0,
+    (x, y) -> 1/y
+)
+dumparray(N, "coeffs_large")
+
+print("
+/* Coefficients of the polynomial used in the tgamma_tiny() subroutine */
+")
+N, D, E, X = Remez.ratfn_minimax(
+    x -> x==0 ? 1 : 1/(x*SpecialFunctions.gamma(x)),
+    (0, 1/BigFloat(32)),
+    13, 0,
+)
+dumparray(N, "coeffs_tiny")
+
+print("
+/* The location within the interval [1,2] where gamma has a minimum.
+ * Specified as the sum of two 128-bit values, for extra precision. */
+")
+lo = BigFloat("1.4")
+hi = BigFloat("1.5")
+while true
+    global lo
+    global hi
+    global min_x
+
+    mid = (lo + hi) / 2
+    if mid == lo || mid == hi
+        min_x = mid
+        break
+    end
+    if SpecialFunctions.digamma(mid) < 0
+        lo = mid
+    else
+        hi = mid
+    end
+end
+min_x_hi = quadval(min_x, -1)
+println("static const long double min_x_hi = ", quadhex(min_x_hi), ";")
+println("static const long double min_x_lo = ", quadhex(min_x - min_x_hi), ";")
+
+print("
+/* The actual minimum value that gamma takes at that location.
+ * Again specified as the sum of two 128-bit values. */
+")
+min_y = SpecialFunctions.gamma(min_x)
+min_y_hi = quadval(min_y, -1)
+println("static const long double min_y_hi = ", quadhex(min_y_hi), ";")
+println("static const long double min_y_lo = ", quadhex(min_y - min_y_hi), ";")
+
+function taylor_bodge(x)
+    # Taylor series generated by Wolfram Alpha for (gamma(min_x+x)-min_y)/x^2.
+    # Used in the Remez calls below for x values very near the origin, to avoid
+    # significance loss problems when trying to compute it directly via that
+    # formula (even in MPFR's extra precision).
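+    # Since gamma has its minimum at min_x, gamma'(min_x) = 0 and
+    # gamma(min_x + x) - min_y = O(x^2), so the quotient tends to the
+    # finite limit gamma''(min_x)/2 as x -> 0; the four series terms
+    # below are ample for the |x| < 2^-64 range this is used on.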
+ return BigFloat("0.428486815855585429730209907810650582960483696962660010556335457558784421896667728014324097132413696263704801646004585959298743677879606168187061990204432200")+x*(-BigFloat("0.130704158939785761928008749242671025181542078105370084716141350308119418619652583986015464395882363802104154017741656168641240436089858504560718773026275797")+x*(BigFloat("0.160890753325112844190519489594363387594505844658437718135952967735294789599989664428071656484587979507034160383271974554122934842441540146372016567834062876")+x*(-BigFloat("0.092277030213334350126864106458600575084335085690780082222880945224248438672595248111704471182201673989215223667543694847795410779036800385804729955729659506")))) +end + +print(" +/* Coefficients of the polynomial used in the tgamma_central() subroutine + * for computing gamma on the interval [1,min_x] */ +") +N, D, E, X = Remez.ratfn_minimax( + x -> x < BigFloat(0x1p-64) ? taylor_bodge(-x) : + (SpecialFunctions.gamma(min_x - x) - min_y) / (x*x), + (0, min_x - 1), + 31, 0, + (x, y) -> x^2, +) +dumparray(N, "coeffs_central_neg") + +print(" +/* Coefficients of the polynomial used in the tgamma_central() subroutine + * for computing gamma on the interval [min_x,2] */ +") +N, D, E, X = Remez.ratfn_minimax( + x -> x < BigFloat(0x1p-64) ? taylor_bodge(x) : + (SpecialFunctions.gamma(min_x + x) - min_y) / (x*x), + (0, 2 - min_x), + 28, 0, + (x, y) -> x^2, +) +dumparray(N, "coeffs_central_pos") + +print(" +/* 128-bit float value of pi, used by the sin_pi_x_over_pi subroutine + */ +") +println("static const long double pi = ", quadhex(BigFloat(pi)), ";") diff --git a/contrib/arm-optimized-routines/math/tools/v_exp.sollya b/contrib/arm-optimized-routines/math/tools/v_exp.sollya index c0abb63fb642..5fa7de7435a9 100644 --- a/contrib/arm-optimized-routines/math/tools/v_exp.sollya +++ b/contrib/arm-optimized-routines/math/tools/v_exp.sollya @@ -1,7 +1,7 @@ // polynomial for approximating e^x // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 4; // poly degree N = 128; // table entries diff --git a/contrib/arm-optimized-routines/math/tools/v_log.sollya b/contrib/arm-optimized-routines/math/tools/v_log.sollya index cc3d2c4ae72a..d982524eb920 100644 --- a/contrib/arm-optimized-routines/math/tools/v_log.sollya +++ b/contrib/arm-optimized-routines/math/tools/v_log.sollya @@ -1,7 +1,7 @@ // polynomial used for __v_log(x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 6; // poly degree a = -0x1.fc1p-9; diff --git a/contrib/arm-optimized-routines/math/tools/v_sin.sollya b/contrib/arm-optimized-routines/math/tools/v_sin.sollya index 65cc9957c624..63b9d65a1ac3 100644 --- a/contrib/arm-optimized-routines/math/tools/v_sin.sollya +++ b/contrib/arm-optimized-routines/math/tools/v_sin.sollya @@ -1,7 +1,7 @@ // polynomial for approximating sin(x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 15; // polynomial degree a = -pi/2; // interval diff --git a/contrib/arm-optimized-routines/math/v_cos.c b/contrib/arm-optimized-routines/math/v_cos.c deleted file mode 100644 index 20ba6bd0d0d9..000000000000 --- a/contrib/arm-optimized-routines/math/v_cos.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Double-precision vector cos function. - * - * Copyright (c) 2019, Arm Limited. 
- * SPDX-License-Identifier: MIT - */ - -#include "mathlib.h" -#include "v_math.h" -#if V_SUPPORTED - -static const double Poly[] = { -/* worst-case error is 3.5 ulp. - abs error: 0x1.be222a58p-53 in [-pi/2, pi/2]. */ --0x1.9f4a9c8b21dc9p-41, - 0x1.60e88a10163f2p-33, --0x1.ae6361b7254e7p-26, - 0x1.71de382e8d62bp-19, --0x1.a01a019aeb4ffp-13, - 0x1.111111110b25ep-7, --0x1.55555555554c3p-3, -}; - -#define C7 v_f64 (Poly[0]) -#define C6 v_f64 (Poly[1]) -#define C5 v_f64 (Poly[2]) -#define C4 v_f64 (Poly[3]) -#define C3 v_f64 (Poly[4]) -#define C2 v_f64 (Poly[5]) -#define C1 v_f64 (Poly[6]) - -#define InvPi v_f64 (0x1.45f306dc9c883p-2) -#define HalfPi v_f64 (0x1.921fb54442d18p+0) -#define Pi1 v_f64 (0x1.921fb54442d18p+1) -#define Pi2 v_f64 (0x1.1a62633145c06p-53) -#define Pi3 v_f64 (0x1.c1cd129024e09p-106) -#define Shift v_f64 (0x1.8p52) -#define RangeVal v_f64 (0x1p23) -#define AbsMask v_u64 (0x7fffffffffffffff) - -VPCS_ATTR -__attribute__ ((noinline)) static v_f64_t -specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp) -{ - return v_call_f64 (cos, x, y, cmp); -} - -VPCS_ATTR -v_f64_t -V_NAME(cos) (v_f64_t x) -{ - v_f64_t n, r, r2, y; - v_u64_t odd, cmp; - - r = v_as_f64_u64 (v_as_u64_f64 (x) & AbsMask); - cmp = v_cond_u64 (v_as_u64_f64 (r) >= v_as_u64_f64 (RangeVal)); - - /* n = rint((|x|+pi/2)/pi) - 0.5. */ - n = v_fma_f64 (InvPi, r + HalfPi, Shift); - odd = v_as_u64_f64 (n) << 63; - n -= Shift; - n -= v_f64 (0.5); - - /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ - r = v_fma_f64 (-Pi1, n, r); - r = v_fma_f64 (-Pi2, n, r); - r = v_fma_f64 (-Pi3, n, r); - - /* sin(r) poly approx. */ - r2 = r * r; - y = v_fma_f64 (C7, r2, C6); - y = v_fma_f64 (y, r2, C5); - y = v_fma_f64 (y, r2, C4); - y = v_fma_f64 (y, r2, C3); - y = v_fma_f64 (y, r2, C2); - y = v_fma_f64 (y, r2, C1); - y = v_fma_f64 (y * r2, r, r); - - /* sign. */ - y = v_as_f64_u64 (v_as_u64_f64 (y) ^ odd); - - if (unlikely (v_any_u64 (cmp))) - return specialcase (x, y, cmp); - return y; -} -VPCS_ALIAS -#endif diff --git a/contrib/arm-optimized-routines/math/v_cosf.c b/contrib/arm-optimized-routines/math/v_cosf.c deleted file mode 100644 index 150294b8845e..000000000000 --- a/contrib/arm-optimized-routines/math/v_cosf.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Single-precision vector cos function. - * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#include "mathlib.h" -#include "v_math.h" -#if V_SUPPORTED - -static const float Poly[] = { - /* 1.886 ulp error */ - 0x1.5b2e76p-19f, - -0x1.9f42eap-13f, - 0x1.110df4p-7f, - -0x1.555548p-3f, -}; -#define Pi1 v_f32 (0x1.921fb6p+1f) -#define Pi2 v_f32 (-0x1.777a5cp-24f) -#define Pi3 v_f32 (-0x1.ee59dap-49f) -#define A3 v_f32 (Poly[3]) -#define A5 v_f32 (Poly[2]) -#define A7 v_f32 (Poly[1]) -#define A9 v_f32 (Poly[0]) -#define RangeVal v_f32 (0x1p20f) -#define InvPi v_f32 (0x1.45f306p-2f) -#define Shift v_f32 (0x1.8p+23f) -#define AbsMask v_u32 (0x7fffffff) -#define HalfPi v_f32 (0x1.921fb6p0f) - -VPCS_ATTR -static v_f32_t -specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp) -{ - /* Fall back to scalar code. */ - return v_call_f32 (cosf, x, y, cmp); -} - -VPCS_ATTR -v_f32_t -V_NAME(cosf) (v_f32_t x) -{ - v_f32_t n, r, r2, y; - v_u32_t odd, cmp; - - r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask); - cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal)); - - /* n = rint((|x|+pi/2)/pi) - 0.5 */ - n = v_fma_f32 (InvPi, r + HalfPi, Shift); - odd = v_as_u32_f32 (n) << 31; - n -= Shift; - n -= v_f32 (0.5f); - - /* r = |x| - n*pi (range reduction into -pi/2 .. 
pi/2) */ - r = v_fma_f32 (-Pi1, n, r); - r = v_fma_f32 (-Pi2, n, r); - r = v_fma_f32 (-Pi3, n, r); - - /* y = sin(r) */ - r2 = r * r; - y = v_fma_f32 (A9, r2, A7); - y = v_fma_f32 (y, r2, A5); - y = v_fma_f32 (y, r2, A3); - y = v_fma_f32 (y * r2, r, r); - - /* sign fix */ - y = v_as_f32_u32 (v_as_u32_f32 (y) ^ odd); - - if (unlikely (v_any_u32 (cmp))) - return specialcase (x, y, cmp); - return y; -} -VPCS_ALIAS -#endif diff --git a/contrib/arm-optimized-routines/math/v_exp.c b/contrib/arm-optimized-routines/math/v_exp.c deleted file mode 100644 index e459d53fddd2..000000000000 --- a/contrib/arm-optimized-routines/math/v_exp.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Double-precision vector e^x function. - * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#include "mathlib.h" -#include "v_math.h" -#if V_SUPPORTED -#include "v_exp.h" - -#if V_EXP_TABLE_BITS == 7 -/* maxerr: 1.88 +0.5 ulp - rel error: 1.4337*2^-53 - abs error: 1.4299*2^-53 in [ -ln2/256, ln2/256 ]. */ -#define C1 v_f64 (0x1.ffffffffffd43p-2) -#define C2 v_f64 (0x1.55555c75adbb2p-3) -#define C3 v_f64 (0x1.55555da646206p-5) -#define InvLn2 v_f64 (0x1.71547652b82fep7) /* N/ln2. */ -#define Ln2hi v_f64 (0x1.62e42fefa39efp-8) /* ln2/N. */ -#define Ln2lo v_f64 (0x1.abc9e3b39803f3p-63) -#elif V_EXP_TABLE_BITS == 8 -/* maxerr: 0.54 +0.5 ulp - rel error: 1.4318*2^-58 - abs error: 1.4299*2^-58 in [ -ln2/512, ln2/512 ]. */ -#define C1 v_f64 (0x1.fffffffffffd4p-2) -#define C2 v_f64 (0x1.5555571d6b68cp-3) -#define C3 v_f64 (0x1.5555576a59599p-5) -#define InvLn2 v_f64 (0x1.71547652b82fep8) -#define Ln2hi v_f64 (0x1.62e42fefa39efp-9) -#define Ln2lo v_f64 (0x1.abc9e3b39803f3p-64) -#endif - -#define N (1 << V_EXP_TABLE_BITS) -#define Tab __v_exp_data -#define IndexMask v_u64 (N - 1) -#define Shift v_f64 (0x1.8p+52) -#define Thres v_f64 (704.0) - -VPCS_ATTR -static v_f64_t -specialcase (v_f64_t s, v_f64_t y, v_f64_t n) -{ - v_f64_t absn = v_abs_f64 (n); - - /* 2^(n/N) may overflow, break it up into s1*s2. */ - v_u64_t b = v_cond_u64 (n <= v_f64 (0.0)) & v_u64 (0x6000000000000000); - v_f64_t s1 = v_as_f64_u64 (v_u64 (0x7000000000000000) - b); - v_f64_t s2 = v_as_f64_u64 (v_as_u64_f64 (s) - v_u64 (0x3010000000000000) + b); - v_u64_t cmp = v_cond_u64 (absn > v_f64 (1280.0 * N)); - v_f64_t r1 = s1 * s1; - v_f64_t r0 = v_fma_f64 (y, s2, s2) * s1; - return v_as_f64_u64 ((cmp & v_as_u64_f64 (r1)) | (~cmp & v_as_u64_f64 (r0))); -} - -VPCS_ATTR -v_f64_t -V_NAME(exp) (v_f64_t x) -{ - v_f64_t n, r, r2, s, y, z; - v_u64_t cmp, u, e, i; - - cmp = v_cond_u64 (v_abs_f64 (x) > Thres); - - /* n = round(x/(ln2/N)). */ - z = v_fma_f64 (x, InvLn2, Shift); - u = v_as_u64_f64 (z); - n = z - Shift; - - /* r = x - n*ln2/N. */ - r = x; - r = v_fma_f64 (-Ln2hi, n, r); - r = v_fma_f64 (-Ln2lo, n, r); - - e = u << (52 - V_EXP_TABLE_BITS); - i = u & IndexMask; - - /* y = exp(r) - 1 ~= r + C1 r^2 + C2 r^3 + C3 r^4. */ - r2 = r * r; - y = v_fma_f64 (C2, r, C1); - y = v_fma_f64 (C3, r2, y); - y = v_fma_f64 (y, r2, r); - - /* s = 2^(n/N). */ - u = v_lookup_u64 (Tab, i); - s = v_as_f64_u64 (u + e); - - if (unlikely (v_any_u64 (cmp))) - return specialcase (s, y, n); - return v_fma_f64 (y, s, s); -} -VPCS_ALIAS -#endif diff --git a/contrib/arm-optimized-routines/math/v_exp.h b/contrib/arm-optimized-routines/math/v_exp.h deleted file mode 100644 index 305da19c0a53..000000000000 --- a/contrib/arm-optimized-routines/math/v_exp.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Declarations for double-precision e^x vector function. 
- * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#include "v_math.h" -#if WANT_VMATH - -#define V_EXP_TABLE_BITS 7 - -extern const u64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN; -#endif diff --git a/contrib/arm-optimized-routines/math/v_exp2f.c b/contrib/arm-optimized-routines/math/v_exp2f.c deleted file mode 100644 index e3ea5af3414d..000000000000 --- a/contrib/arm-optimized-routines/math/v_exp2f.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Single-precision vector 2^x function. - * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#include "mathlib.h" -#include "v_math.h" -#if V_SUPPORTED - -static const float Poly[] = { - /* maxerr: 1.962 ulp. */ - 0x1.59977ap-10f, - 0x1.3ce9e4p-7f, - 0x1.c6bd32p-5f, - 0x1.ebf9bcp-3f, - 0x1.62e422p-1f, -}; -#define C0 v_f32 (Poly[0]) -#define C1 v_f32 (Poly[1]) -#define C2 v_f32 (Poly[2]) -#define C3 v_f32 (Poly[3]) -#define C4 v_f32 (Poly[4]) - -#define Shift v_f32 (0x1.8p23f) - -VPCS_ATTR -static v_f32_t -specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn, v_u32_t cmp1, v_f32_t scale) -{ - /* 2^n may overflow, break it up into s1*s2. */ - v_u32_t b = v_cond_u32 (n <= v_f32 (0.0f)) & v_u32 (0x82000000); - v_f32_t s1 = v_as_f32_u32 (v_u32 (0x7f000000) + b); - v_f32_t s2 = v_as_f32_u32 (e - b); - v_u32_t cmp2 = v_cond_u32 (absn > v_f32 (192.0f)); - v_u32_t r2 = v_as_u32_f32 (s1 * s1); - v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1); - /* Similar to r1 but avoids double rounding in the subnormal range. */ - v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale)); - return v_as_f32_u32 ((cmp2 & r2) | (~cmp2 & cmp1 & r1) | (~cmp1 & r0)); -} - -VPCS_ATTR -v_f32_t -V_NAME(exp2f) (v_f32_t x) -{ - v_f32_t n, r, r2, scale, p, q, poly, absn; - v_u32_t cmp, e; - - /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = n + r, with r in [-1/2, 1/2]. */ -#if 0 - v_f32_t z; - z = x + Shift; - n = z - Shift; - r = x - n; - e = v_as_u32_f32 (z) << 23; -#else - n = v_round_f32 (x); - r = x - n; - e = v_as_u32_s32 (v_round_s32 (x)) << 23; -#endif - scale = v_as_f32_u32 (e + v_u32 (0x3f800000)); - absn = v_abs_f32 (n); - cmp = v_cond_u32 (absn > v_f32 (126.0f)); - r2 = r * r; - p = v_fma_f32 (C0, r, C1); - q = v_fma_f32 (C2, r, C3); - q = v_fma_f32 (p, r2, q); - p = C4 * r; - poly = v_fma_f32 (q, r2, p); - if (unlikely (v_any_u32 (cmp))) - return specialcase (poly, n, e, absn, cmp, scale); - return v_fma_f32 (poly, scale, scale); -} -VPCS_ALIAS -#endif diff --git a/contrib/arm-optimized-routines/math/v_exp2f_1u.c b/contrib/arm-optimized-routines/math/v_exp2f_1u.c deleted file mode 100644 index 1caa14d9bfff..000000000000 --- a/contrib/arm-optimized-routines/math/v_exp2f_1u.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Single-precision vector 2^x function. - * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#include "mathlib.h" -#include "v_math.h" -#if V_SUPPORTED - -static const float Poly[] = { - /* maxerr: 0.878 ulp. 
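-     (a degree-6 polynomial for 2^r on [-1/2, 1/2]; the leading 1 of
-     2^r = 1 + r*ln2 + ... is folded in by the final fma below.)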
*/ - 0x1.416b5ep-13f, 0x1.5f082ep-10f, 0x1.3b2dep-7f, 0x1.c6af7cp-5f, 0x1.ebfbdcp-3f, 0x1.62e43p-1f -}; -#define C0 v_f32 (Poly[0]) -#define C1 v_f32 (Poly[1]) -#define C2 v_f32 (Poly[2]) -#define C3 v_f32 (Poly[3]) -#define C4 v_f32 (Poly[4]) -#define C5 v_f32 (Poly[5]) - -#define Shift v_f32 (0x1.8p23f) -#define InvLn2 v_f32 (0x1.715476p+0f) -#define Ln2hi v_f32 (0x1.62e4p-1f) -#define Ln2lo v_f32 (0x1.7f7d1cp-20f) - -VPCS_ATTR -static v_f32_t -specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn) -{ - /* 2^n may overflow, break it up into s1*s2. */ - v_u32_t b = v_cond_u32 (n <= v_f32 (0.0f)) & v_u32 (0x83000000); - v_f32_t s1 = v_as_f32_u32 (v_u32 (0x7f000000) + b); - v_f32_t s2 = v_as_f32_u32 (e - b); - v_u32_t cmp = v_cond_u32 (absn > v_f32 (192.0f)); - v_f32_t r1 = s1 * s1; - v_f32_t r0 = poly * s1 * s2; - return v_as_f32_u32 ((cmp & v_as_u32_f32 (r1)) | (~cmp & v_as_u32_f32 (r0))); -} - -VPCS_ATTR -v_f32_t -V_NAME(exp2f_1u) (v_f32_t x) -{ - v_f32_t n, r, scale, poly, absn; - v_u32_t cmp, e; - - /* exp2(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)] - x = n + r, with r in [-1/2, 1/2]. */ -#if 0 - v_f32_t z; - z = x + Shift; - n = z - Shift; - r = x - n; - e = v_as_u32_f32 (z) << 23; -#else - n = v_round_f32 (x); - r = x - n; - e = v_as_u32_s32 (v_round_s32 (x)) << 23; -#endif - scale = v_as_f32_u32 (e + v_u32 (0x3f800000)); - absn = v_abs_f32 (n); - cmp = v_cond_u32 (absn > v_f32 (126.0f)); - poly = v_fma_f32 (C0, r, C1); - poly = v_fma_f32 (poly, r, C2); - poly = v_fma_f32 (poly, r, C3); - poly = v_fma_f32 (poly, r, C4); - poly = v_fma_f32 (poly, r, C5); - poly = v_fma_f32 (poly, r, v_f32 (1.0f)); - if (unlikely (v_any_u32 (cmp))) - return specialcase (poly, n, e, absn); - return scale * poly; -} -#endif diff --git a/contrib/arm-optimized-routines/math/v_exp_data.c b/contrib/arm-optimized-routines/math/v_exp_data.c deleted file mode 100644 index 365355497e95..000000000000 --- a/contrib/arm-optimized-routines/math/v_exp_data.c +++ /dev/null @@ -1,403 +0,0 @@ -/* - * Lookup table for double-precision e^x vector function. - * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#include "v_exp.h" -#if WANT_VMATH - -#define N (1 << V_EXP_TABLE_BITS) - -/* 2^(j/N), j=0..N. 
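-   Stored as the IEEE-754 bit patterns of the doubles 2^(j/N), all in
-   [1, 2), so that the exponent bits e computed in v_exp.c can be
-   added straight onto a table entry to form 2^(n/N).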
*/ -const u64_t __v_exp_data[] = { -#if N == 128 -0x3ff0000000000000, -0x3feff63da9fb3335, -0x3fefec9a3e778061, -0x3fefe315e86e7f85, -0x3fefd9b0d3158574, -0x3fefd06b29ddf6de, -0x3fefc74518759bc8, -0x3fefbe3ecac6f383, -0x3fefb5586cf9890f, -0x3fefac922b7247f7, -0x3fefa3ec32d3d1a2, -0x3fef9b66affed31b, -0x3fef9301d0125b51, -0x3fef8abdc06c31cc, -0x3fef829aaea92de0, -0x3fef7a98c8a58e51, -0x3fef72b83c7d517b, -0x3fef6af9388c8dea, -0x3fef635beb6fcb75, -0x3fef5be084045cd4, -0x3fef54873168b9aa, -0x3fef4d5022fcd91d, -0x3fef463b88628cd6, -0x3fef3f49917ddc96, -0x3fef387a6e756238, -0x3fef31ce4fb2a63f, -0x3fef2b4565e27cdd, -0x3fef24dfe1f56381, -0x3fef1e9df51fdee1, -0x3fef187fd0dad990, -0x3fef1285a6e4030b, -0x3fef0cafa93e2f56, -0x3fef06fe0a31b715, -0x3fef0170fc4cd831, -0x3feefc08b26416ff, -0x3feef6c55f929ff1, -0x3feef1a7373aa9cb, -0x3feeecae6d05d866, -0x3feee7db34e59ff7, -0x3feee32dc313a8e5, -0x3feedea64c123422, -0x3feeda4504ac801c, -0x3feed60a21f72e2a, -0x3feed1f5d950a897, -0x3feece086061892d, -0x3feeca41ed1d0057, -0x3feec6a2b5c13cd0, -0x3feec32af0d7d3de, -0x3feebfdad5362a27, -0x3feebcb299fddd0d, -0x3feeb9b2769d2ca7, -0x3feeb6daa2cf6642, -0x3feeb42b569d4f82, -0x3feeb1a4ca5d920f, -0x3feeaf4736b527da, -0x3feead12d497c7fd, -0x3feeab07dd485429, -0x3feea9268a5946b7, -0x3feea76f15ad2148, -0x3feea5e1b976dc09, -0x3feea47eb03a5585, -0x3feea34634ccc320, -0x3feea23882552225, -0x3feea155d44ca973, -0x3feea09e667f3bcd, -0x3feea012750bdabf, -0x3fee9fb23c651a2f, -0x3fee9f7df9519484, -0x3fee9f75e8ec5f74, -0x3fee9f9a48a58174, -0x3fee9feb564267c9, -0x3feea0694fde5d3f, -0x3feea11473eb0187, -0x3feea1ed0130c132, -0x3feea2f336cf4e62, -0x3feea427543e1a12, -0x3feea589994cce13, -0x3feea71a4623c7ad, -0x3feea8d99b4492ed, -0x3feeaac7d98a6699, -0x3feeace5422aa0db, -0x3feeaf3216b5448c, -0x3feeb1ae99157736, -0x3feeb45b0b91ffc6, -0x3feeb737b0cdc5e5, -0x3feeba44cbc8520f, -0x3feebd829fde4e50, -0x3feec0f170ca07ba, -0x3feec49182a3f090, -0x3feec86319e32323, -0x3feecc667b5de565, -0x3feed09bec4a2d33, -0x3feed503b23e255d, -0x3feed99e1330b358, -0x3feede6b5579fdbf, -0x3feee36bbfd3f37a, -0x3feee89f995ad3ad, -0x3feeee07298db666, -0x3feef3a2b84f15fb, -0x3feef9728de5593a, -0x3feeff76f2fb5e47, -0x3fef05b030a1064a, -0x3fef0c1e904bc1d2, -0x3fef12c25bd71e09, -0x3fef199bdd85529c, -0x3fef20ab5fffd07a, -0x3fef27f12e57d14b, -0x3fef2f6d9406e7b5, -0x3fef3720dcef9069, -0x3fef3f0b555dc3fa, -0x3fef472d4a07897c, -0x3fef4f87080d89f2, -0x3fef5818dcfba487, -0x3fef60e316c98398, -0x3fef69e603db3285, -0x3fef7321f301b460, -0x3fef7c97337b9b5f, -0x3fef864614f5a129, -0x3fef902ee78b3ff6, -0x3fef9a51fbc74c83, -0x3fefa4afa2a490da, -0x3fefaf482d8e67f1, -0x3fefba1bee615a27, -0x3fefc52b376bba97, -0x3fefd0765b6e4540, -0x3fefdbfdad9cbe14, -0x3fefe7c1819e90d8, -0x3feff3c22b8f71f1, -#elif N == 256 -0x3ff0000000000000, -0x3feffb1afa5abcbf, -0x3feff63da9fb3335, -0x3feff168143b0281, -0x3fefec9a3e778061, -0x3fefe7d42e11bbcc, -0x3fefe315e86e7f85, -0x3fefde5f72f654b1, -0x3fefd9b0d3158574, -0x3fefd50a0e3c1f89, -0x3fefd06b29ddf6de, -0x3fefcbd42b72a836, -0x3fefc74518759bc8, -0x3fefc2bdf66607e0, -0x3fefbe3ecac6f383, -0x3fefb9c79b1f3919, -0x3fefb5586cf9890f, -0x3fefb0f145e46c85, -0x3fefac922b7247f7, -0x3fefa83b23395dec, -0x3fefa3ec32d3d1a2, -0x3fef9fa55fdfa9c5, -0x3fef9b66affed31b, -0x3fef973028d7233e, -0x3fef9301d0125b51, -0x3fef8edbab5e2ab6, -0x3fef8abdc06c31cc, -0x3fef86a814f204ab, -0x3fef829aaea92de0, -0x3fef7e95934f312e, -0x3fef7a98c8a58e51, -0x3fef76a45471c3c2, -0x3fef72b83c7d517b, -0x3fef6ed48695bbc0, -0x3fef6af9388c8dea, -0x3fef672658375d2f, -0x3fef635beb6fcb75, -0x3fef5f99f8138a1c, 
-0x3fef5be084045cd4, -0x3fef582f95281c6b, -0x3fef54873168b9aa, -0x3fef50e75eb44027, -0x3fef4d5022fcd91d, -0x3fef49c18438ce4d, -0x3fef463b88628cd6, -0x3fef42be3578a819, -0x3fef3f49917ddc96, -0x3fef3bdda27912d1, -0x3fef387a6e756238, -0x3fef351ffb82140a, -0x3fef31ce4fb2a63f, -0x3fef2e85711ece75, -0x3fef2b4565e27cdd, -0x3fef280e341ddf29, -0x3fef24dfe1f56381, -0x3fef21ba7591bb70, -0x3fef1e9df51fdee1, -0x3fef1b8a66d10f13, -0x3fef187fd0dad990, -0x3fef157e39771b2f, -0x3fef1285a6e4030b, -0x3fef0f961f641589, -0x3fef0cafa93e2f56, -0x3fef09d24abd886b, -0x3fef06fe0a31b715, -0x3fef0432edeeb2fd, -0x3fef0170fc4cd831, -0x3feefeb83ba8ea32, -0x3feefc08b26416ff, -0x3feef96266e3fa2d, -0x3feef6c55f929ff1, -0x3feef431a2de883b, -0x3feef1a7373aa9cb, -0x3feeef26231e754a, -0x3feeecae6d05d866, -0x3feeea401b7140ef, -0x3feee7db34e59ff7, -0x3feee57fbfec6cf4, -0x3feee32dc313a8e5, -0x3feee0e544ede173, -0x3feedea64c123422, -0x3feedc70df1c5175, -0x3feeda4504ac801c, -0x3feed822c367a024, -0x3feed60a21f72e2a, -0x3feed3fb2709468a, -0x3feed1f5d950a897, -0x3feecffa3f84b9d4, -0x3feece086061892d, -0x3feecc2042a7d232, -0x3feeca41ed1d0057, -0x3feec86d668b3237, -0x3feec6a2b5c13cd0, -0x3feec4e1e192aed2, -0x3feec32af0d7d3de, -0x3feec17dea6db7d7, -0x3feebfdad5362a27, -0x3feebe41b817c114, -0x3feebcb299fddd0d, -0x3feebb2d81d8abff, -0x3feeb9b2769d2ca7, -0x3feeb8417f4531ee, -0x3feeb6daa2cf6642, -0x3feeb57de83f4eef, -0x3feeb42b569d4f82, -0x3feeb2e2f4f6ad27, -0x3feeb1a4ca5d920f, -0x3feeb070dde910d2, -0x3feeaf4736b527da, -0x3feeae27dbe2c4cf, -0x3feead12d497c7fd, -0x3feeac0827ff07cc, -0x3feeab07dd485429, -0x3feeaa11fba87a03, -0x3feea9268a5946b7, -0x3feea84590998b93, -0x3feea76f15ad2148, -0x3feea6a320dceb71, -0x3feea5e1b976dc09, -0x3feea52ae6cdf6f4, -0x3feea47eb03a5585, -0x3feea3dd1d1929fd, -0x3feea34634ccc320, -0x3feea2b9febc8fb7, -0x3feea23882552225, -0x3feea1c1c70833f6, -0x3feea155d44ca973, -0x3feea0f4b19e9538, -0x3feea09e667f3bcd, -0x3feea052fa75173e, -0x3feea012750bdabf, -0x3fee9fdcddd47645, -0x3fee9fb23c651a2f, -0x3fee9f9298593ae5, -0x3fee9f7df9519484, -0x3fee9f7466f42e87, -0x3fee9f75e8ec5f74, -0x3fee9f8286ead08a, -0x3fee9f9a48a58174, -0x3fee9fbd35d7cbfd, -0x3fee9feb564267c9, -0x3feea024b1ab6e09, -0x3feea0694fde5d3f, -0x3feea0b938ac1cf6, -0x3feea11473eb0187, -0x3feea17b0976cfdb, -0x3feea1ed0130c132, -0x3feea26a62ff86f0, -0x3feea2f336cf4e62, -0x3feea3878491c491, -0x3feea427543e1a12, -0x3feea4d2add106d9, -0x3feea589994cce13, -0x3feea64c1eb941f7, -0x3feea71a4623c7ad, -0x3feea7f4179f5b21, -0x3feea8d99b4492ed, -0x3feea9cad931a436, -0x3feeaac7d98a6699, -0x3feeabd0a478580f, -0x3feeace5422aa0db, -0x3feeae05bad61778, -0x3feeaf3216b5448c, -0x3feeb06a5e0866d9, -0x3feeb1ae99157736, -0x3feeb2fed0282c8a, -0x3feeb45b0b91ffc6, -0x3feeb5c353aa2fe2, -0x3feeb737b0cdc5e5, -0x3feeb8b82b5f98e5, -0x3feeba44cbc8520f, -0x3feebbdd9a7670b3, -0x3feebd829fde4e50, -0x3feebf33e47a22a2, -0x3feec0f170ca07ba, -0x3feec2bb4d53fe0d, -0x3feec49182a3f090, -0x3feec674194bb8d5, -0x3feec86319e32323, -0x3feeca5e8d07f29e, -0x3feecc667b5de565, -0x3feece7aed8eb8bb, -0x3feed09bec4a2d33, -0x3feed2c980460ad8, -0x3feed503b23e255d, -0x3feed74a8af46052, -0x3feed99e1330b358, -0x3feedbfe53c12e59, -0x3feede6b5579fdbf, -0x3feee0e521356eba, -0x3feee36bbfd3f37a, -0x3feee5ff3a3c2774, -0x3feee89f995ad3ad, -0x3feeeb4ce622f2ff, -0x3feeee07298db666, -0x3feef0ce6c9a8952, -0x3feef3a2b84f15fb, -0x3feef68415b749b1, -0x3feef9728de5593a, -0x3feefc6e29f1c52a, -0x3feeff76f2fb5e47, -0x3fef028cf22749e4, -0x3fef05b030a1064a, -0x3fef08e0b79a6f1f, -0x3fef0c1e904bc1d2, -0x3fef0f69c3f3a207, -0x3fef12c25bd71e09, 
-0x3fef16286141b33d, -0x3fef199bdd85529c, -0x3fef1d1cd9fa652c, -0x3fef20ab5fffd07a, -0x3fef244778fafb22, -0x3fef27f12e57d14b, -0x3fef2ba88988c933, -0x3fef2f6d9406e7b5, -0x3fef33405751c4db, -0x3fef3720dcef9069, -0x3fef3b0f2e6d1675, -0x3fef3f0b555dc3fa, -0x3fef43155b5bab74, -0x3fef472d4a07897c, -0x3fef4b532b08c968, -0x3fef4f87080d89f2, -0x3fef53c8eacaa1d6, -0x3fef5818dcfba487, -0x3fef5c76e862e6d3, -0x3fef60e316c98398, -0x3fef655d71ff6075, -0x3fef69e603db3285, -0x3fef6e7cd63a8315, -0x3fef7321f301b460, -0x3fef77d5641c0658, -0x3fef7c97337b9b5f, -0x3fef81676b197d17, -0x3fef864614f5a129, -0x3fef8b333b16ee12, -0x3fef902ee78b3ff6, -0x3fef953924676d76, -0x3fef9a51fbc74c83, -0x3fef9f7977cdb740, -0x3fefa4afa2a490da, -0x3fefa9f4867cca6e, -0x3fefaf482d8e67f1, -0x3fefb4aaa2188510, -0x3fefba1bee615a27, -0x3fefbf9c1cb6412a, -0x3fefc52b376bba97, -0x3fefcac948dd7274, -0x3fefd0765b6e4540, -0x3fefd632798844f8, -0x3fefdbfdad9cbe14, -0x3fefe1d802243c89, -0x3fefe7c1819e90d8, -0x3fefedba3692d514, -0x3feff3c22b8f71f1, -0x3feff9d96b2a23d9, -#endif -}; -#endif diff --git a/contrib/arm-optimized-routines/math/v_expf.c b/contrib/arm-optimized-routines/math/v_expf.c deleted file mode 100644 index d403e00534f0..000000000000 --- a/contrib/arm-optimized-routines/math/v_expf.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Single-precision vector e^x function. - * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#include "mathlib.h" -#include "v_math.h" -#if V_SUPPORTED - -static const float Poly[] = { - /* maxerr: 1.45358 +0.5 ulp. */ - 0x1.0e4020p-7f, - 0x1.573e2ep-5f, - 0x1.555e66p-3f, - 0x1.fffdb6p-2f, - 0x1.ffffecp-1f, -}; -#define C0 v_f32 (Poly[0]) -#define C1 v_f32 (Poly[1]) -#define C2 v_f32 (Poly[2]) -#define C3 v_f32 (Poly[3]) -#define C4 v_f32 (Poly[4]) - -#define Shift v_f32 (0x1.8p23f) -#define InvLn2 v_f32 (0x1.715476p+0f) -#define Ln2hi v_f32 (0x1.62e4p-1f) -#define Ln2lo v_f32 (0x1.7f7d1cp-20f) - -VPCS_ATTR -static v_f32_t -specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn, v_u32_t cmp1, v_f32_t scale) -{ - /* 2^n may overflow, break it up into s1*s2. */ - v_u32_t b = v_cond_u32 (n <= v_f32 (0.0f)) & v_u32 (0x82000000); - v_f32_t s1 = v_as_f32_u32 (v_u32 (0x7f000000) + b); - v_f32_t s2 = v_as_f32_u32 (e - b); - v_u32_t cmp2 = v_cond_u32 (absn > v_f32 (192.0f)); - v_u32_t r2 = v_as_u32_f32 (s1 * s1); - v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1); - /* Similar to r1 but avoids double rounding in the subnormal range. */ - v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale)); - return v_as_f32_u32 ((cmp2 & r2) | (~cmp2 & cmp1 & r1) | (~cmp1 & r0)); -} - -VPCS_ATTR -v_f32_t -V_NAME(expf) (v_f32_t x) -{ - v_f32_t n, r, r2, scale, p, q, poly, absn, z; - v_u32_t cmp, e; - - /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] - x = ln2*n + r, with r in [-ln2/2, ln2/2]. 
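-     The #if 1 variant below rounds x/ln2 with the shift trick: adding
-     Shift = 0x1.8p23 forces the rounded integer into the low mantissa
-     bits of z, so n = z - Shift is that integer, and the same bits,
-     shifted left by 23, give the exponent bits e.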
*/ -#if 1 - z = v_fma_f32 (x, InvLn2, Shift); - n = z - Shift; - r = v_fma_f32 (n, -Ln2hi, x); - r = v_fma_f32 (n, -Ln2lo, r); - e = v_as_u32_f32 (z) << 23; -#else - z = x * InvLn2; - n = v_round_f32 (z); - r = v_fma_f32 (n, -Ln2hi, x); - r = v_fma_f32 (n, -Ln2lo, r); - e = v_as_u32_s32 (v_round_s32 (z)) << 23; -#endif - scale = v_as_f32_u32 (e + v_u32 (0x3f800000)); - absn = v_abs_f32 (n); - cmp = v_cond_u32 (absn > v_f32 (126.0f)); - r2 = r * r; - p = v_fma_f32 (C0, r, C1); - q = v_fma_f32 (C2, r, C3); - q = v_fma_f32 (p, r2, q); - p = C4 * r; - poly = v_fma_f32 (q, r2, p); - if (unlikely (v_any_u32 (cmp))) - return specialcase (poly, n, e, absn, cmp, scale); - return v_fma_f32 (poly, scale, scale); -} -VPCS_ALIAS -#endif diff --git a/contrib/arm-optimized-routines/math/v_expf_1u.c b/contrib/arm-optimized-routines/math/v_expf_1u.c deleted file mode 100644 index 023bd248c9ac..000000000000 --- a/contrib/arm-optimized-routines/math/v_expf_1u.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Single-precision vector e^x function. - * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#include "mathlib.h" -#include "v_math.h" -#if V_SUPPORTED - -static const float Poly[] = { - /* maxerr: 0.36565 +0.5 ulp. */ - 0x1.6a6000p-10f, - 0x1.12718ep-7f, - 0x1.555af0p-5f, - 0x1.555430p-3f, - 0x1.fffff4p-2f, -}; -#define C0 v_f32 (Poly[0]) -#define C1 v_f32 (Poly[1]) -#define C2 v_f32 (Poly[2]) -#define C3 v_f32 (Poly[3]) -#define C4 v_f32 (Poly[4]) - -#define Shift v_f32 (0x1.8p23f) -#define InvLn2 v_f32 (0x1.715476p+0f) -#define Ln2hi v_f32 (0x1.62e4p-1f) -#define Ln2lo v_f32 (0x1.7f7d1cp-20f) - -VPCS_ATTR -static v_f32_t -specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn) -{ - /* 2^n may overflow, break it up into s1*s2. */ - v_u32_t b = v_cond_u32 (n <= v_f32 (0.0f)) & v_u32 (0x83000000); - v_f32_t s1 = v_as_f32_u32 (v_u32 (0x7f000000) + b); - v_f32_t s2 = v_as_f32_u32 (e - b); - v_u32_t cmp = v_cond_u32 (absn > v_f32 (192.0f)); - v_f32_t r1 = s1 * s1; - v_f32_t r0 = poly * s1 * s2; - return v_as_f32_u32 ((cmp & v_as_u32_f32 (r1)) | (~cmp & v_as_u32_f32 (r0))); -} - -VPCS_ATTR -v_f32_t -V_NAME(expf_1u) (v_f32_t x) -{ - v_f32_t n, r, scale, poly, absn, z; - v_u32_t cmp, e; - - /* exp(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)] - x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ -#if 1 - z = v_fma_f32 (x, InvLn2, Shift); - n = z - Shift; - r = v_fma_f32 (n, -Ln2hi, x); - r = v_fma_f32 (n, -Ln2lo, r); - e = v_as_u32_f32 (z) << 23; -#else - z = x * InvLn2; - n = v_round_f32 (z); - r = v_fma_f32 (n, -Ln2hi, x); - r = v_fma_f32 (n, -Ln2lo, r); - e = v_as_u32_s32 (v_round_s32 (z)) << 23; -#endif - scale = v_as_f32_u32 (e + v_u32 (0x3f800000)); - absn = v_abs_f32 (n); - cmp = v_cond_u32 (absn > v_f32 (126.0f)); - poly = v_fma_f32 (C0, r, C1); - poly = v_fma_f32 (poly, r, C2); - poly = v_fma_f32 (poly, r, C3); - poly = v_fma_f32 (poly, r, C4); - poly = v_fma_f32 (poly, r, v_f32 (1.0f)); - poly = v_fma_f32 (poly, r, v_f32 (1.0f)); - if (unlikely (v_any_u32 (cmp))) - return specialcase (poly, n, e, absn); - return scale * poly; -} -#endif diff --git a/contrib/arm-optimized-routines/math/v_log.c b/contrib/arm-optimized-routines/math/v_log.c deleted file mode 100644 index d84c740d2b6b..000000000000 --- a/contrib/arm-optimized-routines/math/v_log.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Double-precision vector log(x) function. - * - * Copyright (c) 2019, Arm Limited. 
- * SPDX-License-Identifier: MIT - */ - -#include "mathlib.h" -#include "v_math.h" -#include "v_log.h" -#if V_SUPPORTED - -/* Worst-case error: 1.17 + 0.5 ulp. */ - -static const f64_t Poly[] = { - /* rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ]. */ - -0x1.ffffffffffff7p-2, - 0x1.55555555170d4p-2, - -0x1.0000000399c27p-2, - 0x1.999b2e90e94cap-3, - -0x1.554e550bd501ep-3, -}; - -#define A0 v_f64 (Poly[0]) -#define A1 v_f64 (Poly[1]) -#define A2 v_f64 (Poly[2]) -#define A3 v_f64 (Poly[3]) -#define A4 v_f64 (Poly[4]) -#define Ln2 v_f64 (0x1.62e42fefa39efp-1) -#define N (1 << V_LOG_TABLE_BITS) -#define OFF v_u64 (0x3fe6900900000000) - -struct entry -{ - v_f64_t invc; - v_f64_t logc; -}; - -static inline struct entry -lookup (v_u64_t i) -{ - struct entry e; -#ifdef SCALAR - e.invc = __v_log_data[i].invc; - e.logc = __v_log_data[i].logc; -#else - e.invc[0] = __v_log_data[i[0]].invc; - e.logc[0] = __v_log_data[i[0]].logc; - e.invc[1] = __v_log_data[i[1]].invc; - e.logc[1] = __v_log_data[i[1]].logc; -#endif - return e; -} - -VPCS_ATTR -__attribute__ ((noinline)) static v_f64_t -specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp) -{ - return v_call_f64 (log, x, y, cmp); -} - -VPCS_ATTR -v_f64_t -V_NAME(log) (v_f64_t x) -{ - v_f64_t z, r, r2, p, y, kd, hi; - v_u64_t ix, iz, tmp, top, i, cmp; - v_s64_t k; - struct entry e; - - ix = v_as_u64_f64 (x); - top = ix >> 48; - cmp = v_cond_u64 (top - v_u64 (0x0010) >= v_u64 (0x7ff0 - 0x0010)); - - /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - OFF; - i = (tmp >> (52 - V_LOG_TABLE_BITS)) % N; - k = v_as_s64_u64 (tmp) >> 52; /* arithmetic shift */ - iz = ix - (tmp & v_u64 (0xfffULL << 52)); - z = v_as_f64_u64 (iz); - e = lookup (i); - - /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ - r = v_fma_f64 (z, e.invc, v_f64 (-1.0)); - kd = v_to_f64_s64 (k); - - /* hi = r + log(c) + k*Ln2. */ - hi = v_fma_f64 (kd, Ln2, e.logc + r); - /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ - r2 = r * r; - y = v_fma_f64 (A3, r, A2); - p = v_fma_f64 (A1, r, A0); - y = v_fma_f64 (A4, r2, y); - y = v_fma_f64 (y, r2, p); - y = v_fma_f64 (y, r2, hi); - - if (unlikely (v_any_u64 (cmp))) - return specialcase (x, y, cmp); - return y; -} -VPCS_ALIAS -#endif diff --git a/contrib/arm-optimized-routines/math/v_log.h b/contrib/arm-optimized-routines/math/v_log.h deleted file mode 100644 index bcc2fa6fa930..000000000000 --- a/contrib/arm-optimized-routines/math/v_log.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Declarations for double-precision log(x) vector function. - * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#include "v_math.h" -#if WANT_VMATH - -#define V_LOG_TABLE_BITS 7 - -extern const struct v_log_data -{ - f64_t invc; - f64_t logc; -} __v_log_data[1 << V_LOG_TABLE_BITS] HIDDEN; -#endif diff --git a/contrib/arm-optimized-routines/math/v_log_data.c b/contrib/arm-optimized-routines/math/v_log_data.c deleted file mode 100644 index 97ee5b09c6a9..000000000000 --- a/contrib/arm-optimized-routines/math/v_log_data.c +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Lookup table for double-precision log(x) vector function. - * - * Copyright (c) 2019, Arm Limited. 
- * SPDX-License-Identifier: MIT - */ - -#include "v_log.h" -#if WANT_VMATH - -#define N (1 << V_LOG_TABLE_BITS) - -/* Algorithm: - - x = 2^k z - log(x) = k ln2 + log(c) + poly(z/c - 1) - -where z is in [a;2a) which is split into N subintervals (a=0x1.69009p-1,N=128) -and log(c) and 1/c for the ith subinterval comes from a lookup table: - - tab[i].invc = 1/c - tab[i].logc = (double)log(c) - -where c is near the center of the subinterval and is chosen by trying several -floating point invc candidates around 1/center and selecting one for which -the error in (double)log(c) is minimized (< 0x1p-74), except the subinterval -that contains 1 and the previous one got tweaked to avoid cancellation. */ -const struct v_log_data __v_log_data[N] = { -{0x1.6a133d0dec120p+0, -0x1.62fe995eb963ap-2}, -{0x1.6815f2f3e42edp+0, -0x1.5d5a48dad6b67p-2}, -{0x1.661e39be1ac9ep+0, -0x1.57bde257d2769p-2}, -{0x1.642bfa30ac371p+0, -0x1.52294fbf2af55p-2}, -{0x1.623f1d916f323p+0, -0x1.4c9c7b598aa38p-2}, -{0x1.60578da220f65p+0, -0x1.47174fc5ff560p-2}, -{0x1.5e75349dea571p+0, -0x1.4199b7fa7b5cap-2}, -{0x1.5c97fd387a75ap+0, -0x1.3c239f48cfb99p-2}, -{0x1.5abfd2981f200p+0, -0x1.36b4f154d2aebp-2}, -{0x1.58eca051dc99cp+0, -0x1.314d9a0ff32fbp-2}, -{0x1.571e526d9df12p+0, -0x1.2bed85cca3cffp-2}, -{0x1.5554d555b3fcbp+0, -0x1.2694a11421af9p-2}, -{0x1.539015e2a20cdp+0, -0x1.2142d8d014fb2p-2}, -{0x1.51d0014ee0164p+0, -0x1.1bf81a2c77776p-2}, -{0x1.50148538cd9eep+0, -0x1.16b452a39c6a4p-2}, -{0x1.4e5d8f9f698a1p+0, -0x1.11776ffa6c67ep-2}, -{0x1.4cab0edca66bep+0, -0x1.0c416035020e0p-2}, -{0x1.4afcf1a9db874p+0, -0x1.071211aa10fdap-2}, -{0x1.495327136e16fp+0, -0x1.01e972e293b1bp-2}, -{0x1.47ad9e84af28fp+0, -0x1.f98ee587fd434p-3}, -{0x1.460c47b39ae15p+0, -0x1.ef5800ad716fbp-3}, -{0x1.446f12b278001p+0, -0x1.e52e160484698p-3}, -{0x1.42d5efdd720ecp+0, -0x1.db1104b19352ep-3}, -{0x1.4140cfe001a0fp+0, -0x1.d100ac59e0bd6p-3}, -{0x1.3fafa3b421f69p+0, -0x1.c6fced287c3bdp-3}, -{0x1.3e225c9c8ece5p+0, -0x1.bd05a7b317c29p-3}, -{0x1.3c98ec29a211ap+0, -0x1.b31abd229164fp-3}, -{0x1.3b13442a413fep+0, -0x1.a93c0edadb0a3p-3}, -{0x1.399156baa3c54p+0, -0x1.9f697ee30d7ddp-3}, -{0x1.38131639b4cdbp+0, -0x1.95a2efa9aa40ap-3}, -{0x1.36987540fbf53p+0, -0x1.8be843d796044p-3}, -{0x1.352166b648f61p+0, -0x1.82395ecc477edp-3}, -{0x1.33adddb3eb575p+0, -0x1.7896240966422p-3}, -{0x1.323dcd99fc1d3p+0, -0x1.6efe77aca8c55p-3}, -{0x1.30d129fefc7d2p+0, -0x1.65723e117ec5cp-3}, -{0x1.2f67e6b72fe7dp+0, -0x1.5bf15c0955706p-3}, -{0x1.2e01f7cf8b187p+0, -0x1.527bb6c111da1p-3}, -{0x1.2c9f518ddc86ep+0, -0x1.491133c939f8fp-3}, -{0x1.2b3fe86e5f413p+0, -0x1.3fb1b90c7fc58p-3}, -{0x1.29e3b1211b25cp+0, -0x1.365d2cc485f8dp-3}, -{0x1.288aa08b373cfp+0, -0x1.2d13758970de7p-3}, -{0x1.2734abcaa8467p+0, -0x1.23d47a721fd47p-3}, -{0x1.25e1c82459b81p+0, -0x1.1aa0229f25ec2p-3}, -{0x1.2491eb1ad59c5p+0, -0x1.117655ddebc3bp-3}, -{0x1.23450a54048b5p+0, -0x1.0856fbf83ab6bp-3}, -{0x1.21fb1bb09e578p+0, -0x1.fe83fabbaa106p-4}, -{0x1.20b415346d8f7p+0, -0x1.ec6e8507a56cdp-4}, -{0x1.1f6fed179a1acp+0, -0x1.da6d68c7cc2eap-4}, -{0x1.1e2e99b93c7b3p+0, -0x1.c88078462be0cp-4}, -{0x1.1cf011a7a882ap+0, -0x1.b6a786a423565p-4}, -{0x1.1bb44b97dba5ap+0, -0x1.a4e2676ac7f85p-4}, -{0x1.1a7b3e66cdd4fp+0, -0x1.9330eea777e76p-4}, -{0x1.1944e11dc56cdp+0, -0x1.8192f134d5ad9p-4}, -{0x1.18112aebb1a6ep+0, -0x1.70084464f0538p-4}, -{0x1.16e013231b7e9p+0, -0x1.5e90bdec5cb1fp-4}, -{0x1.15b1913f156cfp+0, -0x1.4d2c3433c5536p-4}, -{0x1.14859cdedde13p+0, -0x1.3bda7e219879ap-4}, -{0x1.135c2dc68cfa4p+0, -0x1.2a9b732d27194p-4}, 
-{0x1.12353bdb01684p+0, -0x1.196eeb2b10807p-4}, -{0x1.1110bf25b85b4p+0, -0x1.0854be8ef8a7ep-4}, -{0x1.0feeafd2f8577p+0, -0x1.ee998cb277432p-5}, -{0x1.0ecf062c51c3bp+0, -0x1.ccadb79919fb9p-5}, -{0x1.0db1baa076c8bp+0, -0x1.aae5b1d8618b0p-5}, -{0x1.0c96c5bb3048ep+0, -0x1.89413015d7442p-5}, -{0x1.0b7e20263e070p+0, -0x1.67bfe7bf158dep-5}, -{0x1.0a67c2acd0ce3p+0, -0x1.46618f83941bep-5}, -{0x1.0953a6391e982p+0, -0x1.2525df1b0618ap-5}, -{0x1.0841c3caea380p+0, -0x1.040c8e2f77c6ap-5}, -{0x1.07321489b13eap+0, -0x1.c62aad39f738ap-6}, -{0x1.062491aee9904p+0, -0x1.847fe3bdead9cp-6}, -{0x1.05193497a7cc5p+0, -0x1.43183683400acp-6}, -{0x1.040ff6b5f5e9fp+0, -0x1.01f31c4e1d544p-6}, -{0x1.0308d19aa6127p+0, -0x1.82201d1e6b69ap-7}, -{0x1.0203beedb0c67p+0, -0x1.00dd0f3e1bfd6p-7}, -{0x1.010037d38bcc2p+0, -0x1.ff6fe1feb4e53p-9}, -{1.0, 0.0}, -{0x1.fc06d493cca10p-1, 0x1.fe91885ec8e20p-8}, -{0x1.f81e6ac3b918fp-1, 0x1.fc516f716296dp-7}, -{0x1.f44546ef18996p-1, 0x1.7bb4dd70a015bp-6}, -{0x1.f07b10382c84bp-1, 0x1.f84c99b34b674p-6}, -{0x1.ecbf7070e59d4p-1, 0x1.39f9ce4fb2d71p-5}, -{0x1.e91213f715939p-1, 0x1.7756c0fd22e78p-5}, -{0x1.e572a9a75f7b7p-1, 0x1.b43ee82db8f3ap-5}, -{0x1.e1e0e2c530207p-1, 0x1.f0b3fced60034p-5}, -{0x1.de5c72d8a8be3p-1, 0x1.165bd78d4878ep-4}, -{0x1.dae50fa5658ccp-1, 0x1.3425d2715ebe6p-4}, -{0x1.d77a71145a2dap-1, 0x1.51b8bd91b7915p-4}, -{0x1.d41c51166623ep-1, 0x1.6f15632c76a47p-4}, -{0x1.d0ca6ba0bb29fp-1, 0x1.8c3c88ecbe503p-4}, -{0x1.cd847e8e59681p-1, 0x1.a92ef077625dap-4}, -{0x1.ca4a499693e00p-1, 0x1.c5ed5745fa006p-4}, -{0x1.c71b8e399e821p-1, 0x1.e27876de1c993p-4}, -{0x1.c3f80faf19077p-1, 0x1.fed104fce4cdcp-4}, -{0x1.c0df92dc2b0ecp-1, 0x1.0d7bd9c17d78bp-3}, -{0x1.bdd1de3cbb542p-1, 0x1.1b76986cef97bp-3}, -{0x1.baceb9e1007a3p-1, 0x1.295913d24f750p-3}, -{0x1.b7d5ef543e55ep-1, 0x1.37239fa295d17p-3}, -{0x1.b4e749977d953p-1, 0x1.44d68dd78714bp-3}, -{0x1.b20295155478ep-1, 0x1.52722ebe5d780p-3}, -{0x1.af279f8e82be2p-1, 0x1.5ff6d12671f98p-3}, -{0x1.ac5638197fdf3p-1, 0x1.6d64c2389484bp-3}, -{0x1.a98e2f102e087p-1, 0x1.7abc4da40fddap-3}, -{0x1.a6cf5606d05c1p-1, 0x1.87fdbda1e8452p-3}, -{0x1.a4197fc04d746p-1, 0x1.95295b06a5f37p-3}, -{0x1.a16c80293dc01p-1, 0x1.a23f6d34abbc5p-3}, -{0x1.9ec82c4dc5bc9p-1, 0x1.af403a28e04f2p-3}, -{0x1.9c2c5a491f534p-1, 0x1.bc2c06a85721ap-3}, -{0x1.9998e1480b618p-1, 0x1.c903161240163p-3}, -{0x1.970d9977c6c2dp-1, 0x1.d5c5aa93287ebp-3}, -{0x1.948a5c023d212p-1, 0x1.e274051823fa9p-3}, -{0x1.920f0303d6809p-1, 0x1.ef0e656300c16p-3}, -{0x1.8f9b698a98b45p-1, 0x1.fb9509f05aa2ap-3}, -{0x1.8d2f6b81726f6p-1, 0x1.04041821f37afp-2}, -{0x1.8acae5bb55badp-1, 0x1.0a340a49b3029p-2}, -{0x1.886db5d9275b8p-1, 0x1.105a7918a126dp-2}, -{0x1.8617ba567c13cp-1, 0x1.1677819812b84p-2}, -{0x1.83c8d27487800p-1, 0x1.1c8b405b40c0ep-2}, -{0x1.8180de3c5dbe7p-1, 0x1.2295d16cfa6b1p-2}, -{0x1.7f3fbe71cdb71p-1, 0x1.28975066318a2p-2}, -{0x1.7d055498071c1p-1, 0x1.2e8fd855d86fcp-2}, -{0x1.7ad182e54f65ap-1, 0x1.347f83d605e59p-2}, -{0x1.78a42c3c90125p-1, 0x1.3a666d1244588p-2}, -{0x1.767d342f76944p-1, 0x1.4044adb6f8ec4p-2}, -{0x1.745c7ef26b00ap-1, 0x1.461a5f077558cp-2}, -{0x1.7241f15769d0fp-1, 0x1.4be799e20b9c8p-2}, -{0x1.702d70d396e41p-1, 0x1.51ac76a6b79dfp-2}, -{0x1.6e1ee3700cd11p-1, 0x1.57690d5744a45p-2}, -{0x1.6c162fc9cbe02p-1, 0x1.5d1d758e45217p-2}, -}; -#endif diff --git a/contrib/arm-optimized-routines/math/v_logf.c b/contrib/arm-optimized-routines/math/v_logf.c deleted file mode 100644 index 7373192f03fa..000000000000 --- a/contrib/arm-optimized-routines/math/v_logf.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Single-precision 
vector log function. - * - * Copyright (c) 2019, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#include "mathlib.h" -#include "v_math.h" -#if V_SUPPORTED - -static const float Poly[] = { - /* 3.34 ulp error */ - -0x1.3e737cp-3f, 0x1.5a9aa2p-3f, -0x1.4f9934p-3f, 0x1.961348p-3f, - -0x1.00187cp-2f, 0x1.555d7cp-2f, -0x1.ffffc8p-2f, -}; -#define P7 v_f32 (Poly[0]) -#define P6 v_f32 (Poly[1]) -#define P5 v_f32 (Poly[2]) -#define P4 v_f32 (Poly[3]) -#define P3 v_f32 (Poly[4]) -#define P2 v_f32 (Poly[5]) -#define P1 v_f32 (Poly[6]) - -#define Ln2 v_f32 (0x1.62e43p-1f) /* 0x3f317218 */ -#define Min v_u32 (0x00800000) -#define Max v_u32 (0x7f800000) -#define Mask v_u32 (0x007fffff) -#define Off v_u32 (0x3f2aaaab) /* 0.666667 */ - -VPCS_ATTR -__attribute__ ((noinline)) static v_f32_t -specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp) -{ - /* Fall back to scalar code. */ - return v_call_f32 (logf, x, y, cmp); -} - -VPCS_ATTR -v_f32_t -V_NAME(logf) (v_f32_t x) -{ - v_f32_t n, p, q, r, r2, y; - v_u32_t u, cmp; - - u = v_as_u32_f32 (x); - cmp = v_cond_u32 (u - Min >= Max - Min); - - /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3 */ - u -= Off; - n = v_to_f32_s32 (v_as_s32_u32 (u) >> 23); /* signextend */ - u &= Mask; - u += Off; - r = v_as_f32_u32 (u) - v_f32 (1.0f); - - /* y = log(1+r) + n*ln2. */ - r2 = r * r; - /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))). */ - p = v_fma_f32 (P6, r, P5); - q = v_fma_f32 (P4, r, P3); - y = v_fma_f32 (P2, r, P1); - p = v_fma_f32 (P7, r2, p); - q = v_fma_f32 (p, r2, q); - y = v_fma_f32 (q, r2, y); - p = v_fma_f32 (Ln2, n, r); - y = v_fma_f32 (y, r2, p); - - if (unlikely (v_any_u32 (cmp))) - return specialcase (x, y, cmp); - return y; -} -VPCS_ALIAS -#endif diff --git a/contrib/arm-optimized-routines/math/v_math.h b/contrib/arm-optimized-routines/math/v_math.h deleted file mode 100644 index f2cc4670bb9b..000000000000 --- a/contrib/arm-optimized-routines/math/v_math.h +++ /dev/null @@ -1,641 +0,0 @@ -/* - * Vector math abstractions. - * - * Copyright (c) 2019-2020, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -#ifndef _V_MATH_H -#define _V_MATH_H - -#ifndef WANT_VMATH -/* Enable the build of vector math code. */ -# define WANT_VMATH 1 -#endif -#if WANT_VMATH - -/* The goal of this header is to allow vector and scalar - build of the same algorithm, the provided intrinsic - wrappers are also vector length agnostic so they can - be implemented for SVE too (or other simd architectures) - and then the code should work on those targets too. */ - -#if SCALAR -#define V_NAME(x) __s_##x -#elif VPCS && __aarch64__ -#define V_NAME(x) __vn_##x -#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs)) -#else -#define V_NAME(x) __v_##x -#endif - -#ifndef VPCS_ATTR -#define VPCS_ATTR -#endif -#ifndef VPCS_ALIAS -#define VPCS_ALIAS -#endif - -#include <stdint.h> -#include "math_config.h" - -typedef float f32_t; -typedef uint32_t u32_t; -typedef int32_t s32_t; -typedef double f64_t; -typedef uint64_t u64_t; -typedef int64_t s64_t; - -/* reinterpret as type1 from type2. 
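-   These do a bit-for-bit reinterpretation through a union, the usual
-   well-defined type-punning idiom in C; e.g. as_u32_f32 (1.0f)
-   returns 0x3f800000.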
*/ -static inline u32_t -as_u32_f32 (f32_t x) -{ - union { f32_t f; u32_t u; } r = {x}; - return r.u; -} -static inline f32_t -as_f32_u32 (u32_t x) -{ - union { u32_t u; f32_t f; } r = {x}; - return r.f; -} -static inline s32_t -as_s32_u32 (u32_t x) -{ - union { u32_t u; s32_t i; } r = {x}; - return r.i; -} -static inline u32_t -as_u32_s32 (s32_t x) -{ - union { s32_t i; u32_t u; } r = {x}; - return r.u; -} -static inline u64_t -as_u64_f64 (f64_t x) -{ - union { f64_t f; u64_t u; } r = {x}; - return r.u; -} -static inline f64_t -as_f64_u64 (u64_t x) -{ - union { u64_t u; f64_t f; } r = {x}; - return r.f; -} -static inline s64_t -as_s64_u64 (u64_t x) -{ - union { u64_t u; s64_t i; } r = {x}; - return r.i; -} -static inline u64_t -as_u64_s64 (s64_t x) -{ - union { s64_t i; u64_t u; } r = {x}; - return r.u; -} - -#if SCALAR -#define V_SUPPORTED 1 -typedef f32_t v_f32_t; -typedef u32_t v_u32_t; -typedef s32_t v_s32_t; -typedef f64_t v_f64_t; -typedef u64_t v_u64_t; -typedef s64_t v_s64_t; - -static inline int -v_lanes32 (void) -{ - return 1; -} - -static inline v_f32_t -v_f32 (f32_t x) -{ - return x; -} -static inline v_u32_t -v_u32 (u32_t x) -{ - return x; -} -static inline v_s32_t -v_s32 (s32_t x) -{ - return x; -} - -static inline f32_t -v_get_f32 (v_f32_t x, int i) -{ - return x; -} -static inline u32_t -v_get_u32 (v_u32_t x, int i) -{ - return x; -} -static inline s32_t -v_get_s32 (v_s32_t x, int i) -{ - return x; -} - -static inline void -v_set_f32 (v_f32_t *x, int i, f32_t v) -{ - *x = v; -} -static inline void -v_set_u32 (v_u32_t *x, int i, u32_t v) -{ - *x = v; -} -static inline void -v_set_s32 (v_s32_t *x, int i, s32_t v) -{ - *x = v; -} - -/* true if any elements of a v_cond result is non-zero. */ -static inline int -v_any_u32 (v_u32_t x) -{ - return x != 0; -} -/* to wrap the result of relational operators. */ -static inline v_u32_t -v_cond_u32 (v_u32_t x) -{ - return x ? -1 : 0; -} -static inline v_f32_t -v_abs_f32 (v_f32_t x) -{ - return __builtin_fabsf (x); -} -static inline v_f32_t -v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z) -{ - return __builtin_fmaf (x, y, z); -} -static inline v_f32_t -v_round_f32 (v_f32_t x) -{ - return __builtin_roundf (x); -} -static inline v_s32_t -v_round_s32 (v_f32_t x) -{ - return __builtin_lroundf (x); /* relies on -fno-math-errno. */ -} -/* convert to type1 from type2. */ -static inline v_f32_t -v_to_f32_s32 (v_s32_t x) -{ - return x; -} -static inline v_f32_t -v_to_f32_u32 (v_u32_t x) -{ - return x; -} -/* reinterpret as type1 from type2. 
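-   (in the scalar build these wrappers are the same union punning over
-   the plain scalar types, so the vector algorithms compile unchanged
-   for a single lane.)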
*/ -static inline v_u32_t -v_as_u32_f32 (v_f32_t x) -{ - union { v_f32_t f; v_u32_t u; } r = {x}; - return r.u; -} -static inline v_f32_t -v_as_f32_u32 (v_u32_t x) -{ - union { v_u32_t u; v_f32_t f; } r = {x}; - return r.f; -} -static inline v_s32_t -v_as_s32_u32 (v_u32_t x) -{ - union { v_u32_t u; v_s32_t i; } r = {x}; - return r.i; -} -static inline v_u32_t -v_as_u32_s32 (v_s32_t x) -{ - union { v_s32_t i; v_u32_t u; } r = {x}; - return r.u; -} -static inline v_f32_t -v_lookup_f32 (const f32_t *tab, v_u32_t idx) -{ - return tab[idx]; -} -static inline v_u32_t -v_lookup_u32 (const u32_t *tab, v_u32_t idx) -{ - return tab[idx]; -} -static inline v_f32_t -v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p) -{ - return f (x); -} -static inline v_f32_t -v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y, - v_u32_t p) -{ - return f (x1, x2); -} - -static inline int -v_lanes64 (void) -{ - return 1; -} -static inline v_f64_t -v_f64 (f64_t x) -{ - return x; -} -static inline v_u64_t -v_u64 (u64_t x) -{ - return x; -} -static inline v_s64_t -v_s64 (s64_t x) -{ - return x; -} -static inline f64_t -v_get_f64 (v_f64_t x, int i) -{ - return x; -} -static inline void -v_set_f64 (v_f64_t *x, int i, f64_t v) -{ - *x = v; -} -/* true if any elements of a v_cond result is non-zero. */ -static inline int -v_any_u64 (v_u64_t x) -{ - return x != 0; -} -/* to wrap the result of relational operators. */ -static inline v_u64_t -v_cond_u64 (v_u64_t x) -{ - return x ? -1 : 0; -} -static inline v_f64_t -v_abs_f64 (v_f64_t x) -{ - return __builtin_fabs (x); -} -static inline v_f64_t -v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z) -{ - return __builtin_fma (x, y, z); -} -static inline v_f64_t -v_round_f64 (v_f64_t x) -{ - return __builtin_round (x); -} -static inline v_s64_t -v_round_s64 (v_f64_t x) -{ - return __builtin_lround (x); /* relies on -fno-math-errno. */ -} -/* convert to type1 from type2. */ -static inline v_f64_t -v_to_f64_s64 (v_s64_t x) -{ - return x; -} -static inline v_f64_t -v_to_f64_u64 (v_u64_t x) -{ - return x; -} -/* reinterpret as type1 from type2. 
*/ -static inline v_u64_t -v_as_u64_f64 (v_f64_t x) -{ - union { v_f64_t f; v_u64_t u; } r = {x}; - return r.u; -} -static inline v_f64_t -v_as_f64_u64 (v_u64_t x) -{ - union { v_u64_t u; v_f64_t f; } r = {x}; - return r.f; -} -static inline v_s64_t -v_as_s64_u64 (v_u64_t x) -{ - union { v_u64_t u; v_s64_t i; } r = {x}; - return r.i; -} -static inline v_u64_t -v_as_u64_s64 (v_s64_t x) -{ - union { v_s64_t i; v_u64_t u; } r = {x}; - return r.u; -} -static inline v_f64_t -v_lookup_f64 (const f64_t *tab, v_u64_t idx) -{ - return tab[idx]; -} -static inline v_u64_t -v_lookup_u64 (const u64_t *tab, v_u64_t idx) -{ - return tab[idx]; -} -static inline v_f64_t -v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p) -{ - return f (x); -} - -#elif __aarch64__ -#define V_SUPPORTED 1 -#include <arm_neon.h> -typedef float32x4_t v_f32_t; -typedef uint32x4_t v_u32_t; -typedef int32x4_t v_s32_t; -typedef float64x2_t v_f64_t; -typedef uint64x2_t v_u64_t; -typedef int64x2_t v_s64_t; - -static inline int -v_lanes32 (void) -{ - return 4; -} - -static inline v_f32_t -v_f32 (f32_t x) -{ - return (v_f32_t){x, x, x, x}; -} -static inline v_u32_t -v_u32 (u32_t x) -{ - return (v_u32_t){x, x, x, x}; -} -static inline v_s32_t -v_s32 (s32_t x) -{ - return (v_s32_t){x, x, x, x}; -} - -static inline f32_t -v_get_f32 (v_f32_t x, int i) -{ - return x[i]; -} -static inline u32_t -v_get_u32 (v_u32_t x, int i) -{ - return x[i]; -} -static inline s32_t -v_get_s32 (v_s32_t x, int i) -{ - return x[i]; -} - -static inline void -v_set_f32 (v_f32_t *x, int i, f32_t v) -{ - (*x)[i] = v; -} -static inline void -v_set_u32 (v_u32_t *x, int i, u32_t v) -{ - (*x)[i] = v; -} -static inline void -v_set_s32 (v_s32_t *x, int i, s32_t v) -{ - (*x)[i] = v; -} - -/* true if any elements of a v_cond result is non-zero. */ -static inline int -v_any_u32 (v_u32_t x) -{ - /* assume elements in x are either 0 or -1u. */ - return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0; -} -/* to wrap the result of relational operators. */ -static inline v_u32_t -v_cond_u32 (v_u32_t x) -{ - return x; -} -static inline v_f32_t -v_abs_f32 (v_f32_t x) -{ - return vabsq_f32 (x); -} -static inline v_f32_t -v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z) -{ - return vfmaq_f32 (z, x, y); -} -static inline v_f32_t -v_round_f32 (v_f32_t x) -{ - return vrndaq_f32 (x); -} -static inline v_s32_t -v_round_s32 (v_f32_t x) -{ - return vcvtaq_s32_f32 (x); -} -/* convert to type1 from type2. */ -static inline v_f32_t -v_to_f32_s32 (v_s32_t x) -{ - return (v_f32_t){x[0], x[1], x[2], x[3]}; -} -static inline v_f32_t -v_to_f32_u32 (v_u32_t x) -{ - return (v_f32_t){x[0], x[1], x[2], x[3]}; -} -/* reinterpret as type1 from type2. */ -static inline v_u32_t -v_as_u32_f32 (v_f32_t x) -{ - union { v_f32_t f; v_u32_t u; } r = {x}; - return r.u; -} -static inline v_f32_t -v_as_f32_u32 (v_u32_t x) -{ - union { v_u32_t u; v_f32_t f; } r = {x}; - return r.f; -} -static inline v_s32_t -v_as_s32_u32 (v_u32_t x) -{ - union { v_u32_t u; v_s32_t i; } r = {x}; - return r.i; -} -static inline v_u32_t -v_as_u32_s32 (v_s32_t x) -{ - union { v_s32_t i; v_u32_t u; } r = {x}; - return r.u; -} -static inline v_f32_t -v_lookup_f32 (const f32_t *tab, v_u32_t idx) -{ - return (v_f32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]}; -} -static inline v_u32_t -v_lookup_u32 (const u32_t *tab, v_u32_t idx) -{ - return (v_u32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]}; -} -static inline v_f32_t -v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p) -{ - return (v_f32_t){p[0] ? 
-
-static inline int
-v_lanes64 (void)
-{
-  return 2;
-}
-static inline v_f64_t
-v_f64 (f64_t x)
-{
-  return (v_f64_t){x, x};
-}
-static inline v_u64_t
-v_u64 (u64_t x)
-{
-  return (v_u64_t){x, x};
-}
-static inline v_s64_t
-v_s64 (s64_t x)
-{
-  return (v_s64_t){x, x};
-}
-static inline f64_t
-v_get_f64 (v_f64_t x, int i)
-{
-  return x[i];
-}
-static inline void
-v_set_f64 (v_f64_t *x, int i, f64_t v)
-{
-  (*x)[i] = v;
-}
-/* true if any elements of a v_cond result is non-zero.  */
-static inline int
-v_any_u64 (v_u64_t x)
-{
-  /* assume elements in x are either 0 or -1u.  */
-  return vpaddd_u64 (x) != 0;
-}
-/* to wrap the result of relational operators.  */
-static inline v_u64_t
-v_cond_u64 (v_u64_t x)
-{
-  return x;
-}
-static inline v_f64_t
-v_abs_f64 (v_f64_t x)
-{
-  return vabsq_f64 (x);
-}
-static inline v_f64_t
-v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
-{
-  return vfmaq_f64 (z, x, y);
-}
-static inline v_f64_t
-v_round_f64 (v_f64_t x)
-{
-  return vrndaq_f64 (x);
-}
-static inline v_s64_t
-v_round_s64 (v_f64_t x)
-{
-  return vcvtaq_s64_f64 (x);
-}
-/* convert to type1 from type2.  */
-static inline v_f64_t
-v_to_f64_s64 (v_s64_t x)
-{
-  return (v_f64_t){x[0], x[1]};
-}
-static inline v_f64_t
-v_to_f64_u64 (v_u64_t x)
-{
-  return (v_f64_t){x[0], x[1]};
-}
-/* reinterpret as type1 from type2.  */
-static inline v_u64_t
-v_as_u64_f64 (v_f64_t x)
-{
-  union { v_f64_t f; v_u64_t u; } r = {x};
-  return r.u;
-}
-static inline v_f64_t
-v_as_f64_u64 (v_u64_t x)
-{
-  union { v_u64_t u; v_f64_t f; } r = {x};
-  return r.f;
-}
-static inline v_s64_t
-v_as_s64_u64 (v_u64_t x)
-{
-  union { v_u64_t u; v_s64_t i; } r = {x};
-  return r.i;
-}
-static inline v_u64_t
-v_as_u64_s64 (v_s64_t x)
-{
-  union { v_s64_t i; v_u64_t u; } r = {x};
-  return r.u;
-}
-static inline v_f64_t
-v_lookup_f64 (const f64_t *tab, v_u64_t idx)
-{
-  return (v_f64_t){tab[idx[0]], tab[idx[1]]};
-}
-static inline v_u64_t
-v_lookup_u64 (const u64_t *tab, v_u64_t idx)
-{
-  return (v_u64_t){tab[idx[0]], tab[idx[1]]};
-}
-static inline v_f64_t
-v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
-{
-  return (v_f64_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1]};
-}
-#endif
-
-#endif
-#endif
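That ends the removed generic header: the same helper names compile either to the scalar fallbacks or to the NEON definitions, so one kernel source serves both builds. A hypothetical kernel written against this API, to show the usual shape (demo only, not a library routine; cube_scalar and the 0x1p342 threshold are made up for illustration):

#include "v_math.h"  /* the header removed above; assumed for illustration */
#if V_SUPPORTED

static f64_t
cube_scalar (f64_t x)
{
  return x * x * x;
}

/* Fast path for all lanes, then a scalar fallback for the lanes flagged
   in cmp -- the pattern the real kernels below follow.  */
static v_f64_t
v_cube_demo (v_f64_t x)
{
  /* Flag lanes whose cube would overflow (|x| >= 2^342).  */
  v_u64_t cmp = v_cond_u64 (v_as_u64_f64 (v_abs_f64 (x))
                            >= v_as_u64_f64 (v_f64 (0x1p342)));
  v_f64_t y = x * x * x;
  if (v_any_u64 (cmp))
    return v_call_f64 (cube_scalar, x, y, cmp);
  return y;
}
#endif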
diff --git a/contrib/arm-optimized-routines/math/v_pow.c b/contrib/arm-optimized-routines/math/v_pow.c
deleted file mode 100644
index a209d57f41ce..000000000000
--- a/contrib/arm-optimized-routines/math/v_pow.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Double-precision vector pow function.
- *
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-
-#include "mathlib.h"
-#include "v_math.h"
-#if V_SUPPORTED
-
-VPCS_ATTR
-v_f64_t
-V_NAME(pow) (v_f64_t x, v_f64_t y)
-{
-  v_f64_t z;
-  for (int lane = 0; lane < v_lanes64 (); lane++)
-    {
-      f64_t sx = v_get_f64 (x, lane);
-      f64_t sy = v_get_f64 (y, lane);
-      f64_t sz = pow (sx, sy);
-      v_set_f64 (&z, lane, sz);
-    }
-  return z;
-}
-VPCS_ALIAS
-#endif
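The removed v_pow.c is deliberately the simplest possible vectorisation: each lane round-trips through the scalar pow, so accuracy and special-case behaviour are inherited wholesale and only call overhead is amortised. The same shape written directly against NEON types (illustrative only; relies on the GCC/Clang vector subscript extension):

#include <arm_neon.h>
#include <math.h>

static float64x2_t
pow_lanewise (float64x2_t x, float64x2_t y)
{
  float64x2_t z;
  for (int lane = 0; lane < 2; lane++)
    z[lane] = pow (x[lane], y[lane]);  /* scalar pow per lane */
  return z;
}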
diff --git a/contrib/arm-optimized-routines/math/v_powf.c b/contrib/arm-optimized-routines/math/v_powf.c
deleted file mode 100644
index fb80fa6f1846..000000000000
--- a/contrib/arm-optimized-routines/math/v_powf.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Single-precision vector powf function.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-
-#include "mathlib.h"
-#include "v_math.h"
-#if V_SUPPORTED
-
-#define Min v_u32 (0x00800000)
-#define Max v_u32 (0x7f800000)
-#define SBITS 5
-#define Tlog v__powf_log2_data.tab
-#define Texp v__exp2f_data.tab
-#define A v__powf_log2_data.poly
-#define C v__exp2f_data.poly
-#define LOGDEG 4
-
-#if LOGDEG == 5
-/* 1.01 ulp */
-#define OFF v_u32 (0x3f330000)
-#define TBITS 4
-#elif LOGDEG == 4
-/* 2.6 ulp ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2) */
-#define OFF v_u32 (0x3f35d000)
-#define TBITS 5
-#endif
-
-#define V_EXP2F_TABLE_BITS SBITS
-#define V_EXP2F_POLY_ORDER 3
-struct v_exp2f_data
-{
-  uint64_t tab[1 << V_EXP2F_TABLE_BITS];
-  double poly[V_EXP2F_POLY_ORDER];
-};
-
-#define V_POWF_LOG2_TABLE_BITS TBITS
-#define V_POWF_LOG2_POLY_ORDER LOGDEG
-#define SCALE ((double) (1 << SBITS))
-struct v_powf_log2_data
-{
-  struct
-  {
-    double invc, logc;
-  } tab[1 << V_POWF_LOG2_TABLE_BITS];
-  double poly[V_POWF_LOG2_POLY_ORDER];
-};
-
-static const struct v_powf_log2_data v__powf_log2_data = {
-#if LOGDEG == 5
-  .tab = {
-{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * SCALE },
-{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * SCALE },
-{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * SCALE },
-{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * SCALE },
-{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * SCALE },
-{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * SCALE },
-{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * SCALE },
-{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * SCALE },
-{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * SCALE },
-{ 0x1p+0, 0x0p+0 * SCALE },
-{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * SCALE },
-{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * SCALE },
-{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * SCALE },
-{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * SCALE },
-{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * SCALE },
-{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * SCALE },
-  },
-/* rel err: 1.46 * 2^-32 */
-  .poly = {
-0x1.27616c9496e0bp-2 * SCALE, -0x1.71969a075c67ap-2 * SCALE,
-0x1.ec70a6ca7baddp-2 * SCALE, -0x1.7154748bef6c8p-1 * SCALE,
-0x1.71547652ab82bp0 * SCALE,
-  }
-#elif LOGDEG == 4
-  .tab = {
-{0x1.6489890582816p+0, -0x1.e960f97b22702p-2 * SCALE},
-{0x1.5cf19b35e3472p+0, -0x1.c993406cd4db6p-2 * SCALE},
-{0x1.55aac0e956d65p+0, -0x1.aa711d9a7d0f3p-2 * SCALE},
-{0x1.4eb0022977e01p+0, -0x1.8bf37bacdce9bp-2 * SCALE},
-{0x1.47fcccda1dd1fp+0, -0x1.6e13b3519946ep-2 * SCALE},
-{0x1.418ceabab68c1p+0, -0x1.50cb8281e4089p-2 * SCALE},
-{0x1.3b5c788f1edb3p+0, -0x1.341504a237e2bp-2 * SCALE},
-{0x1.3567de48e9c9ap+0, -0x1.17eaab624ffbbp-2 * SCALE},
-{0x1.2fabc80fd19bap+0, -0x1.f88e708f8c853p-3 * SCALE},
-{0x1.2a25200ce536bp+0, -0x1.c24b6da113914p-3 * SCALE},
-{0x1.24d108e0152e3p+0, -0x1.8d02ee397cb1dp-3 * SCALE},
-{0x1.1facd8ab2fbe1p+0, -0x1.58ac1223408b3p-3 * SCALE},
-{0x1.1ab614a03efdfp+0, -0x1.253e6fd190e89p-3 * SCALE},
-{0x1.15ea6d03af9ffp+0, -0x1.e5641882c12ffp-4 * SCALE},
-{0x1.1147b994bb776p+0, -0x1.81fea712926f7p-4 * SCALE},
-{0x1.0ccbf650593aap+0, -0x1.203e240de64a3p-4 * SCALE},
-{0x1.0875408477302p+0, -0x1.8029b86a78281p-5 * SCALE},
-{0x1.0441d42a93328p+0, -0x1.85d713190fb9p-6 * SCALE},
-{0x1p+0, 0x0p+0 * SCALE},
-{0x1.f1d006c855e86p-1, 0x1.4c1cc07312997p-5 * SCALE},
-{0x1.e28c3341aa301p-1, 0x1.5e1848ccec948p-4 * SCALE},
-{0x1.d4bdf9aa64747p-1, 0x1.04cfcb7f1196fp-3 * SCALE},
-{0x1.c7b45a24e5803p-1, 0x1.582813d463c21p-3 * SCALE},
-{0x1.bb5f5eb2ed60ap-1, 0x1.a936fa68760ccp-3 * SCALE},
-{0x1.afb0bff8fe6b4p-1, 0x1.f81bc31d6cc4ep-3 * SCALE},
-{0x1.a49badf7ab1f5p-1, 0x1.2279a09fae6b1p-2 * SCALE},
-{0x1.9a14a111fc4c9p-1, 0x1.47ec0b6df5526p-2 * SCALE},
-{0x1.901131f5b2fdcp-1, 0x1.6c71762280f1p-2 * SCALE},
-{0x1.8687f73f6d865p-1, 0x1.90155070798dap-2 * SCALE},
-{0x1.7d7067eb77986p-1, 0x1.b2e23b1d3068cp-2 * SCALE},
-{0x1.74c2c1cf97b65p-1, 0x1.d4e21b0daa86ap-2 * SCALE},
-{0x1.6c77f37cff2a1p-1, 0x1.f61e2a2f67f3fp-2 * SCALE},
-  },
-/* rel err: 1.5 * 2^-30 */
-  .poly = {
-    -0x1.6ff5daa3b3d7cp-2 * SCALE,
-    0x1.ec81d03c01aebp-2 * SCALE,
-    -0x1.71547bb43f101p-1 * SCALE,
-    0x1.7154764a815cbp0 * SCALE,
-  }
-#endif
-};
-
-static const struct v_exp2f_data v__exp2f_data = {
-  .tab = {
-0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
-0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
-0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
-0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
-0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
-0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
-0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
-0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
-  },
-/* rel err: 1.69 * 2^-34 */
-  .poly = {
-0x1.c6af84b912394p-5/SCALE/SCALE/SCALE, 0x1.ebfce50fac4f3p-3/SCALE/SCALE, 0x1.62e42ff0c52d6p-1/SCALE
-  },
-};
-
-VPCS_ATTR
-__attribute__ ((noinline)) static v_f32_t
-specialcase (v_f32_t x, v_f32_t y, v_f32_t ret, v_u32_t cmp)
-{
-  return v_call2_f32 (powf, x, y, ret, cmp);
-}
-
-VPCS_ATTR
-v_f32_t
-V_NAME(powf) (v_f32_t x, v_f32_t y)
-{
-  v_u32_t u, tmp, cmp, i, top, iz;
-  v_s32_t k;
-  v_f32_t ret;
-
-  u = v_as_u32_f32 (x);
-  cmp = v_cond_u32 (u - Min >= Max - Min);
-  tmp = u - OFF;
-  i = (tmp >> (23 - TBITS)) % (1 << TBITS);
-  top = tmp & 0xff800000;
-  iz = u - top;
-  k = v_as_s32_u32 (top) >> (23 - SBITS); /* arithmetic shift */
-
-  for (int lane = 0; lane < v_lanes32 (); lane++)
-    {
-      uint32_t si, siz;
-      int32_t sk;
-      float sy;
-
-      /* Use double precision for each lane.  */
-      double invc, logc, z, r, p, y0, logx, ylogx, kd, s;
-      uint64_t ki, t;
-
-      si = v_get_u32 (i, lane);
-      siz = v_get_u32 (iz, lane);
-      sk = v_get_s32 (k, lane);
-      sy = v_get_f32 (y, lane);
-
-      invc = Tlog[si].invc;
-      logc = Tlog[si].logc;
-      z = (double) as_f32_u32 (siz);
-
-      /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
-      r = __builtin_fma (z, invc, -1.0);
-      y0 = logc + (double) sk;
-
-      /* Polynomial to approximate log1p(r)/ln2.  */
-#if LOGDEG == 5
-      logx = A[0];
-      logx = r * logx + A[1];
-      logx = r * logx + A[2];
-      logx = r * logx + A[3];
-      logx = r * logx + A[4];
-      logx = r * logx + y0;
-#elif LOGDEG == 4
-      logx = A[0];
-      logx = r * logx + A[1];
-      logx = r * logx + A[2];
-      logx = r * logx + A[3];
-      logx = r * logx + y0;
-#endif
-      ylogx = sy * logx;
-      v_set_u32 (&cmp, lane,
-                 (as_u64_f64 (ylogx) >> 47 & 0xffff)
-                     >= as_u64_f64 (126.0 * (1 << SBITS)) >> 47
-                   ? 1
-                   : v_get_u32 (cmp, lane));
-
-      /* N*x = k + r with r in [-1/2, 1/2] */
-#if TOINT_INTRINSICS
-      kd = roundtoint (ylogx); /* k */
-      ki = converttoint (ylogx);
-#else
-# define SHIFT 0x1.8p52
-      kd = eval_as_double (ylogx + SHIFT);
-      ki = asuint64 (kd);
-      kd -= SHIFT;
-#endif
-      r = ylogx - kd;
-
-      /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
-      t = Texp[ki % (1 << SBITS)];
-      t += ki << (52 - SBITS);
-      s = as_f64_u64 (t);
-      p = C[0];
-      p = __builtin_fma (p, r, C[1]);
-      p = __builtin_fma (p, r, C[2]);
-      p = __builtin_fma (p, s * r, s);
-
-      v_set_f32 (&ret, lane, p);
-    }
-  if (unlikely (v_any_u32 (cmp)))
-    return specialcase (x, y, ret, cmp);
-  return ret;
-}
-VPCS_ALIAS
-#endif
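The non-TOINT_INTRINSICS path above rounds with the classic shift trick: adding 0x1.8p52 forces rounding to the nearest integer, and that integer appears in the low mantissa bits of the sum. A scalar sketch (assumes the default round-to-nearest mode; the library wraps the add in eval_as_double to keep compilers from optimising it away):

#include <stdint.h>
#include <string.h>

/* Valid for |x| < 2^51.  The low mantissa bits of kd hold the nearest
   integer, which the library reads for its table index.  */
static inline double
round_via_shift (double x, uint64_t *lowbits)
{
  double kd = x + 0x1.8p52;               /* rounds x to the nearest integer */
  memcpy (lowbits, &kd, sizeof *lowbits); /* integer sits in the low bits */
  return kd - 0x1.8p52;                   /* the rounded value itself */
}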
diff --git a/contrib/arm-optimized-routines/math/v_sin.c b/contrib/arm-optimized-routines/math/v_sin.c
deleted file mode 100644
index 2b9ed059189c..000000000000
--- a/contrib/arm-optimized-routines/math/v_sin.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Double-precision vector sin function.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-
-#include "mathlib.h"
-#include "v_math.h"
-#if V_SUPPORTED
-
-static const double Poly[] = {
-/* worst-case error is 3.5 ulp.
-   abs error: 0x1.be222a58p-53 in [-pi/2, pi/2].  */
--0x1.9f4a9c8b21dc9p-41,
- 0x1.60e88a10163f2p-33,
--0x1.ae6361b7254e7p-26,
- 0x1.71de382e8d62bp-19,
--0x1.a01a019aeb4ffp-13,
- 0x1.111111110b25ep-7,
--0x1.55555555554c3p-3,
-};
-
-#define C7 v_f64 (Poly[0])
-#define C6 v_f64 (Poly[1])
-#define C5 v_f64 (Poly[2])
-#define C4 v_f64 (Poly[3])
-#define C3 v_f64 (Poly[4])
-#define C2 v_f64 (Poly[5])
-#define C1 v_f64 (Poly[6])
-
-#define InvPi v_f64 (0x1.45f306dc9c883p-2)
-#define Pi1 v_f64 (0x1.921fb54442d18p+1)
-#define Pi2 v_f64 (0x1.1a62633145c06p-53)
-#define Pi3 v_f64 (0x1.c1cd129024e09p-106)
-#define Shift v_f64 (0x1.8p52)
-#define RangeVal v_f64 (0x1p23)
-#define AbsMask v_u64 (0x7fffffffffffffff)
-
-VPCS_ATTR
-__attribute__ ((noinline)) static v_f64_t
-specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp)
-{
-  return v_call_f64 (sin, x, y, cmp);
-}
-
-VPCS_ATTR
-v_f64_t
-V_NAME(sin) (v_f64_t x)
-{
-  v_f64_t n, r, r2, y;
-  v_u64_t sign, odd, cmp;
-
-  r = v_as_f64_u64 (v_as_u64_f64 (x) & AbsMask);
-  sign = v_as_u64_f64 (x) & ~AbsMask;
-  cmp = v_cond_u64 (v_as_u64_f64 (r) >= v_as_u64_f64 (RangeVal));
-
-  /* n = rint(|x|/pi).  */
-  n = v_fma_f64 (InvPi, r, Shift);
-  odd = v_as_u64_f64 (n) << 63;
-  n -= Shift;
-
-  /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2).  */
-  r = v_fma_f64 (-Pi1, n, r);
-  r = v_fma_f64 (-Pi2, n, r);
-  r = v_fma_f64 (-Pi3, n, r);
-
-  /* sin(r) poly approx.  */
-  r2 = r * r;
-  y = v_fma_f64 (C7, r2, C6);
-  y = v_fma_f64 (y, r2, C5);
-  y = v_fma_f64 (y, r2, C4);
-  y = v_fma_f64 (y, r2, C3);
-  y = v_fma_f64 (y, r2, C2);
-  y = v_fma_f64 (y, r2, C1);
-  y = v_fma_f64 (y * r2, r, r);
-
-  /* sign.  */
-  y = v_as_f64_u64 (v_as_u64_f64 (y) ^ sign ^ odd);
-
-  if (unlikely (v_any_u64 (cmp)))
-    return specialcase (x, y, cmp);
-  return y;
-}
-VPCS_ALIAS
-#endif
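The three-part reduction in the removed v_sin.c is a Cody-Waite style scheme: pi is stored as Pi1 + Pi2 + Pi3 with enough trailing zeros in each part that every fma cancels the leading bits of |x| - n*pi exactly. A scalar sketch of the same steps (valid only below the RangeVal cutoff used above, 2^23):

#include <math.h>

static double
reduce_pi (double x, double *n_out)
{
  const double InvPi = 0x1.45f306dc9c883p-2;
  const double Pi1 = 0x1.921fb54442d18p+1;
  const double Pi2 = 0x1.1a62633145c06p-53;
  const double Pi3 = 0x1.c1cd129024e09p-106;
  const double Shift = 0x1.8p52;

  double n = fma (InvPi, x, Shift) - Shift; /* n = rint(x/pi) */
  double r = fma (-Pi1, n, x);              /* subtract n*pi piecewise */
  r = fma (-Pi2, n, r);
  r = fma (-Pi3, n, r);
  *n_out = n;
  return r;                                 /* in [-pi/2, pi/2] */
}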
diff --git a/contrib/arm-optimized-routines/math/v_sinf.c b/contrib/arm-optimized-routines/math/v_sinf.c
deleted file mode 100644
index e66bfce6d8aa..000000000000
--- a/contrib/arm-optimized-routines/math/v_sinf.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Single-precision vector sin function.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-
-#include "mathlib.h"
-#include "v_math.h"
-#if V_SUPPORTED
-
-static const float Poly[] = {
-  /* 1.886 ulp error */
-  0x1.5b2e76p-19f,
-  -0x1.9f42eap-13f,
-  0x1.110df4p-7f,
-  -0x1.555548p-3f,
-};
-#define Pi1 v_f32 (0x1.921fb6p+1f)
-#define Pi2 v_f32 (-0x1.777a5cp-24f)
-#define Pi3 v_f32 (-0x1.ee59dap-49f)
-#define A3 v_f32 (Poly[3])
-#define A5 v_f32 (Poly[2])
-#define A7 v_f32 (Poly[1])
-#define A9 v_f32 (Poly[0])
-#define RangeVal v_f32 (0x1p20f)
-#define InvPi v_f32 (0x1.45f306p-2f)
-#define Shift v_f32 (0x1.8p+23f)
-#define AbsMask v_u32 (0x7fffffff)
-
-VPCS_ATTR
-static v_f32_t
-specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp)
-{
-  /* Fall back to scalar code.  */
-  return v_call_f32 (sinf, x, y, cmp);
-}
-
-VPCS_ATTR
-v_f32_t
-V_NAME(sinf) (v_f32_t x)
-{
-  v_f32_t n, r, r2, y;
-  v_u32_t sign, odd, cmp;
-
-  r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask);
-  sign = v_as_u32_f32 (x) & ~AbsMask;
-  cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal));
-
-  /* n = rint(|x|/pi) */
-  n = v_fma_f32 (InvPi, r, Shift);
-  odd = v_as_u32_f32 (n) << 31;
-  n -= Shift;
-
-  /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2) */
-  r = v_fma_f32 (-Pi1, n, r);
-  r = v_fma_f32 (-Pi2, n, r);
-  r = v_fma_f32 (-Pi3, n, r);
-
-  /* y = sin(r) */
-  r2 = r * r;
-  y = v_fma_f32 (A9, r2, A7);
-  y = v_fma_f32 (y, r2, A5);
-  y = v_fma_f32 (y, r2, A3);
-  y = v_fma_f32 (y * r2, r, r);
-
-  /* sign fix */
-  y = v_as_f32_u32 (v_as_u32_f32 (y) ^ sign ^ odd);
-
-  if (unlikely (v_any_u32 (cmp)))
-    return specialcase (x, y, cmp);
-  return y;
-}
-VPCS_ALIAS
-#endif
diff --git a/contrib/arm-optimized-routines/math/vn_cos.c b/contrib/arm-optimized-routines/math/vn_cos.c
deleted file mode 100644
index b57a549eba68..000000000000
--- a/contrib/arm-optimized-routines/math/vn_cos.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * AdvSIMD vector PCS variant of __v_cos.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-#include "mathlib.h"
-#ifdef __vpcs
-#define VPCS 1
-#define VPCS_ALIAS strong_alias (__vn_cos, _ZGVnN2v_cos)
-#include "v_cos.c"
-#endif
diff --git a/contrib/arm-optimized-routines/math/vn_cosf.c b/contrib/arm-optimized-routines/math/vn_cosf.c
deleted file mode 100644
index 6321d4620fa7..000000000000
--- a/contrib/arm-optimized-routines/math/vn_cosf.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * AdvSIMD vector PCS variant of __v_cosf.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-#include "mathlib.h"
-#ifdef __vpcs
-#define VPCS 1
-#define VPCS_ALIAS strong_alias (__vn_cosf, _ZGVnN4v_cosf)
-#include "v_cosf.c"
-#endif
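Each removed vn_* wrapper rebuilds the corresponding v_* kernel with the vector PCS enabled and binds it to its AArch64 vector-function-ABI name: in _ZGVnN4v_cosf, 'n' selects AdvSIMD, 'N' means unmasked, '4' is the lane count, and 'v' marks one vector argument. strong_alias is defined elsewhere in the library; a typical glibc-style definition (assumed here for illustration) looks like:

/* Make ALIASNAME an additional exported symbol for NAME.  */
#define strong_alias(name, aliasname) \
  extern __typeof (name) aliasname __attribute__ ((alias (#name)));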
diff --git a/contrib/arm-optimized-routines/math/vn_exp.c b/contrib/arm-optimized-routines/math/vn_exp.c
deleted file mode 100644
index 06e269d41766..000000000000
--- a/contrib/arm-optimized-routines/math/vn_exp.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * AdvSIMD vector PCS variant of __v_exp.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-#include "mathlib.h"
-#ifdef __vpcs
-#define VPCS 1
-#define VPCS_ALIAS strong_alias (__vn_exp, _ZGVnN2v_exp)
-#include "v_exp.c"
-#endif
diff --git a/contrib/arm-optimized-routines/math/vn_exp2f.c b/contrib/arm-optimized-routines/math/vn_exp2f.c
deleted file mode 100644
index db9707e86f16..000000000000
--- a/contrib/arm-optimized-routines/math/vn_exp2f.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * AdvSIMD vector PCS variant of __v_exp2f.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-#include "mathlib.h"
-#ifdef __vpcs
-#define VPCS 1
-#define VPCS_ALIAS strong_alias (__vn_exp2f, _ZGVnN4v_exp2f)
-#include "v_exp2f.c"
-#endif
diff --git a/contrib/arm-optimized-routines/math/vn_exp2f_1u.c b/contrib/arm-optimized-routines/math/vn_exp2f_1u.c
deleted file mode 100644
index 17bd0abd7a60..000000000000
--- a/contrib/arm-optimized-routines/math/vn_exp2f_1u.c
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- * AdvSIMD vector PCS variant of __v_exp2f_1u.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-#include "mathlib.h"
-#ifdef __vpcs
-#define VPCS 1
-#include "v_exp2f_1u.c"
-#endif
diff --git a/contrib/arm-optimized-routines/math/vn_expf.c b/contrib/arm-optimized-routines/math/vn_expf.c
deleted file mode 100644
index 0652907225d9..000000000000
--- a/contrib/arm-optimized-routines/math/vn_expf.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * AdvSIMD vector PCS variant of __v_expf.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-#include "mathlib.h"
-#ifdef __vpcs
-#define VPCS 1
-#define VPCS_ALIAS strong_alias (__vn_expf, _ZGVnN4v_expf)
-#include "v_expf.c"
-#endif
diff --git a/contrib/arm-optimized-routines/math/vn_expf_1u.c b/contrib/arm-optimized-routines/math/vn_expf_1u.c
deleted file mode 100644
index 3be776814822..000000000000
--- a/contrib/arm-optimized-routines/math/vn_expf_1u.c
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- * AdvSIMD vector PCS variant of __v_expf_1u.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-#include "mathlib.h"
-#ifdef __vpcs
-#define VPCS 1
-#include "v_expf_1u.c"
-#endif
diff --git a/contrib/arm-optimized-routines/math/vn_log.c b/contrib/arm-optimized-routines/math/vn_log.c
deleted file mode 100644
index b58fe8ff820a..000000000000
--- a/contrib/arm-optimized-routines/math/vn_log.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * AdvSIMD vector PCS variant of __v_log.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-#include "mathlib.h"
-#ifdef __vpcs
-#define VPCS 1
-#define VPCS_ALIAS strong_alias (__vn_log, _ZGVnN2v_log)
-#include "v_log.c"
-#endif
diff --git a/contrib/arm-optimized-routines/math/vn_logf.c b/contrib/arm-optimized-routines/math/vn_logf.c
deleted file mode 100644
index cc5b8ae3ed55..000000000000
--- a/contrib/arm-optimized-routines/math/vn_logf.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * AdvSIMD vector PCS variant of __v_logf.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-#include "mathlib.h"
-#ifdef __vpcs
-#define VPCS 1
-#define VPCS_ALIAS strong_alias (__vn_logf, _ZGVnN4v_logf)
-#include "v_logf.c"
-#endif
diff --git a/contrib/arm-optimized-routines/math/vn_pow.c b/contrib/arm-optimized-routines/math/vn_pow.c
deleted file mode 100644
index 260950113b04..000000000000
--- a/contrib/arm-optimized-routines/math/vn_pow.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * AdvSIMD vector PCS variant of __v_pow.
- *
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-#include "mathlib.h"
-#ifdef __vpcs
-#define VPCS 1
-#define VPCS_ALIAS strong_alias (__vn_pow, _ZGVnN2vv_pow)
-#include "v_pow.c"
-#endif
diff --git a/contrib/arm-optimized-routines/math/vn_powf.c b/contrib/arm-optimized-routines/math/vn_powf.c
deleted file mode 100644
index 095d07e337ad..000000000000
--- a/contrib/arm-optimized-routines/math/vn_powf.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * AdvSIMD vector PCS variant of __v_powf.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-#include "mathlib.h"
-#ifdef __vpcs
-#define VPCS 1
-#define VPCS_ALIAS strong_alias (__vn_powf, _ZGVnN4vv_powf)
-#include "v_powf.c"
-#endif
diff --git a/contrib/arm-optimized-routines/math/vn_sin.c b/contrib/arm-optimized-routines/math/vn_sin.c
deleted file mode 100644
index 905c79623350..000000000000
--- a/contrib/arm-optimized-routines/math/vn_sin.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * AdvSIMD vector PCS variant of __v_sin.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-#include "mathlib.h"
-#ifdef __vpcs
-#define VPCS 1
-#define VPCS_ALIAS strong_alias (__vn_sin, _ZGVnN2v_sin)
-#include "v_sin.c"
-#endif
diff --git a/contrib/arm-optimized-routines/math/vn_sinf.c b/contrib/arm-optimized-routines/math/vn_sinf.c
deleted file mode 100644
index 1214e1a55638..000000000000
--- a/contrib/arm-optimized-routines/math/vn_sinf.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * AdvSIMD vector PCS variant of __v_sinf.
- *
- * Copyright (c) 2019, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-#include "mathlib.h"
-#ifdef __vpcs
-#define VPCS 1
-#define VPCS_ALIAS strong_alias (__vn_sinf, _ZGVnN4v_sinf)
-#include "v_sinf.c"
-#endif
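With the aliases in place, these kernels are callable under their ABI names from AArch64 code, provided the prototype carries the vector PCS attribute. A hand-written sketch (mathlib.h normally supplies such declarations):

#include <arm_neon.h>

__attribute__ ((aarch64_vector_pcs)) float32x4_t _ZGVnN4v_sinf (float32x4_t);

float32x4_t
sinf_4lanes (float32x4_t x)
{
  return _ZGVnN4v_sinf (x);  /* sinf on all four lanes at once */
}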