aboutsummaryrefslogtreecommitdiff
path: root/contrib/arm-optimized-routines/math/tools
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/arm-optimized-routines/math/tools')
-rw-r--r--contrib/arm-optimized-routines/math/tools/asin.sollya29
-rw-r--r--contrib/arm-optimized-routines/math/tools/asinf.sollya36
-rw-r--r--contrib/arm-optimized-routines/math/tools/asinh.sollya28
-rw-r--r--contrib/arm-optimized-routines/math/tools/asinhf.sollya29
-rw-r--r--contrib/arm-optimized-routines/math/tools/atan.sollya23
-rw-r--r--contrib/arm-optimized-routines/math/tools/atanf.sollya20
-rw-r--r--contrib/arm-optimized-routines/math/tools/cbrt.sollya20
-rw-r--r--contrib/arm-optimized-routines/math/tools/cbrtf.sollya20
-rw-r--r--contrib/arm-optimized-routines/math/tools/cos.sollya2
-rw-r--r--contrib/arm-optimized-routines/math/tools/erf.sollya25
-rw-r--r--contrib/arm-optimized-routines/math/tools/erfc.sollya51
-rw-r--r--contrib/arm-optimized-routines/math/tools/erfcf.sollya22
-rw-r--r--contrib/arm-optimized-routines/math/tools/erff.sollya20
-rw-r--r--contrib/arm-optimized-routines/math/tools/exp.sollya2
-rw-r--r--contrib/arm-optimized-routines/math/tools/exp10.sollya55
-rw-r--r--contrib/arm-optimized-routines/math/tools/exp2.sollya2
-rw-r--r--contrib/arm-optimized-routines/math/tools/expm1.sollya21
-rw-r--r--contrib/arm-optimized-routines/math/tools/expm1f.sollya21
-rw-r--r--contrib/arm-optimized-routines/math/tools/log.sollya2
-rw-r--r--contrib/arm-optimized-routines/math/tools/log10.sollya44
-rw-r--r--contrib/arm-optimized-routines/math/tools/log10f.sollya37
-rw-r--r--contrib/arm-optimized-routines/math/tools/log1p.sollya30
-rw-r--r--contrib/arm-optimized-routines/math/tools/log1pf.sollya21
-rw-r--r--contrib/arm-optimized-routines/math/tools/log2.sollya2
-rw-r--r--contrib/arm-optimized-routines/math/tools/log2_abs.sollya2
-rw-r--r--contrib/arm-optimized-routines/math/tools/log_abs.sollya2
-rwxr-xr-xcontrib/arm-optimized-routines/math/tools/plot.py2
-rwxr-xr-xcontrib/arm-optimized-routines/math/tools/remez.jl2
-rw-r--r--contrib/arm-optimized-routines/math/tools/sin.sollya2
-rw-r--r--contrib/arm-optimized-routines/math/tools/sincos.sollya33
-rw-r--r--contrib/arm-optimized-routines/math/tools/sincosf.sollya33
-rw-r--r--contrib/arm-optimized-routines/math/tools/sinpi.sollya33
-rw-r--r--contrib/arm-optimized-routines/math/tools/tan.sollya20
-rw-r--r--contrib/arm-optimized-routines/math/tools/tanf.sollya78
-rw-r--r--contrib/arm-optimized-routines/math/tools/tanpi.sollya48
-rw-r--r--contrib/arm-optimized-routines/math/tools/tgamma128_gen.jl212
-rw-r--r--contrib/arm-optimized-routines/math/tools/v_erf.sollya20
-rw-r--r--contrib/arm-optimized-routines/math/tools/v_erfc.sollya46
-rw-r--r--contrib/arm-optimized-routines/math/tools/v_exp.sollya2
-rw-r--r--contrib/arm-optimized-routines/math/tools/v_log.sollya2
-rw-r--r--contrib/arm-optimized-routines/math/tools/v_log10.sollya38
-rw-r--r--contrib/arm-optimized-routines/math/tools/v_log10f.sollya45
-rw-r--r--contrib/arm-optimized-routines/math/tools/v_log2f.sollya38
-rw-r--r--contrib/arm-optimized-routines/math/tools/v_sin.sollya2
44 files changed, 1209 insertions, 13 deletions
diff --git a/contrib/arm-optimized-routines/math/tools/asin.sollya b/contrib/arm-optimized-routines/math/tools/asin.sollya
new file mode 100644
index 000000000000..02c4a93356c3
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/asin.sollya
@@ -0,0 +1,29 @@
+// polynomial for approximating asin(x)
+//
+// Copyright (c) 2023-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+f = asin(x);
+dtype = double;
+
+prec=256;
+
+a = 0x1p-106;
+b = 0.25;
+
+deg = 11;
+
+backward = proc(poly, d) {
+ return d + d ^ 3 * poly(d * d);
+};
+
+forward = proc(f, d) {
+ return (f(sqrt(d))-sqrt(d))/(d*sqrt(d));
+};
+
+poly = fpminimax(forward(f, x), [|0,...,deg|], [|dtype ...|], [a;b], relative, floating);
+
+display = hexadecimal!;
+print("rel error:", dirtyinfnorm(1-backward(poly, x)/f(x), [a;b]));
+print("in [", a, b, "]");
+for i from 0 to deg do print(coeff(poly, i));
diff --git a/contrib/arm-optimized-routines/math/tools/asinf.sollya b/contrib/arm-optimized-routines/math/tools/asinf.sollya
new file mode 100644
index 000000000000..69d1803875d1
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/asinf.sollya
@@ -0,0 +1,36 @@
+// polynomial for approximating asinf(x)
+//
+// Copyright (c) 2023-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+f = asin(x);
+dtype = single;
+
+a = 0x1p-24;
+b = 0.25;
+
+deg = 4;
+
+backward = proc(poly, d) {
+ return d + d ^ 3 * poly(d * d);
+};
+
+forward = proc(f, d) {
+ return (f(sqrt(d))-sqrt(d))/(d*sqrt(d));
+};
+
+approx = proc(poly, d) {
+ return remez(1 - poly(x) / forward(f, x), deg - d, [a;b], x^d/forward(f, x), 1e-16);
+};
+
+poly = 0;
+for i from 0 to deg do {
+ i;
+ p = roundcoefficients(approx(poly,i), [|dtype ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal!;
+print("rel error:", accurateinfnorm(1-backward(poly, x)/f(x), [a;b], 30));
+print("in [", a, b, "]");
+for i from 0 to deg do print(coeff(poly, i));
diff --git a/contrib/arm-optimized-routines/math/tools/asinh.sollya b/contrib/arm-optimized-routines/math/tools/asinh.sollya
new file mode 100644
index 000000000000..eea9b8081168
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/asinh.sollya
@@ -0,0 +1,28 @@
+// polynomial for approximating asinh(x)
+//
+// Copyright (c) 2022-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// Polynomial is used in [2^-26, 1]. However it is least accurate close to 1, so
+// we use 2^-6 as the lower bound for coeff generation, which yields sufficiently
+// accurate results in [2^-26, 2^-6].
+a = 0x1p-6;
+b = 1.0;
+
+f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2);
+
+approx = proc(poly, d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+poly = 0;
+for i from 0 to deg do {
+ i;
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+
+display = hexadecimal;
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/math/tools/asinhf.sollya b/contrib/arm-optimized-routines/math/tools/asinhf.sollya
new file mode 100644
index 000000000000..5f1580fce883
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/asinhf.sollya
@@ -0,0 +1,29 @@
+// polynomial for approximating asinh(x)
+//
+// Copyright (c) 2022-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 9;
+
+a = 0x1.0p-12;
+b = 1.0;
+
+f = proc(y) {
+ return asinh(x);
+};
+
+approx = proc(poly, d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+poly = x;
+for i from 2 to deg do {
+ p = roundcoefficients(approx(poly,i), [|SG ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 2 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/math/tools/atan.sollya b/contrib/arm-optimized-routines/math/tools/atan.sollya
new file mode 100644
index 000000000000..048017d8d269
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/atan.sollya
@@ -0,0 +1,23 @@
+// polynomial for approximating atan(x) and atan2(y, x)
+//
+// Copyright (c) 2022-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// atan is odd, so approximate with an odd polynomial:
+// x + ax^3 + bx^5 + cx^7 + ...
+// We generate a, b, c, ... such that we can approximate atan(x) by:
+// x + x^3 * (a + bx^2 + cx^4 + ...)
+
+// Assemble monomials
+deg = 20;
+mons = [|1,...,deg|];
+for i from 0 to deg-1 do mons[i] = mons[i] * 2 + 1;
+
+a = 0x1.0p-1022;
+b = 1;
+
+poly = fpminimax(atan(x)-x, mons, [|double ...|], [a;b]);
+
+display = hexadecimal;
+print("coeffs:");
+for i from 0 to deg-1 do coeff(poly,mons[i]);
diff --git a/contrib/arm-optimized-routines/math/tools/atanf.sollya b/contrib/arm-optimized-routines/math/tools/atanf.sollya
new file mode 100644
index 000000000000..21c3ba2bfa1d
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/atanf.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating atanf(x)
+//
+// Copyright (c) 2022-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// Generate list of monomials:
+// Taylor series of atan is of the form x + ax^3 + bx^5 + cx^7 + ...
+// So generate a, b, c, ... such that we can approximate atan(x) by:
+// x + x^3 * (a + bx^2 + cx^4 + ...)
+
+deg = 7;
+
+a = 1.1754943508222875e-38;
+b = 1;
+
+poly = fpminimax((atan(sqrt(x))-sqrt(x))/x^(3/2), deg, [|single ...|], [a;b]);
+
+display = hexadecimal;
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/math/tools/cbrt.sollya b/contrib/arm-optimized-routines/math/tools/cbrt.sollya
new file mode 100644
index 000000000000..2490a69ac029
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/cbrt.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating cbrt(x) in double precision
+//
+// Copyright (c) 2022-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 3;
+
+a = 0.5;
+b = 1;
+
+
+f = x^(1/3);
+
+poly = fpminimax(f, deg, [|double ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do round(coeff(poly,i), D, RN);
diff --git a/contrib/arm-optimized-routines/math/tools/cbrtf.sollya b/contrib/arm-optimized-routines/math/tools/cbrtf.sollya
new file mode 100644
index 000000000000..1debf930e722
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/cbrtf.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating cbrt(x) in single precision
+//
+// Copyright (c) 2022-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 3;
+
+a = 0.5;
+b = 1;
+
+
+f = x^(1/3);
+
+poly = fpminimax(f, deg, [|single ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do round(coeff(poly,i), SG, RN);
diff --git a/contrib/arm-optimized-routines/math/tools/cos.sollya b/contrib/arm-optimized-routines/math/tools/cos.sollya
index bd72d6b74820..6690adfcbb9b 100644
--- a/contrib/arm-optimized-routines/math/tools/cos.sollya
+++ b/contrib/arm-optimized-routines/math/tools/cos.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating cos(x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 8; // polynomial degree
a = -pi/4; // interval
diff --git a/contrib/arm-optimized-routines/math/tools/erf.sollya b/contrib/arm-optimized-routines/math/tools/erf.sollya
new file mode 100644
index 000000000000..060e1686c835
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/erf.sollya
@@ -0,0 +1,25 @@
+// tables and constants for approximating erf(x).
+//
+// Copyright (c) 2023-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+display = hexadecimal;
+prec=128;
+
+// Tables
+print("{ i, r, erf(r), 2/sqrt(pi) * exp(-r^2)}");
+for i from 0 to 768 do {
+ r = i / 128;
+ t0 = double(erf(r));
+ t1 = double(2/sqrt(pi) * exp(-r * r));
+ print("{ " @ i @ ",\t" @ r @ ",\t" @ t0 @ ",\t" @ t1 @ " },");
+};
+
+// Constants
+double(1/3);
+double(1/10);
+double(2/15);
+double(2/9);
+double(2/45);
+double(2/sqrt(pi));
+
diff --git a/contrib/arm-optimized-routines/math/tools/erfc.sollya b/contrib/arm-optimized-routines/math/tools/erfc.sollya
new file mode 100644
index 000000000000..1b4b00066093
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/erfc.sollya
@@ -0,0 +1,51 @@
+// tables and constants for approximating erfc(x).
+//
+// Copyright (c) 2023-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+display = hexadecimal;
+prec=128;
+
+// Tables
+print("{ i, r, erfc(r), 2/sqrt(pi) * exp(-r^2) }");
+for i from 0 to 3787 do {
+ r = 0.0 + i / 128;
+ t0 = double(erfc(r) * 2^128);
+ t1 = double(2/sqrt(pi) * exp(-r * r) * 2^128);
+ print("{ " @ t0 @ ",\t" @ t1 @ " },");
+};
+
+// Constants
+print("> 2/sqrt(pi)");
+double(2/sqrt(pi));
+
+print("> 1/3");
+double(1/3);
+
+print("> P5");
+double(2/15);
+double(1/10);
+double(2/9);
+double(2/45);
+
+print("> P6");
+double(1/42);
+double(1/7);
+double(2/21);
+double(4/315);
+
+print("> Q");
+double( 5.0 / 4.0);
+double( 6.0 / 5.0);
+double( 7.0 / 6.0);
+double( 8.0 / 7.0);
+double( 9.0 / 8.0);
+double(10.0 / 9.0);
+
+print("> R");
+double(-2.0 * 4.0 / (5.0 * 6.0));
+double(-2.0 * 5.0 / (6.0 * 7.0));
+double(-2.0 * 6.0 / (7.0 * 8.0));
+double(-2.0 * 7.0 / (8.0 * 9.0));
+double(-2.0 * 8.0 / (9.0 * 10.0));
+double(-2.0 * 9.0 / (10.0 * 11.0));
diff --git a/contrib/arm-optimized-routines/math/tools/erfcf.sollya b/contrib/arm-optimized-routines/math/tools/erfcf.sollya
new file mode 100644
index 000000000000..a8e0409f5db5
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/erfcf.sollya
@@ -0,0 +1,22 @@
+// tables and constants for approximating erfcf(x).
+//
+// Copyright (c) 2023-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+display = hexadecimal;
+prec=128;
+
+// Tables
+print("{ i, r, erfc(r), 2/sqrt(pi) * exp(-r^2) }");
+for i from 0 to 644 do {
+ r = 0.0 + i / 64;
+ t0 = single(erfc(r) * 2^47);
+ t1 = single(2/sqrt(pi) * exp(-r * r) * 2^47);
+ print("{ " @ t0 @ ",\t" @ t1 @ " },");
+};
+
+// Constants
+single(1/3);
+single(2/15);
+single(1/10);
+single(2/sqrt(pi));
diff --git a/contrib/arm-optimized-routines/math/tools/erff.sollya b/contrib/arm-optimized-routines/math/tools/erff.sollya
new file mode 100644
index 000000000000..c0178a2b24ad
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/erff.sollya
@@ -0,0 +1,20 @@
+// tables and constants for approximating erff(x).
+//
+// Copyright (c) 2023-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+display = hexadecimal;
+prec=128;
+
+// Tables
+print("{ i, r, erf(r), 2/sqrt(pi) * exp(-r^2)}");
+for i from 0 to 512 do {
+ r = i / 128;
+ t0 = single(erf(r));
+ t1 = single(2/sqrt(pi) * exp(-r * r));
+ print("{ " @ i @ ",\t" @ r @ ",\t" @ t0 @ ",\t" @ t1 @ " },");
+};
+
+// Constants
+single(1/3);
+single(2/sqrt(pi));
diff --git a/contrib/arm-optimized-routines/math/tools/exp.sollya b/contrib/arm-optimized-routines/math/tools/exp.sollya
index b7a462cda5a4..0668bdb5b3d3 100644
--- a/contrib/arm-optimized-routines/math/tools/exp.sollya
+++ b/contrib/arm-optimized-routines/math/tools/exp.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating e^x
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 5; // poly degree
N = 128; // table entries
diff --git a/contrib/arm-optimized-routines/math/tools/exp10.sollya b/contrib/arm-optimized-routines/math/tools/exp10.sollya
new file mode 100644
index 000000000000..91f92595b96d
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/exp10.sollya
@@ -0,0 +1,55 @@
+// polynomial for approximating 10^x
+//
+// Copyright (c) 2023-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// exp10f parameters
+deg = 5; // poly degree
+N = 1; // Neon 1, SVE 64
+b = log(2)/(2 * N * log(10)); // interval
+a = -b;
+wp = single;
+
+// exp10 parameters
+//deg = 4; // poly degree - bump to 5 for ~1 ULP
+//N = 128; // table size
+//b = log(2)/(2 * N * log(10)); // interval
+//a = -b;
+//wp = D;
+
+
+// find polynomial with minimal relative error
+
+f = 10^x;
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
+approx_abs = proc(poly,d) {
+ return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = 1;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|wp ...|]);
+// p = roundcoefficients(approx_abs(poly,i), [|wp ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/10^x, [a;b], 30));
+print("abs error:", accurateinfnorm(10^x-poly(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
+
+log10_2 = round(N * log(10) / log(2), wp, RN);
+log2_10 = log(2) / (N * log(10));
+log2_10_hi = round(log2_10, wp, RN);
+log2_10_lo = round(log2_10 - log2_10_hi, wp, RN);
+print(log10_2);
+print(log2_10_hi);
+print(log2_10_lo);
diff --git a/contrib/arm-optimized-routines/math/tools/exp2.sollya b/contrib/arm-optimized-routines/math/tools/exp2.sollya
index e760769601d4..bd0a42d6bbcb 100644
--- a/contrib/arm-optimized-routines/math/tools/exp2.sollya
+++ b/contrib/arm-optimized-routines/math/tools/exp2.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating 2^x
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
// exp2f parameters
deg = 3; // poly degree
diff --git a/contrib/arm-optimized-routines/math/tools/expm1.sollya b/contrib/arm-optimized-routines/math/tools/expm1.sollya
new file mode 100644
index 000000000000..d87466a066af
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/expm1.sollya
@@ -0,0 +1,21 @@
+// polynomial for approximating exp(x)-1 in double precision
+//
+// Copyright (c) 2022-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 12;
+
+a = -log(2)/2;
+b = log(2)/2;
+
+f = proc(y) {
+ return exp(y)-1;
+};
+
+poly = fpminimax(f(x), deg, [|double ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 2 to deg do round(coeff(poly,i), D, RN);
diff --git a/contrib/arm-optimized-routines/math/tools/expm1f.sollya b/contrib/arm-optimized-routines/math/tools/expm1f.sollya
new file mode 100644
index 000000000000..bb9496f3f2c4
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/expm1f.sollya
@@ -0,0 +1,21 @@
+// polynomial for approximating exp(x)-1 in single precision
+//
+// Copyright (c) 2022-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 5;
+
+a = -log(2)/2;
+b = log(2)/2;
+
+f = proc(y) {
+ return exp(y)-1;
+};
+
+poly = fpminimax(f(x), deg, [|single ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 2 to deg do round(coeff(poly,i), SG, RN);
diff --git a/contrib/arm-optimized-routines/math/tools/log.sollya b/contrib/arm-optimized-routines/math/tools/log.sollya
index 6df4db44b6f3..5288f5572925 100644
--- a/contrib/arm-optimized-routines/math/tools/log.sollya
+++ b/contrib/arm-optimized-routines/math/tools/log.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating log(1+x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 12; // poly degree
// |log(1+x)| > 0x1p-4 outside the interval
diff --git a/contrib/arm-optimized-routines/math/tools/log10.sollya b/contrib/arm-optimized-routines/math/tools/log10.sollya
new file mode 100644
index 000000000000..78f956b14b95
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/log10.sollya
@@ -0,0 +1,44 @@
+// polynomial for approximating log10(1+x)
+//
+// Copyright (c) 2019-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 6; // poly degree
+// |log10(1+x)| > 0x1p-5 outside the interval
+a = -0x1.p-5;
+b = 0x1.p-5;
+
+ln10 = evaluate(log(10),0);
+invln10hi = double(1/ln10 + 0x1p21) - 0x1p21; // round away last 21 bits
+invln10lo = double(1/ln10 - invln10hi);
+
+// find log10(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log10(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+f = f/ln10;
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = invln10hi + invln10lo;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+display = hexadecimal;
+print("invln10hi:", invln10hi);
+print("invln10lo:", invln10lo);
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
+
+display = decimal;
+print("in [",a,b,"]");
diff --git a/contrib/arm-optimized-routines/math/tools/log10f.sollya b/contrib/arm-optimized-routines/math/tools/log10f.sollya
new file mode 100644
index 000000000000..c64a30aa8e18
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/log10f.sollya
@@ -0,0 +1,37 @@
+// polynomial for approximating log10f(1+x)
+//
+// Copyright (c) 2019-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// Computation of log10f(1+x) will be carried out in double precision
+
+deg = 4; // poly degree
+// [OFF; 2*OFF] is divided in 2^4 intervals with OFF~0.7
+a = -0.04375;
+b = 0.04375;
+
+// find log(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = 1;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do double(coeff(poly,i));
diff --git a/contrib/arm-optimized-routines/math/tools/log1p.sollya b/contrib/arm-optimized-routines/math/tools/log1p.sollya
new file mode 100644
index 000000000000..0cf72081fabb
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/log1p.sollya
@@ -0,0 +1,30 @@
+// polynomial for approximating log(1+x) in double precision
+//
+// Copyright (c) 2022-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 20;
+
+a = sqrt(2)/2-1;
+b = sqrt(2)-1;
+
+f = proc(y) {
+ return log(1+y);
+};
+
+approx = proc(poly, d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+poly = x;
+for i from 2 to deg do {
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+
+print("coeffs:");
+display = hexadecimal;
+for i from 2 to deg do coeff(poly,i);
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
diff --git a/contrib/arm-optimized-routines/math/tools/log1pf.sollya b/contrib/arm-optimized-routines/math/tools/log1pf.sollya
new file mode 100644
index 000000000000..fc542c937111
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/log1pf.sollya
@@ -0,0 +1,21 @@
+// polynomial for approximating log(1+x) in single precision
+//
+// Copyright (c) 2022-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 10;
+
+a = -0.25;
+b = 0.5;
+
+f = proc(y) {
+ return log(1+y);
+};
+
+poly = fpminimax(f(x), deg, [|single ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 2 to deg do round(coeff(poly,i), SG, RN);
diff --git a/contrib/arm-optimized-routines/math/tools/log2.sollya b/contrib/arm-optimized-routines/math/tools/log2.sollya
index 4a364c0f111f..85811be5d90c 100644
--- a/contrib/arm-optimized-routines/math/tools/log2.sollya
+++ b/contrib/arm-optimized-routines/math/tools/log2.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating log2(1+x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 11; // poly degree
// |log2(1+x)| > 0x1p-4 outside the interval
diff --git a/contrib/arm-optimized-routines/math/tools/log2_abs.sollya b/contrib/arm-optimized-routines/math/tools/log2_abs.sollya
index 82c4dac26fa1..d018ba0145d2 100644
--- a/contrib/arm-optimized-routines/math/tools/log2_abs.sollya
+++ b/contrib/arm-optimized-routines/math/tools/log2_abs.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating log2(1+x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 7; // poly degree
// interval ~= 1/(2*N), where N is the table entries
diff --git a/contrib/arm-optimized-routines/math/tools/log_abs.sollya b/contrib/arm-optimized-routines/math/tools/log_abs.sollya
index a2ac190fc497..5f9bfe41a683 100644
--- a/contrib/arm-optimized-routines/math/tools/log_abs.sollya
+++ b/contrib/arm-optimized-routines/math/tools/log_abs.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating log(1+x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 6; // poly degree
// interval ~= 1/(2*N), where N is the table entries
diff --git a/contrib/arm-optimized-routines/math/tools/plot.py b/contrib/arm-optimized-routines/math/tools/plot.py
index 6c8b89ff284b..a0fa02322560 100755
--- a/contrib/arm-optimized-routines/math/tools/plot.py
+++ b/contrib/arm-optimized-routines/math/tools/plot.py
@@ -3,7 +3,7 @@
# ULP error plot tool.
#
# Copyright (c) 2019, Arm Limited.
-# SPDX-License-Identifier: MIT
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
import numpy as np
import matplotlib.pyplot as plt
diff --git a/contrib/arm-optimized-routines/math/tools/remez.jl b/contrib/arm-optimized-routines/math/tools/remez.jl
index 2ff436f5287f..1deab67d0660 100755
--- a/contrib/arm-optimized-routines/math/tools/remez.jl
+++ b/contrib/arm-optimized-routines/math/tools/remez.jl
@@ -4,7 +4,7 @@
# remez.jl - implementation of the Remez algorithm for polynomial approximation
#
# Copyright (c) 2015-2019, Arm Limited.
-# SPDX-License-Identifier: MIT
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
import Base.\
diff --git a/contrib/arm-optimized-routines/math/tools/sin.sollya b/contrib/arm-optimized-routines/math/tools/sin.sollya
index a6e851145c11..a19300019867 100644
--- a/contrib/arm-optimized-routines/math/tools/sin.sollya
+++ b/contrib/arm-optimized-routines/math/tools/sin.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating sin(x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 7; // polynomial degree
a = -pi/4; // interval
diff --git a/contrib/arm-optimized-routines/math/tools/sincos.sollya b/contrib/arm-optimized-routines/math/tools/sincos.sollya
new file mode 100644
index 000000000000..600368507f4e
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/sincos.sollya
@@ -0,0 +1,33 @@
+// polynomial for approximating cos(x)
+//
+// Copyright (c) 2023-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// This script only finds the coeffs for cos - see math/aarch64/advsimd/sin.c for sin coeffs
+
+deg = 14; // polynomial degree
+a = -pi/4; // interval
+b = pi/4;
+
+// find even polynomial with minimal abs error compared to cos(x)
+
+f = cos(x);
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
+approx = proc(poly,d) {
+ return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = 1;
+for i from 1 to deg/2 do {
+ p = roundcoefficients(approx(poly,2*i), [|double ...|]);
+ poly = poly + x^(2*i)*coeff(p,0);
+};
+
+display = hexadecimal;
+//print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+//print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/math/tools/sincosf.sollya b/contrib/arm-optimized-routines/math/tools/sincosf.sollya
new file mode 100644
index 000000000000..add874e87a9a
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/sincosf.sollya
@@ -0,0 +1,33 @@
+// polynomial for approximating cos(x)
+//
+// Copyright (c) 2023-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// This script only finds the coeffs for cos - see math/tools/sin.sollya for sin coeffs.
+
+deg = 8; // polynomial degree
+a = -pi/4; // interval
+b = pi/4;
+
+// find even polynomial with minimal abs error compared to cos(x)
+
+f = cos(x);
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
+approx = proc(poly,d) {
+ return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = 1;
+for i from 1 to deg/2 do {
+ p = roundcoefficients(approx(poly,2*i), [|single ...|]);
+ poly = poly + x^(2*i)*coeff(p,0);
+};
+
+display = hexadecimal;
+//print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+//print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/math/tools/sinpi.sollya b/contrib/arm-optimized-routines/math/tools/sinpi.sollya
new file mode 100644
index 000000000000..9bc5b1c7fc2a
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/sinpi.sollya
@@ -0,0 +1,33 @@
+// polynomial for approximating sinpi(x)
+//
+// Copyright (c) 2023-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 19; // polynomial degree
+a = -1/2; // interval
+b = 1/2;
+
+// find even polynomial with minimal abs error compared to sinpi(x)
+
+// f = sin(pi* x);
+f = pi*x;
+c = 1;
+for i from 1 to 80 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*(pi*x)^(2*i+1)/c; };
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
+approx = proc(poly,d) {
+ return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// first coeff is predefine, iteratively find optimal double prec coeffs
+poly = pi*x;
+for i from 0 to (deg-1)/2 do {
+ p = roundcoefficients(approx(poly,2*i+1), [|D ...|]);
+ poly = poly + x^(2*i+1)*coeff(p,0);
+};
+
+display = hexadecimal;
+print("abs error:", accurateinfnorm(sin(pi*x)-poly(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/math/tools/tan.sollya b/contrib/arm-optimized-routines/math/tools/tan.sollya
new file mode 100644
index 000000000000..ca8a170bedaa
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/tan.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating double precision tan(x)
+//
+// Copyright (c) 2023-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 8;
+
+// interval bounds
+a = 0x1.0p-126;
+b = pi / 8;
+
+display = hexadecimal;
+
+f = (tan(sqrt(x))-sqrt(x))/x^(3/2);
+poly = fpminimax(f, deg, [|double ...|], [a*a;b*b]);
+
+//print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/math/tools/tanf.sollya b/contrib/arm-optimized-routines/math/tools/tanf.sollya
new file mode 100644
index 000000000000..054d3db44046
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/tanf.sollya
@@ -0,0 +1,78 @@
+// polynomial for approximating single precision tan(x)
+//
+// Copyright (c) 2022-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+dtype = single;
+
+mthd = 0; // approximate tan
+deg = 5; // poly degree
+
+// // Uncomment for cotan
+// mthd = 1; // approximate cotan
+// deg = 3; // poly degree
+
+// interval bounds
+a = 0x1.0p-126;
+b = pi / 4;
+
+print("Print some useful constants");
+display = hexadecimal!;
+if (dtype==double) then { prec = 53!; }
+else if (dtype==single) then { prec = 23!; };
+
+print("pi/4");
+pi/4;
+
+// Setup precisions (display and computation)
+display = decimal!;
+prec=128!;
+save_prec=prec;
+
+//
+// Select function to approximate with Sollya
+//
+if(mthd==0) then {
+ s = "x + x^3 * P(x^2)";
+ g = tan(x);
+ F = proc(P) { return x + x^3 * P(x^2); };
+ f = (g(sqrt(x))-sqrt(x))/(x*sqrt(x));
+ init_poly = 0;
+ // Display info
+ print("Approximate g(x) =", g, "as F(x)=", s, ".");
+ poly = fpminimax(f, deg, [|dtype ...|], [a*a;b*b]);
+}
+else if (mthd==1) then {
+ s = "1/x + x * P(x^2)";
+ g = 1 / tan(x);
+ F = proc(P) { return 1/x + x * P(x^2); };
+ f = (g(sqrt(x))-1/sqrt(x))/(sqrt(x));
+ init_poly = 0;
+ deg_init_poly = -1; // a value such that we actually start by building constant coefficient
+ // Display info
+ print("Approximate g(x) =", g, "as F(x)=", s, ".");
+ // Fpminimax used to minimise absolute error
+ approx_fpminimax = proc(func, poly, d) {
+ return fpminimax(func - poly / x^-(deg-d), 0, [|dtype|], [a;b], absolute, floating);
+ };
+ // Optimise all coefficients at once
+ poly = fpminimax(f, [|0,...,deg|], [|dtype ...|], [a;b], absolute, floating);
+};
+
+
+//
+// Display coefficients in Sollya
+//
+display = hexadecimal!;
+if (dtype==double) then { prec = 53!; }
+else if (dtype==single) then { prec = 23!; };
+print("_coeffs :_ hex");
+for i from 0 to deg do coeff(poly, i);
+
+// Compute errors
+display = hexadecimal!;
+d_rel_err = dirtyinfnorm(1-F(poly)/g(x), [a;b]);
+d_abs_err = dirtyinfnorm(g(x)-F(poly), [a;b]);
+print("dirty rel error:", d_rel_err);
+print("dirty abs error:", d_abs_err);
+print("in [",a,b,"]");
diff --git a/contrib/arm-optimized-routines/math/tools/tanpi.sollya b/contrib/arm-optimized-routines/math/tools/tanpi.sollya
new file mode 100644
index 000000000000..8edbc359ab8e
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/tanpi.sollya
@@ -0,0 +1,48 @@
+// polynomial for approximating tanpi/f(x)
+//
+// Copyright (c) 2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// 0 for tanpi/f [0,0.25], 1 for tanpi/f [0.25,1]
+method = 0;
+dtype = double;
+
+if (dtype == single) then {
+ if (method == 0) then { deg = 5; }
+ else if (method == 1) then { deg = 3; };
+} else if (dtype == double) then {
+ if (method == 0) then { deg = 13; }
+ else if (method == 1) then { deg = 8; };
+};
+
+a = 0x1.0p-126;
+b = 1/4;
+
+if (method == 0) then {
+ g = tan(pi * x);
+ F = proc(P) { return pi * x + x^3 * P(x^2); };
+ f = (g(sqrt(x)) - pi * sqrt(x))/(x^(3/2));
+} else if (method == 1) then {
+ g = 1/tan(pi * x);
+ F = proc(P) { return 1/(pi * x) + x * P(x^2); };
+ f = (g(sqrt(x)) / sqrt(x)) - 1/(pi * x);
+};
+
+poly = fpminimax(f, deg, [|dtype ...|], [a*a;b*b]);
+
+//
+// Display coefficients in Sollya
+//
+display = hexadecimal!;
+if (dtype==double) then { prec = 53!; }
+else if (dtype==single) then { prec = 23!; };
+print("_coeffs :_ hex");
+for i from 0 to deg do coeff(poly, i);
+
+// Compute errors
+//display = hexadecimal!;
+d_rel_err = dirtyinfnorm(1-F(poly)/g(x), [a;b]);
+d_abs_err = dirtyinfnorm(g(x)-F(poly), [a;b]);
+print("dirty rel error:", d_rel_err);
+print("dirty abs error:", d_abs_err);
+print("in [",a,b,"]");
diff --git a/contrib/arm-optimized-routines/math/tools/tgamma128_gen.jl b/contrib/arm-optimized-routines/math/tools/tgamma128_gen.jl
new file mode 100644
index 000000000000..ecec174110ea
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/tgamma128_gen.jl
@@ -0,0 +1,212 @@
+# -*- julia -*-
+#
+# Generate tgamma128.h, containing polynomials and constants used by
+# tgamma128.c.
+#
+# Copyright (c) 2006-2023, Arm Limited.
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+# This Julia program depends on the 'Remez' and 'SpecialFunctions'
+# library packages. To install them, run this at the interactive Julia
+# prompt:
+#
+# import Pkg; Pkg.add(["Remez", "SpecialFunctions"])
+#
+# Tested on Julia 1.4.1 (Ubuntu 20.04) and 1.9.0 (22.04).
+
+import Printf
+import Remez
+import SpecialFunctions
+
+# Round a BigFloat to 128-bit long double and format it as a C99 hex
+# float literal.
+function quadhex(x)
+ sign = " "
+ if x < 0
+ sign = "-"
+ x = -x
+ end
+
+ exponent = BigInt(floor(log2(x)))
+ exponent = max(exponent, -16382)
+ @assert(exponent <= 16383) # else overflow
+
+ x /= BigFloat(2)^exponent
+ @assert(1 <= x < 2)
+ x *= BigFloat(2)^112
+ mantissa = BigInt(round(x))
+
+ mantstr = string(mantissa, base=16, pad=29)
+ return Printf.@sprintf("%s0x%s.%sp%+dL", sign, mantstr[1], mantstr[2:end],
+ exponent)
+end
+
+# Round a BigFloat to 128-bit long double and return it still as a
+# BigFloat.
+function quadval(x, round=0)
+ sign = +1
+ if x.sign < 0
+ sign = -1
+ x = -x
+ end
+
+ exponent = BigInt(floor(log2(x)))
+ exponent = max(exponent, -16382)
+ @assert(exponent <= 16383) # else overflow
+
+ x /= BigFloat(2)^exponent
+ @assert(1 <= x < 2)
+ x *= BigFloat(2)^112
+ if round < 0
+ mantissa = floor(x)
+ elseif round > 0
+ mantissa = ceil(x)
+ else
+ mantissa = round(x)
+ end
+
+ return sign * mantissa * BigFloat(2)^(exponent - 112)
+end
+
+# Output an array of BigFloats as a C array declaration.
+function dumparray(a, name)
+ println("static const long double ", name, "[] = {")
+ for x in N
+ println(" ", quadhex(x), ",")
+ end
+ println("};")
+end
+
+print("/*
+ * Polynomial coefficients and other constants for tgamma128.c.
+ *
+ * Copyright (c) 2006,2009,2023 Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+")
+
+Base.MPFR.setprecision(512)
+
+e = exp(BigFloat(1))
+
+print("
+/* The largest positive value for which 128-bit tgamma does not overflow. */
+")
+lo = BigFloat("1000")
+hi = BigFloat("2000")
+while true
+ global lo
+ global hi
+ global max_x
+
+ mid = (lo + hi) / 2
+ if mid == lo || mid == hi
+ max_x = mid
+ break
+ end
+ if SpecialFunctions.logabsgamma(mid)[1] < 16384 * log(BigFloat(2))
+ lo = mid
+ else
+ hi = mid
+ end
+end
+max_x = quadval(max_x, -1)
+println("static const long double max_x = ", quadhex(max_x), ";")
+
+print("
+/* Coefficients of the polynomial used in the tgamma_large() subroutine */
+")
+N, D, E, X = Remez.ratfn_minimax(
+ x -> x==0 ? sqrt(BigFloat(2)*pi/e) :
+ exp(SpecialFunctions.logabsgamma(1/x)[1] +
+ (1/x-0.5)*(1+log(x))),
+ (0, 1/BigFloat(8)),
+ 24, 0,
+ (x, y) -> 1/y
+)
+dumparray(N, "coeffs_large")
+
+print("
+/* Coefficients of the polynomial used in the tgamma_tiny() subroutine */
+")
+N, D, E, X = Remez.ratfn_minimax(
+ x -> x==0 ? 1 : 1/(x*SpecialFunctions.gamma(x)),
+ (0, 1/BigFloat(32)),
+ 13, 0,
+)
+dumparray(N, "coeffs_tiny")
+
+print("
+/* The location within the interval [1,2] where gamma has a minimum.
+ * Specified as the sum of two 128-bit values, for extra precision. */
+")
+lo = BigFloat("1.4")
+hi = BigFloat("1.5")
+while true
+ global lo
+ global hi
+ global min_x
+
+ mid = (lo + hi) / 2
+ if mid == lo || mid == hi
+ min_x = mid
+ break
+ end
+ if SpecialFunctions.digamma(mid) < 0
+ lo = mid
+ else
+ hi = mid
+ end
+end
+min_x_hi = quadval(min_x, -1)
+println("static const long double min_x_hi = ", quadhex(min_x_hi), ";")
+println("static const long double min_x_lo = ", quadhex(min_x - min_x_hi), ";")
+
+print("
+/* The actual minimum value that gamma takes at that location.
+ * Again specified as the sum of two 128-bit values. */
+")
+min_y = SpecialFunctions.gamma(min_x)
+min_y_hi = quadval(min_y, -1)
+println("static const long double min_y_hi = ", quadhex(min_y_hi), ";")
+println("static const long double min_y_lo = ", quadhex(min_y - min_y_hi), ";")
+
+function taylor_bodge(x)
+ # Taylor series generated by Wolfram Alpha for (gamma(min_x+x)-min_y)/x^2.
+ # Used in the Remez calls below for x values very near the origin, to avoid
+ # significance loss problems when trying to compute it directly via that
+ # formula (even in MPFR's extra precision).
+ return BigFloat("0.428486815855585429730209907810650582960483696962660010556335457558784421896667728014324097132413696263704801646004585959298743677879606168187061990204432200")+x*(-BigFloat("0.130704158939785761928008749242671025181542078105370084716141350308119418619652583986015464395882363802104154017741656168641240436089858504560718773026275797")+x*(BigFloat("0.160890753325112844190519489594363387594505844658437718135952967735294789599989664428071656484587979507034160383271974554122934842441540146372016567834062876")+x*(-BigFloat("0.092277030213334350126864106458600575084335085690780082222880945224248438672595248111704471182201673989215223667543694847795410779036800385804729955729659506"))))
+end
+
+print("
+/* Coefficients of the polynomial used in the tgamma_central() subroutine
+ * for computing gamma on the interval [1,min_x] */
+")
+N, D, E, X = Remez.ratfn_minimax(
+ x -> x < BigFloat(0x1p-64) ? taylor_bodge(-x) :
+ (SpecialFunctions.gamma(min_x - x) - min_y) / (x*x),
+ (0, min_x - 1),
+ 31, 0,
+ (x, y) -> x^2,
+)
+dumparray(N, "coeffs_central_neg")
+
+print("
+/* Coefficients of the polynomial used in the tgamma_central() subroutine
+ * for computing gamma on the interval [min_x,2] */
+")
+N, D, E, X = Remez.ratfn_minimax(
+ x -> x < BigFloat(0x1p-64) ? taylor_bodge(x) :
+ (SpecialFunctions.gamma(min_x + x) - min_y) / (x*x),
+ (0, 2 - min_x),
+ 28, 0,
+ (x, y) -> x^2,
+)
+dumparray(N, "coeffs_central_pos")
+
+print("
+/* 128-bit float value of pi, used by the sin_pi_x_over_pi subroutine
+ */
+")
+println("static const long double pi = ", quadhex(BigFloat(pi)), ";")
diff --git a/contrib/arm-optimized-routines/math/tools/v_erf.sollya b/contrib/arm-optimized-routines/math/tools/v_erf.sollya
new file mode 100644
index 000000000000..5d7795842bcd
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/v_erf.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating erf(x).
+// To generate coefficients for interval i (0 to 47) do:
+// $ sollya v_erf.sollya $i
+//
+// Copyright (c) 2022-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+scale = 1/8;
+deg = 9;
+
+itv = parse(__argv[0]);
+if (itv == 0) then { a = 0x1p-1022; }
+else { a = itv * scale; };
+
+prec=256;
+
+poly = fpminimax(erf(scale*x+a), deg, [|D ...|], [0; 1]);
+
+display = hexadecimal;
+for i from 0 to deg do coeff(poly, i); \ No newline at end of file
diff --git a/contrib/arm-optimized-routines/math/tools/v_erfc.sollya b/contrib/arm-optimized-routines/math/tools/v_erfc.sollya
new file mode 100644
index 000000000000..764b333d6d25
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/v_erfc.sollya
@@ -0,0 +1,46 @@
+// polynomial for approximating erfc(x)*exp(x*x)
+//
+// Copyright (c) 2022-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 12; // poly degree
+
+itv = parse(__argv[0]);
+
+bounds = [|3.725290298461914e-9,
+ 0.18920711500272103,
+ 0.41421356237309515,
+ 0.681792830507429,
+ 1,
+ 1.378414230005442,
+ 1.8284271247461903,
+ 2.363585661014858,
+ 3,
+ 3.756828460010884,
+ 4.656854249492381,
+ 5.727171322029716,
+ 7,
+ 8.513656920021768,
+ 10.313708498984761,
+ 12.454342644059432,
+ 15,
+ 18.027313840043536,
+ 21.627416997969522,
+ 25.908685288118864,
+ 31|];
+
+a = bounds[itv];
+b = bounds[itv + 1];
+
+f = proc(y) {
+ t = y + a;
+ return erfc(t) * exp(t*t);
+};
+
+poly = fpminimax(f(x), deg, [|double ...|], [0;b-a]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly, i);
diff --git a/contrib/arm-optimized-routines/math/tools/v_exp.sollya b/contrib/arm-optimized-routines/math/tools/v_exp.sollya
index c0abb63fb642..5fa7de7435a9 100644
--- a/contrib/arm-optimized-routines/math/tools/v_exp.sollya
+++ b/contrib/arm-optimized-routines/math/tools/v_exp.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating e^x
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 4; // poly degree
N = 128; // table entries
diff --git a/contrib/arm-optimized-routines/math/tools/v_log.sollya b/contrib/arm-optimized-routines/math/tools/v_log.sollya
index cc3d2c4ae72a..d982524eb920 100644
--- a/contrib/arm-optimized-routines/math/tools/v_log.sollya
+++ b/contrib/arm-optimized-routines/math/tools/v_log.sollya
@@ -1,7 +1,7 @@
// polynomial used for __v_log(x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 6; // poly degree
a = -0x1.fc1p-9;
diff --git a/contrib/arm-optimized-routines/math/tools/v_log10.sollya b/contrib/arm-optimized-routines/math/tools/v_log10.sollya
new file mode 100644
index 000000000000..5181074f6762
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/v_log10.sollya
@@ -0,0 +1,38 @@
+// polynomial used for __v_log10(x)
+//
+// Copyright (c) 2019-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 6; // poly degree
+a = -0x1.fc1p-9;
+b = 0x1.009p-8;
+
+// find log(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = 1;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+// scale coefficients by 1/ln(10)
+ln10 = evaluate(log(10),0);
+poly = poly/ln10;
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do double(coeff(poly,i));
diff --git a/contrib/arm-optimized-routines/math/tools/v_log10f.sollya b/contrib/arm-optimized-routines/math/tools/v_log10f.sollya
new file mode 100644
index 000000000000..4906cb1d2137
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/v_log10f.sollya
@@ -0,0 +1,45 @@
+// polynomial for approximating v_log10f(1+x)
+//
+// Copyright (c) 2019-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 9; // poly degree
+// |log10(1+x)| > 0x1p-4 outside the interval
+a = -1/3;
+b = 1/3;
+
+display = hexadecimal;
+print("log10(2) = ", single(log10(2)));
+
+ln10 = evaluate(log(10),0);
+invln10 = single(1/ln10);
+
+// find log10(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log10(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+f = f/ln10;
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = invln10;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|SG ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+display = hexadecimal;
+print("invln10:", invln10);
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do single(coeff(poly,i));
+
+display = decimal;
+print("in [",a,b,"]");
diff --git a/contrib/arm-optimized-routines/math/tools/v_log2f.sollya b/contrib/arm-optimized-routines/math/tools/v_log2f.sollya
new file mode 100644
index 000000000000..337d4830a2ae
--- /dev/null
+++ b/contrib/arm-optimized-routines/math/tools/v_log2f.sollya
@@ -0,0 +1,38 @@
+// polynomial used for __v_log2f(x)
+//
+// Copyright (c) 2022-2024, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 9; // poly degree
+a = -1/3;
+b = 1/3;
+
+ln2 = evaluate(log(2),0);
+invln2 = single(1/ln2);
+
+// find log2(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log2(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log2(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+f = f * invln2;
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = invln2;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|SG ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/math/tools/v_sin.sollya b/contrib/arm-optimized-routines/math/tools/v_sin.sollya
index 65cc9957c624..63b9d65a1ac3 100644
--- a/contrib/arm-optimized-routines/math/tools/v_sin.sollya
+++ b/contrib/arm-optimized-routines/math/tools/v_sin.sollya
@@ -1,7 +1,7 @@
// polynomial for approximating sin(x)
//
// Copyright (c) 2019, Arm Limited.
-// SPDX-License-Identifier: MIT
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
deg = 15; // polynomial degree
a = -pi/2; // interval