diff options
Diffstat (limited to 'contrib/arm-optimized-routines/math/tools')
44 files changed, 1209 insertions, 13 deletions
diff --git a/contrib/arm-optimized-routines/math/tools/asin.sollya b/contrib/arm-optimized-routines/math/tools/asin.sollya new file mode 100644 index 000000000000..02c4a93356c3 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/asin.sollya @@ -0,0 +1,29 @@ +// polynomial for approximating asin(x) +// +// Copyright (c) 2023-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +f = asin(x); +dtype = double; + +prec=256; + +a = 0x1p-106; +b = 0.25; + +deg = 11; + +backward = proc(poly, d) { + return d + d ^ 3 * poly(d * d); +}; + +forward = proc(f, d) { + return (f(sqrt(d))-sqrt(d))/(d*sqrt(d)); +}; + +poly = fpminimax(forward(f, x), [|0,...,deg|], [|dtype ...|], [a;b], relative, floating); + +display = hexadecimal!; +print("rel error:", dirtyinfnorm(1-backward(poly, x)/f(x), [a;b])); +print("in [", a, b, "]"); +for i from 0 to deg do print(coeff(poly, i)); diff --git a/contrib/arm-optimized-routines/math/tools/asinf.sollya b/contrib/arm-optimized-routines/math/tools/asinf.sollya new file mode 100644 index 000000000000..69d1803875d1 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/asinf.sollya @@ -0,0 +1,36 @@ +// polynomial for approximating asinf(x) +// +// Copyright (c) 2023-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +f = asin(x); +dtype = single; + +a = 0x1p-24; +b = 0.25; + +deg = 4; + +backward = proc(poly, d) { + return d + d ^ 3 * poly(d * d); +}; + +forward = proc(f, d) { + return (f(sqrt(d))-sqrt(d))/(d*sqrt(d)); +}; + +approx = proc(poly, d) { + return remez(1 - poly(x) / forward(f, x), deg - d, [a;b], x^d/forward(f, x), 1e-16); +}; + +poly = 0; +for i from 0 to deg do { + i; + p = roundcoefficients(approx(poly,i), [|dtype ...|]); + poly = poly + x^i*coeff(p,0); +}; + +display = hexadecimal!; +print("rel error:", accurateinfnorm(1-backward(poly, x)/f(x), [a;b], 30)); +print("in [", a, b, "]"); +for i from 0 to deg do print(coeff(poly, i)); diff --git a/contrib/arm-optimized-routines/math/tools/asinh.sollya b/contrib/arm-optimized-routines/math/tools/asinh.sollya new file mode 100644 index 000000000000..eea9b8081168 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/asinh.sollya @@ -0,0 +1,28 @@ +// polynomial for approximating asinh(x) +// +// Copyright (c) 2022-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +// Polynomial is used in [2^-26, 1]. However it is least accurate close to 1, so +// we use 2^-6 as the lower bound for coeff generation, which yields sufficiently +// accurate results in [2^-26, 2^-6]. +a = 0x1p-6; +b = 1.0; + +f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2); + +approx = proc(poly, d) { + return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); +}; + +poly = 0; +for i from 0 to deg do { + i; + p = roundcoefficients(approx(poly,i), [|D ...|]); + poly = poly + x^i*coeff(p,0); +}; + + +display = hexadecimal; +print("coeffs:"); +for i from 0 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/math/tools/asinhf.sollya b/contrib/arm-optimized-routines/math/tools/asinhf.sollya new file mode 100644 index 000000000000..5f1580fce883 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/asinhf.sollya @@ -0,0 +1,29 @@ +// polynomial for approximating asinh(x) +// +// Copyright (c) 2022-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 9; + +a = 0x1.0p-12; +b = 1.0; + +f = proc(y) { + return asinh(x); +}; + +approx = proc(poly, d) { + return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); +}; + +poly = x; +for i from 2 to deg do { + p = roundcoefficients(approx(poly,i), [|SG ...|]); + poly = poly + x^i*coeff(p,0); +}; + +display = hexadecimal; +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 2 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/math/tools/atan.sollya b/contrib/arm-optimized-routines/math/tools/atan.sollya new file mode 100644 index 000000000000..048017d8d269 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/atan.sollya @@ -0,0 +1,23 @@ +// polynomial for approximating atan(x) and atan2(y, x) +// +// Copyright (c) 2022-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +// atan is odd, so approximate with an odd polynomial: +// x + ax^3 + bx^5 + cx^7 + ... +// We generate a, b, c, ... such that we can approximate atan(x) by: +// x + x^3 * (a + bx^2 + cx^4 + ...) + +// Assemble monomials +deg = 20; +mons = [|1,...,deg|]; +for i from 0 to deg-1 do mons[i] = mons[i] * 2 + 1; + +a = 0x1.0p-1022; +b = 1; + +poly = fpminimax(atan(x)-x, mons, [|double ...|], [a;b]); + +display = hexadecimal; +print("coeffs:"); +for i from 0 to deg-1 do coeff(poly,mons[i]); diff --git a/contrib/arm-optimized-routines/math/tools/atanf.sollya b/contrib/arm-optimized-routines/math/tools/atanf.sollya new file mode 100644 index 000000000000..21c3ba2bfa1d --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/atanf.sollya @@ -0,0 +1,20 @@ +// polynomial for approximating atanf(x) +// +// Copyright (c) 2022-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +// Generate list of monomials: +// Taylor series of atan is of the form x + ax^3 + bx^5 + cx^7 + ... +// So generate a, b, c, ... such that we can approximate atan(x) by: +// x + x^3 * (a + bx^2 + cx^4 + ...) + +deg = 7; + +a = 1.1754943508222875e-38; +b = 1; + +poly = fpminimax((atan(sqrt(x))-sqrt(x))/x^(3/2), deg, [|single ...|], [a;b]); + +display = hexadecimal; +print("coeffs:"); +for i from 0 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/math/tools/cbrt.sollya b/contrib/arm-optimized-routines/math/tools/cbrt.sollya new file mode 100644 index 000000000000..2490a69ac029 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/cbrt.sollya @@ -0,0 +1,20 @@ +// polynomial for approximating cbrt(x) in double precision +// +// Copyright (c) 2022-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 3; + +a = 0.5; +b = 1; + + +f = x^(1/3); + +poly = fpminimax(f, deg, [|double ...|], [a;b]); + +display = hexadecimal; +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 0 to deg do round(coeff(poly,i), D, RN); diff --git a/contrib/arm-optimized-routines/math/tools/cbrtf.sollya b/contrib/arm-optimized-routines/math/tools/cbrtf.sollya new file mode 100644 index 000000000000..1debf930e722 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/cbrtf.sollya @@ -0,0 +1,20 @@ +// polynomial for approximating cbrt(x) in single precision +// +// Copyright (c) 2022-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 3; + +a = 0.5; +b = 1; + + +f = x^(1/3); + +poly = fpminimax(f, deg, [|single ...|], [a;b]); + +display = hexadecimal; +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 0 to deg do round(coeff(poly,i), SG, RN); diff --git a/contrib/arm-optimized-routines/math/tools/cos.sollya b/contrib/arm-optimized-routines/math/tools/cos.sollya index bd72d6b74820..6690adfcbb9b 100644 --- a/contrib/arm-optimized-routines/math/tools/cos.sollya +++ b/contrib/arm-optimized-routines/math/tools/cos.sollya @@ -1,7 +1,7 @@ // polynomial for approximating cos(x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 8; // polynomial degree a = -pi/4; // interval diff --git a/contrib/arm-optimized-routines/math/tools/erf.sollya b/contrib/arm-optimized-routines/math/tools/erf.sollya new file mode 100644 index 000000000000..060e1686c835 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/erf.sollya @@ -0,0 +1,25 @@ +// tables and constants for approximating erf(x). +// +// Copyright (c) 2023-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +display = hexadecimal; +prec=128; + +// Tables +print("{ i, r, erf(r), 2/sqrt(pi) * exp(-r^2)}"); +for i from 0 to 768 do { + r = i / 128; + t0 = double(erf(r)); + t1 = double(2/sqrt(pi) * exp(-r * r)); + print("{ " @ i @ ",\t" @ r @ ",\t" @ t0 @ ",\t" @ t1 @ " },"); +}; + +// Constants +double(1/3); +double(1/10); +double(2/15); +double(2/9); +double(2/45); +double(2/sqrt(pi)); + diff --git a/contrib/arm-optimized-routines/math/tools/erfc.sollya b/contrib/arm-optimized-routines/math/tools/erfc.sollya new file mode 100644 index 000000000000..1b4b00066093 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/erfc.sollya @@ -0,0 +1,51 @@ +// tables and constants for approximating erfc(x). +// +// Copyright (c) 2023-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +display = hexadecimal; +prec=128; + +// Tables +print("{ i, r, erfc(r), 2/sqrt(pi) * exp(-r^2) }"); +for i from 0 to 3787 do { + r = 0.0 + i / 128; + t0 = double(erfc(r) * 2^128); + t1 = double(2/sqrt(pi) * exp(-r * r) * 2^128); + print("{ " @ t0 @ ",\t" @ t1 @ " },"); +}; + +// Constants +print("> 2/sqrt(pi)"); +double(2/sqrt(pi)); + +print("> 1/3"); +double(1/3); + +print("> P5"); +double(2/15); +double(1/10); +double(2/9); +double(2/45); + +print("> P6"); +double(1/42); +double(1/7); +double(2/21); +double(4/315); + +print("> Q"); +double( 5.0 / 4.0); +double( 6.0 / 5.0); +double( 7.0 / 6.0); +double( 8.0 / 7.0); +double( 9.0 / 8.0); +double(10.0 / 9.0); + +print("> R"); +double(-2.0 * 4.0 / (5.0 * 6.0)); +double(-2.0 * 5.0 / (6.0 * 7.0)); +double(-2.0 * 6.0 / (7.0 * 8.0)); +double(-2.0 * 7.0 / (8.0 * 9.0)); +double(-2.0 * 8.0 / (9.0 * 10.0)); +double(-2.0 * 9.0 / (10.0 * 11.0)); diff --git a/contrib/arm-optimized-routines/math/tools/erfcf.sollya b/contrib/arm-optimized-routines/math/tools/erfcf.sollya new file mode 100644 index 000000000000..a8e0409f5db5 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/erfcf.sollya @@ -0,0 +1,22 @@ +// tables and constants for approximating erfcf(x). +// +// Copyright (c) 2023-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +display = hexadecimal; +prec=128; + +// Tables +print("{ i, r, erfc(r), 2/sqrt(pi) * exp(-r^2) }"); +for i from 0 to 644 do { + r = 0.0 + i / 64; + t0 = single(erfc(r) * 2^47); + t1 = single(2/sqrt(pi) * exp(-r * r) * 2^47); + print("{ " @ t0 @ ",\t" @ t1 @ " },"); +}; + +// Constants +single(1/3); +single(2/15); +single(1/10); +single(2/sqrt(pi)); diff --git a/contrib/arm-optimized-routines/math/tools/erff.sollya b/contrib/arm-optimized-routines/math/tools/erff.sollya new file mode 100644 index 000000000000..c0178a2b24ad --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/erff.sollya @@ -0,0 +1,20 @@ +// tables and constants for approximating erff(x). +// +// Copyright (c) 2023-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +display = hexadecimal; +prec=128; + +// Tables +print("{ i, r, erf(r), 2/sqrt(pi) * exp(-r^2)}"); +for i from 0 to 512 do { + r = i / 128; + t0 = single(erf(r)); + t1 = single(2/sqrt(pi) * exp(-r * r)); + print("{ " @ i @ ",\t" @ r @ ",\t" @ t0 @ ",\t" @ t1 @ " },"); +}; + +// Constants +single(1/3); +single(2/sqrt(pi)); diff --git a/contrib/arm-optimized-routines/math/tools/exp.sollya b/contrib/arm-optimized-routines/math/tools/exp.sollya index b7a462cda5a4..0668bdb5b3d3 100644 --- a/contrib/arm-optimized-routines/math/tools/exp.sollya +++ b/contrib/arm-optimized-routines/math/tools/exp.sollya @@ -1,7 +1,7 @@ // polynomial for approximating e^x // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 5; // poly degree N = 128; // table entries diff --git a/contrib/arm-optimized-routines/math/tools/exp10.sollya b/contrib/arm-optimized-routines/math/tools/exp10.sollya new file mode 100644 index 000000000000..91f92595b96d --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/exp10.sollya @@ -0,0 +1,55 @@ +// polynomial for approximating 10^x +// +// Copyright (c) 2023-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +// exp10f parameters +deg = 5; // poly degree +N = 1; // Neon 1, SVE 64 +b = log(2)/(2 * N * log(10)); // interval +a = -b; +wp = single; + +// exp10 parameters +//deg = 4; // poly degree - bump to 5 for ~1 ULP +//N = 128; // table size +//b = log(2)/(2 * N * log(10)); // interval +//a = -b; +//wp = D; + + +// find polynomial with minimal relative error + +f = 10^x; + +// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| +approx = proc(poly,d) { + return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); +}; +// return p that minimizes |f(x) - poly(x) - x^d*p(x)| +approx_abs = proc(poly,d) { + return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10); +}; + +// first coeff is fixed, iteratively find optimal double prec coeffs +poly = 1; +for i from 1 to deg do { + p = roundcoefficients(approx(poly,i), [|wp ...|]); +// p = roundcoefficients(approx_abs(poly,i), [|wp ...|]); + poly = poly + x^i*coeff(p,0); +}; + +display = hexadecimal; +print("rel error:", accurateinfnorm(1-poly(x)/10^x, [a;b], 30)); +print("abs error:", accurateinfnorm(10^x-poly(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 0 to deg do coeff(poly,i); + +log10_2 = round(N * log(10) / log(2), wp, RN); +log2_10 = log(2) / (N * log(10)); +log2_10_hi = round(log2_10, wp, RN); +log2_10_lo = round(log2_10 - log2_10_hi, wp, RN); +print(log10_2); +print(log2_10_hi); +print(log2_10_lo); diff --git a/contrib/arm-optimized-routines/math/tools/exp2.sollya b/contrib/arm-optimized-routines/math/tools/exp2.sollya index e760769601d4..bd0a42d6bbcb 100644 --- a/contrib/arm-optimized-routines/math/tools/exp2.sollya +++ b/contrib/arm-optimized-routines/math/tools/exp2.sollya @@ -1,7 +1,7 @@ // polynomial for approximating 2^x // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception // exp2f parameters deg = 3; // poly degree diff --git a/contrib/arm-optimized-routines/math/tools/expm1.sollya b/contrib/arm-optimized-routines/math/tools/expm1.sollya new file mode 100644 index 000000000000..d87466a066af --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/expm1.sollya @@ -0,0 +1,21 @@ +// polynomial for approximating exp(x)-1 in double precision +// +// Copyright (c) 2022-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 12; + +a = -log(2)/2; +b = log(2)/2; + +f = proc(y) { + return exp(y)-1; +}; + +poly = fpminimax(f(x), deg, [|double ...|], [a;b]); + +display = hexadecimal; +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 2 to deg do round(coeff(poly,i), D, RN); diff --git a/contrib/arm-optimized-routines/math/tools/expm1f.sollya b/contrib/arm-optimized-routines/math/tools/expm1f.sollya new file mode 100644 index 000000000000..bb9496f3f2c4 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/expm1f.sollya @@ -0,0 +1,21 @@ +// polynomial for approximating exp(x)-1 in single precision +// +// Copyright (c) 2022-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 5; + +a = -log(2)/2; +b = log(2)/2; + +f = proc(y) { + return exp(y)-1; +}; + +poly = fpminimax(f(x), deg, [|single ...|], [a;b]); + +display = hexadecimal; +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 2 to deg do round(coeff(poly,i), SG, RN); diff --git a/contrib/arm-optimized-routines/math/tools/log.sollya b/contrib/arm-optimized-routines/math/tools/log.sollya index 6df4db44b6f3..5288f5572925 100644 --- a/contrib/arm-optimized-routines/math/tools/log.sollya +++ b/contrib/arm-optimized-routines/math/tools/log.sollya @@ -1,7 +1,7 @@ // polynomial for approximating log(1+x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 12; // poly degree // |log(1+x)| > 0x1p-4 outside the interval diff --git a/contrib/arm-optimized-routines/math/tools/log10.sollya b/contrib/arm-optimized-routines/math/tools/log10.sollya new file mode 100644 index 000000000000..78f956b14b95 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/log10.sollya @@ -0,0 +1,44 @@ +// polynomial for approximating log10(1+x) +// +// Copyright (c) 2019-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 6; // poly degree +// |log10(1+x)| > 0x1p-5 outside the interval +a = -0x1.p-5; +b = 0x1.p-5; + +ln10 = evaluate(log(10),0); +invln10hi = double(1/ln10 + 0x1p21) - 0x1p21; // round away last 21 bits +invln10lo = double(1/ln10 - invln10hi); + +// find log10(1+x)/x polynomial with minimal relative error +// (minimal relative error polynomial for log10(1+x) is the same * x) +deg = deg-1; // because of /x + +// f = log(1+x)/x; using taylor series +f = 0; +for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; +f = f/ln10; + +// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| +approx = proc(poly,d) { + return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); +}; + +// first coeff is fixed, iteratively find optimal double prec coeffs +poly = invln10hi + invln10lo; +for i from 1 to deg do { + p = roundcoefficients(approx(poly,i), [|D ...|]); + poly = poly + x^i*coeff(p,0); +}; +display = hexadecimal; +print("invln10hi:", invln10hi); +print("invln10lo:", invln10lo); +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 0 to deg do coeff(poly,i); + +display = decimal; +print("in [",a,b,"]"); diff --git a/contrib/arm-optimized-routines/math/tools/log10f.sollya b/contrib/arm-optimized-routines/math/tools/log10f.sollya new file mode 100644 index 000000000000..c64a30aa8e18 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/log10f.sollya @@ -0,0 +1,37 @@ +// polynomial for approximating log10f(1+x) +// +// Copyright (c) 2019-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +// Computation of log10f(1+x) will be carried out in double precision + +deg = 4; // poly degree +// [OFF; 2*OFF] is divided in 2^4 intervals with OFF~0.7 +a = -0.04375; +b = 0.04375; + +// find log(1+x)/x polynomial with minimal relative error +// (minimal relative error polynomial for log(1+x) is the same * x) +deg = deg-1; // because of /x + +// f = log(1+x)/x; using taylor series +f = 0; +for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; + +// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| +approx = proc(poly,d) { + return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); +}; + +// first coeff is fixed, iteratively find optimal double prec coeffs +poly = 1; +for i from 1 to deg do { + p = roundcoefficients(approx(poly,i), [|D ...|]); + poly = poly + x^i*coeff(p,0); +}; + +display = hexadecimal; +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 0 to deg do double(coeff(poly,i)); diff --git a/contrib/arm-optimized-routines/math/tools/log1p.sollya b/contrib/arm-optimized-routines/math/tools/log1p.sollya new file mode 100644 index 000000000000..0cf72081fabb --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/log1p.sollya @@ -0,0 +1,30 @@ +// polynomial for approximating log(1+x) in double precision +// +// Copyright (c) 2022-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 20; + +a = sqrt(2)/2-1; +b = sqrt(2)-1; + +f = proc(y) { + return log(1+y); +}; + +approx = proc(poly, d) { + return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); +}; + +poly = x; +for i from 2 to deg do { + p = roundcoefficients(approx(poly,i), [|D ...|]); + poly = poly + x^i*coeff(p,0); +}; + + +print("coeffs:"); +display = hexadecimal; +for i from 2 to deg do coeff(poly,i); +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); diff --git a/contrib/arm-optimized-routines/math/tools/log1pf.sollya b/contrib/arm-optimized-routines/math/tools/log1pf.sollya new file mode 100644 index 000000000000..fc542c937111 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/log1pf.sollya @@ -0,0 +1,21 @@ +// polynomial for approximating log(1+x) in single precision +// +// Copyright (c) 2022-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 10; + +a = -0.25; +b = 0.5; + +f = proc(y) { + return log(1+y); +}; + +poly = fpminimax(f(x), deg, [|single ...|], [a;b]); + +display = hexadecimal; +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 2 to deg do round(coeff(poly,i), SG, RN); diff --git a/contrib/arm-optimized-routines/math/tools/log2.sollya b/contrib/arm-optimized-routines/math/tools/log2.sollya index 4a364c0f111f..85811be5d90c 100644 --- a/contrib/arm-optimized-routines/math/tools/log2.sollya +++ b/contrib/arm-optimized-routines/math/tools/log2.sollya @@ -1,7 +1,7 @@ // polynomial for approximating log2(1+x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 11; // poly degree // |log2(1+x)| > 0x1p-4 outside the interval diff --git a/contrib/arm-optimized-routines/math/tools/log2_abs.sollya b/contrib/arm-optimized-routines/math/tools/log2_abs.sollya index 82c4dac26fa1..d018ba0145d2 100644 --- a/contrib/arm-optimized-routines/math/tools/log2_abs.sollya +++ b/contrib/arm-optimized-routines/math/tools/log2_abs.sollya @@ -1,7 +1,7 @@ // polynomial for approximating log2(1+x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 7; // poly degree // interval ~= 1/(2*N), where N is the table entries diff --git a/contrib/arm-optimized-routines/math/tools/log_abs.sollya b/contrib/arm-optimized-routines/math/tools/log_abs.sollya index a2ac190fc497..5f9bfe41a683 100644 --- a/contrib/arm-optimized-routines/math/tools/log_abs.sollya +++ b/contrib/arm-optimized-routines/math/tools/log_abs.sollya @@ -1,7 +1,7 @@ // polynomial for approximating log(1+x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 6; // poly degree // interval ~= 1/(2*N), where N is the table entries diff --git a/contrib/arm-optimized-routines/math/tools/plot.py b/contrib/arm-optimized-routines/math/tools/plot.py index 6c8b89ff284b..a0fa02322560 100755 --- a/contrib/arm-optimized-routines/math/tools/plot.py +++ b/contrib/arm-optimized-routines/math/tools/plot.py @@ -3,7 +3,7 @@ # ULP error plot tool. # # Copyright (c) 2019, Arm Limited. -# SPDX-License-Identifier: MIT +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception import numpy as np import matplotlib.pyplot as plt diff --git a/contrib/arm-optimized-routines/math/tools/remez.jl b/contrib/arm-optimized-routines/math/tools/remez.jl index 2ff436f5287f..1deab67d0660 100755 --- a/contrib/arm-optimized-routines/math/tools/remez.jl +++ b/contrib/arm-optimized-routines/math/tools/remez.jl @@ -4,7 +4,7 @@ # remez.jl - implementation of the Remez algorithm for polynomial approximation # # Copyright (c) 2015-2019, Arm Limited. -# SPDX-License-Identifier: MIT +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception import Base.\ diff --git a/contrib/arm-optimized-routines/math/tools/sin.sollya b/contrib/arm-optimized-routines/math/tools/sin.sollya index a6e851145c11..a19300019867 100644 --- a/contrib/arm-optimized-routines/math/tools/sin.sollya +++ b/contrib/arm-optimized-routines/math/tools/sin.sollya @@ -1,7 +1,7 @@ // polynomial for approximating sin(x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 7; // polynomial degree a = -pi/4; // interval diff --git a/contrib/arm-optimized-routines/math/tools/sincos.sollya b/contrib/arm-optimized-routines/math/tools/sincos.sollya new file mode 100644 index 000000000000..600368507f4e --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/sincos.sollya @@ -0,0 +1,33 @@ +// polynomial for approximating cos(x) +// +// Copyright (c) 2023-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +// This script only finds the coeffs for cos - see math/aarch64/advsimd/sin.c for sin coeffs + +deg = 14; // polynomial degree +a = -pi/4; // interval +b = pi/4; + +// find even polynomial with minimal abs error compared to cos(x) + +f = cos(x); + +// return p that minimizes |f(x) - poly(x) - x^d*p(x)| +approx = proc(poly,d) { + return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10); +}; + +// first coeff is fixed, iteratively find optimal double prec coeffs +poly = 1; +for i from 1 to deg/2 do { + p = roundcoefficients(approx(poly,2*i), [|double ...|]); + poly = poly + x^(2*i)*coeff(p,0); +}; + +display = hexadecimal; +//print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +//print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 0 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/math/tools/sincosf.sollya b/contrib/arm-optimized-routines/math/tools/sincosf.sollya new file mode 100644 index 000000000000..add874e87a9a --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/sincosf.sollya @@ -0,0 +1,33 @@ +// polynomial for approximating cos(x) +// +// Copyright (c) 2023-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +// This script only finds the coeffs for cos - see math/tools/sin.sollya for sin coeffs. + +deg = 8; // polynomial degree +a = -pi/4; // interval +b = pi/4; + +// find even polynomial with minimal abs error compared to cos(x) + +f = cos(x); + +// return p that minimizes |f(x) - poly(x) - x^d*p(x)| +approx = proc(poly,d) { + return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10); +}; + +// first coeff is fixed, iteratively find optimal double prec coeffs +poly = 1; +for i from 1 to deg/2 do { + p = roundcoefficients(approx(poly,2*i), [|single ...|]); + poly = poly + x^(2*i)*coeff(p,0); +}; + +display = hexadecimal; +//print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +//print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 0 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/math/tools/sinpi.sollya b/contrib/arm-optimized-routines/math/tools/sinpi.sollya new file mode 100644 index 000000000000..9bc5b1c7fc2a --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/sinpi.sollya @@ -0,0 +1,33 @@ +// polynomial for approximating sinpi(x) +// +// Copyright (c) 2023-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 19; // polynomial degree +a = -1/2; // interval +b = 1/2; + +// find even polynomial with minimal abs error compared to sinpi(x) + +// f = sin(pi* x); +f = pi*x; +c = 1; +for i from 1 to 80 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*(pi*x)^(2*i+1)/c; }; + +// return p that minimizes |f(x) - poly(x) - x^d*p(x)| +approx = proc(poly,d) { + return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10); +}; + +// first coeff is predefine, iteratively find optimal double prec coeffs +poly = pi*x; +for i from 0 to (deg-1)/2 do { + p = roundcoefficients(approx(poly,2*i+1), [|D ...|]); + poly = poly + x^(2*i+1)*coeff(p,0); +}; + +display = hexadecimal; +print("abs error:", accurateinfnorm(sin(pi*x)-poly(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 0 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/math/tools/tan.sollya b/contrib/arm-optimized-routines/math/tools/tan.sollya new file mode 100644 index 000000000000..ca8a170bedaa --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/tan.sollya @@ -0,0 +1,20 @@ +// polynomial for approximating double precision tan(x) +// +// Copyright (c) 2023-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 8; + +// interval bounds +a = 0x1.0p-126; +b = pi / 8; + +display = hexadecimal; + +f = (tan(sqrt(x))-sqrt(x))/x^(3/2); +poly = fpminimax(f, deg, [|double ...|], [a*a;b*b]); + +//print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 0 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/math/tools/tanf.sollya b/contrib/arm-optimized-routines/math/tools/tanf.sollya new file mode 100644 index 000000000000..054d3db44046 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/tanf.sollya @@ -0,0 +1,78 @@ +// polynomial for approximating single precision tan(x) +// +// Copyright (c) 2022-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +dtype = single; + +mthd = 0; // approximate tan +deg = 5; // poly degree + +// // Uncomment for cotan +// mthd = 1; // approximate cotan +// deg = 3; // poly degree + +// interval bounds +a = 0x1.0p-126; +b = pi / 4; + +print("Print some useful constants"); +display = hexadecimal!; +if (dtype==double) then { prec = 53!; } +else if (dtype==single) then { prec = 23!; }; + +print("pi/4"); +pi/4; + +// Setup precisions (display and computation) +display = decimal!; +prec=128!; +save_prec=prec; + +// +// Select function to approximate with Sollya +// +if(mthd==0) then { + s = "x + x^3 * P(x^2)"; + g = tan(x); + F = proc(P) { return x + x^3 * P(x^2); }; + f = (g(sqrt(x))-sqrt(x))/(x*sqrt(x)); + init_poly = 0; + // Display info + print("Approximate g(x) =", g, "as F(x)=", s, "."); + poly = fpminimax(f, deg, [|dtype ...|], [a*a;b*b]); +} +else if (mthd==1) then { + s = "1/x + x * P(x^2)"; + g = 1 / tan(x); + F = proc(P) { return 1/x + x * P(x^2); }; + f = (g(sqrt(x))-1/sqrt(x))/(sqrt(x)); + init_poly = 0; + deg_init_poly = -1; // a value such that we actually start by building constant coefficient + // Display info + print("Approximate g(x) =", g, "as F(x)=", s, "."); + // Fpminimax used to minimise absolute error + approx_fpminimax = proc(func, poly, d) { + return fpminimax(func - poly / x^-(deg-d), 0, [|dtype|], [a;b], absolute, floating); + }; + // Optimise all coefficients at once + poly = fpminimax(f, [|0,...,deg|], [|dtype ...|], [a;b], absolute, floating); +}; + + +// +// Display coefficients in Sollya +// +display = hexadecimal!; +if (dtype==double) then { prec = 53!; } +else if (dtype==single) then { prec = 23!; }; +print("_coeffs :_ hex"); +for i from 0 to deg do coeff(poly, i); + +// Compute errors +display = hexadecimal!; +d_rel_err = dirtyinfnorm(1-F(poly)/g(x), [a;b]); +d_abs_err = dirtyinfnorm(g(x)-F(poly), [a;b]); +print("dirty rel error:", d_rel_err); +print("dirty abs error:", d_abs_err); +print("in [",a,b,"]"); diff --git a/contrib/arm-optimized-routines/math/tools/tanpi.sollya b/contrib/arm-optimized-routines/math/tools/tanpi.sollya new file mode 100644 index 000000000000..8edbc359ab8e --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/tanpi.sollya @@ -0,0 +1,48 @@ +// polynomial for approximating tanpi/f(x) +// +// Copyright (c) 2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +// 0 for tanpi/f [0,0.25], 1 for tanpi/f [0.25,1] +method = 0; +dtype = double; + +if (dtype == single) then { + if (method == 0) then { deg = 5; } + else if (method == 1) then { deg = 3; }; +} else if (dtype == double) then { + if (method == 0) then { deg = 13; } + else if (method == 1) then { deg = 8; }; +}; + +a = 0x1.0p-126; +b = 1/4; + +if (method == 0) then { + g = tan(pi * x); + F = proc(P) { return pi * x + x^3 * P(x^2); }; + f = (g(sqrt(x)) - pi * sqrt(x))/(x^(3/2)); +} else if (method == 1) then { + g = 1/tan(pi * x); + F = proc(P) { return 1/(pi * x) + x * P(x^2); }; + f = (g(sqrt(x)) / sqrt(x)) - 1/(pi * x); +}; + +poly = fpminimax(f, deg, [|dtype ...|], [a*a;b*b]); + +// +// Display coefficients in Sollya +// +display = hexadecimal!; +if (dtype==double) then { prec = 53!; } +else if (dtype==single) then { prec = 23!; }; +print("_coeffs :_ hex"); +for i from 0 to deg do coeff(poly, i); + +// Compute errors +//display = hexadecimal!; +d_rel_err = dirtyinfnorm(1-F(poly)/g(x), [a;b]); +d_abs_err = dirtyinfnorm(g(x)-F(poly), [a;b]); +print("dirty rel error:", d_rel_err); +print("dirty abs error:", d_abs_err); +print("in [",a,b,"]"); diff --git a/contrib/arm-optimized-routines/math/tools/tgamma128_gen.jl b/contrib/arm-optimized-routines/math/tools/tgamma128_gen.jl new file mode 100644 index 000000000000..ecec174110ea --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/tgamma128_gen.jl @@ -0,0 +1,212 @@ +# -*- julia -*- +# +# Generate tgamma128.h, containing polynomials and constants used by +# tgamma128.c. +# +# Copyright (c) 2006-2023, Arm Limited. +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +# This Julia program depends on the 'Remez' and 'SpecialFunctions' +# library packages. To install them, run this at the interactive Julia +# prompt: +# +# import Pkg; Pkg.add(["Remez", "SpecialFunctions"]) +# +# Tested on Julia 1.4.1 (Ubuntu 20.04) and 1.9.0 (22.04). + +import Printf +import Remez +import SpecialFunctions + +# Round a BigFloat to 128-bit long double and format it as a C99 hex +# float literal. +function quadhex(x) + sign = " " + if x < 0 + sign = "-" + x = -x + end + + exponent = BigInt(floor(log2(x))) + exponent = max(exponent, -16382) + @assert(exponent <= 16383) # else overflow + + x /= BigFloat(2)^exponent + @assert(1 <= x < 2) + x *= BigFloat(2)^112 + mantissa = BigInt(round(x)) + + mantstr = string(mantissa, base=16, pad=29) + return Printf.@sprintf("%s0x%s.%sp%+dL", sign, mantstr[1], mantstr[2:end], + exponent) +end + +# Round a BigFloat to 128-bit long double and return it still as a +# BigFloat. +function quadval(x, round=0) + sign = +1 + if x.sign < 0 + sign = -1 + x = -x + end + + exponent = BigInt(floor(log2(x))) + exponent = max(exponent, -16382) + @assert(exponent <= 16383) # else overflow + + x /= BigFloat(2)^exponent + @assert(1 <= x < 2) + x *= BigFloat(2)^112 + if round < 0 + mantissa = floor(x) + elseif round > 0 + mantissa = ceil(x) + else + mantissa = round(x) + end + + return sign * mantissa * BigFloat(2)^(exponent - 112) +end + +# Output an array of BigFloats as a C array declaration. +function dumparray(a, name) + println("static const long double ", name, "[] = {") + for x in N + println(" ", quadhex(x), ",") + end + println("};") +end + +print("/* + * Polynomial coefficients and other constants for tgamma128.c. + * + * Copyright (c) 2006,2009,2023 Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +") + +Base.MPFR.setprecision(512) + +e = exp(BigFloat(1)) + +print(" +/* The largest positive value for which 128-bit tgamma does not overflow. */ +") +lo = BigFloat("1000") +hi = BigFloat("2000") +while true + global lo + global hi + global max_x + + mid = (lo + hi) / 2 + if mid == lo || mid == hi + max_x = mid + break + end + if SpecialFunctions.logabsgamma(mid)[1] < 16384 * log(BigFloat(2)) + lo = mid + else + hi = mid + end +end +max_x = quadval(max_x, -1) +println("static const long double max_x = ", quadhex(max_x), ";") + +print(" +/* Coefficients of the polynomial used in the tgamma_large() subroutine */ +") +N, D, E, X = Remez.ratfn_minimax( + x -> x==0 ? sqrt(BigFloat(2)*pi/e) : + exp(SpecialFunctions.logabsgamma(1/x)[1] + + (1/x-0.5)*(1+log(x))), + (0, 1/BigFloat(8)), + 24, 0, + (x, y) -> 1/y +) +dumparray(N, "coeffs_large") + +print(" +/* Coefficients of the polynomial used in the tgamma_tiny() subroutine */ +") +N, D, E, X = Remez.ratfn_minimax( + x -> x==0 ? 1 : 1/(x*SpecialFunctions.gamma(x)), + (0, 1/BigFloat(32)), + 13, 0, +) +dumparray(N, "coeffs_tiny") + +print(" +/* The location within the interval [1,2] where gamma has a minimum. + * Specified as the sum of two 128-bit values, for extra precision. */ +") +lo = BigFloat("1.4") +hi = BigFloat("1.5") +while true + global lo + global hi + global min_x + + mid = (lo + hi) / 2 + if mid == lo || mid == hi + min_x = mid + break + end + if SpecialFunctions.digamma(mid) < 0 + lo = mid + else + hi = mid + end +end +min_x_hi = quadval(min_x, -1) +println("static const long double min_x_hi = ", quadhex(min_x_hi), ";") +println("static const long double min_x_lo = ", quadhex(min_x - min_x_hi), ";") + +print(" +/* The actual minimum value that gamma takes at that location. + * Again specified as the sum of two 128-bit values. */ +") +min_y = SpecialFunctions.gamma(min_x) +min_y_hi = quadval(min_y, -1) +println("static const long double min_y_hi = ", quadhex(min_y_hi), ";") +println("static const long double min_y_lo = ", quadhex(min_y - min_y_hi), ";") + +function taylor_bodge(x) + # Taylor series generated by Wolfram Alpha for (gamma(min_x+x)-min_y)/x^2. + # Used in the Remez calls below for x values very near the origin, to avoid + # significance loss problems when trying to compute it directly via that + # formula (even in MPFR's extra precision). + return BigFloat("0.428486815855585429730209907810650582960483696962660010556335457558784421896667728014324097132413696263704801646004585959298743677879606168187061990204432200")+x*(-BigFloat("0.130704158939785761928008749242671025181542078105370084716141350308119418619652583986015464395882363802104154017741656168641240436089858504560718773026275797")+x*(BigFloat("0.160890753325112844190519489594363387594505844658437718135952967735294789599989664428071656484587979507034160383271974554122934842441540146372016567834062876")+x*(-BigFloat("0.092277030213334350126864106458600575084335085690780082222880945224248438672595248111704471182201673989215223667543694847795410779036800385804729955729659506")))) +end + +print(" +/* Coefficients of the polynomial used in the tgamma_central() subroutine + * for computing gamma on the interval [1,min_x] */ +") +N, D, E, X = Remez.ratfn_minimax( + x -> x < BigFloat(0x1p-64) ? taylor_bodge(-x) : + (SpecialFunctions.gamma(min_x - x) - min_y) / (x*x), + (0, min_x - 1), + 31, 0, + (x, y) -> x^2, +) +dumparray(N, "coeffs_central_neg") + +print(" +/* Coefficients of the polynomial used in the tgamma_central() subroutine + * for computing gamma on the interval [min_x,2] */ +") +N, D, E, X = Remez.ratfn_minimax( + x -> x < BigFloat(0x1p-64) ? taylor_bodge(x) : + (SpecialFunctions.gamma(min_x + x) - min_y) / (x*x), + (0, 2 - min_x), + 28, 0, + (x, y) -> x^2, +) +dumparray(N, "coeffs_central_pos") + +print(" +/* 128-bit float value of pi, used by the sin_pi_x_over_pi subroutine + */ +") +println("static const long double pi = ", quadhex(BigFloat(pi)), ";") diff --git a/contrib/arm-optimized-routines/math/tools/v_erf.sollya b/contrib/arm-optimized-routines/math/tools/v_erf.sollya new file mode 100644 index 000000000000..5d7795842bcd --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/v_erf.sollya @@ -0,0 +1,20 @@ +// polynomial for approximating erf(x). +// To generate coefficients for interval i (0 to 47) do: +// $ sollya v_erf.sollya $i +// +// Copyright (c) 2022-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +scale = 1/8; +deg = 9; + +itv = parse(__argv[0]); +if (itv == 0) then { a = 0x1p-1022; } +else { a = itv * scale; }; + +prec=256; + +poly = fpminimax(erf(scale*x+a), deg, [|D ...|], [0; 1]); + +display = hexadecimal; +for i from 0 to deg do coeff(poly, i);
\ No newline at end of file diff --git a/contrib/arm-optimized-routines/math/tools/v_erfc.sollya b/contrib/arm-optimized-routines/math/tools/v_erfc.sollya new file mode 100644 index 000000000000..764b333d6d25 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/v_erfc.sollya @@ -0,0 +1,46 @@ +// polynomial for approximating erfc(x)*exp(x*x) +// +// Copyright (c) 2022-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 12; // poly degree + +itv = parse(__argv[0]); + +bounds = [|3.725290298461914e-9, + 0.18920711500272103, + 0.41421356237309515, + 0.681792830507429, + 1, + 1.378414230005442, + 1.8284271247461903, + 2.363585661014858, + 3, + 3.756828460010884, + 4.656854249492381, + 5.727171322029716, + 7, + 8.513656920021768, + 10.313708498984761, + 12.454342644059432, + 15, + 18.027313840043536, + 21.627416997969522, + 25.908685288118864, + 31|]; + +a = bounds[itv]; +b = bounds[itv + 1]; + +f = proc(y) { + t = y + a; + return erfc(t) * exp(t*t); +}; + +poly = fpminimax(f(x), deg, [|double ...|], [0;b-a]); + +display = hexadecimal; +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 0 to deg do coeff(poly, i); diff --git a/contrib/arm-optimized-routines/math/tools/v_exp.sollya b/contrib/arm-optimized-routines/math/tools/v_exp.sollya index c0abb63fb642..5fa7de7435a9 100644 --- a/contrib/arm-optimized-routines/math/tools/v_exp.sollya +++ b/contrib/arm-optimized-routines/math/tools/v_exp.sollya @@ -1,7 +1,7 @@ // polynomial for approximating e^x // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 4; // poly degree N = 128; // table entries diff --git a/contrib/arm-optimized-routines/math/tools/v_log.sollya b/contrib/arm-optimized-routines/math/tools/v_log.sollya index cc3d2c4ae72a..d982524eb920 100644 --- a/contrib/arm-optimized-routines/math/tools/v_log.sollya +++ b/contrib/arm-optimized-routines/math/tools/v_log.sollya @@ -1,7 +1,7 @@ // polynomial used for __v_log(x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 6; // poly degree a = -0x1.fc1p-9; diff --git a/contrib/arm-optimized-routines/math/tools/v_log10.sollya b/contrib/arm-optimized-routines/math/tools/v_log10.sollya new file mode 100644 index 000000000000..5181074f6762 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/v_log10.sollya @@ -0,0 +1,38 @@ +// polynomial used for __v_log10(x) +// +// Copyright (c) 2019-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 6; // poly degree +a = -0x1.fc1p-9; +b = 0x1.009p-8; + +// find log(1+x)/x polynomial with minimal relative error +// (minimal relative error polynomial for log(1+x) is the same * x) +deg = deg-1; // because of /x + +// f = log(1+x)/x; using taylor series +f = 0; +for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; + +// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| +approx = proc(poly,d) { + return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); +}; + +// first coeff is fixed, iteratively find optimal double prec coeffs +poly = 1; +for i from 1 to deg do { + p = roundcoefficients(approx(poly,i), [|D ...|]); + poly = poly + x^i*coeff(p,0); +}; + +// scale coefficients by 1/ln(10) +ln10 = evaluate(log(10),0); +poly = poly/ln10; + +display = hexadecimal; +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 0 to deg do double(coeff(poly,i)); diff --git a/contrib/arm-optimized-routines/math/tools/v_log10f.sollya b/contrib/arm-optimized-routines/math/tools/v_log10f.sollya new file mode 100644 index 000000000000..4906cb1d2137 --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/v_log10f.sollya @@ -0,0 +1,45 @@ +// polynomial for approximating v_log10f(1+x) +// +// Copyright (c) 2019-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 9; // poly degree +// |log10(1+x)| > 0x1p-4 outside the interval +a = -1/3; +b = 1/3; + +display = hexadecimal; +print("log10(2) = ", single(log10(2))); + +ln10 = evaluate(log(10),0); +invln10 = single(1/ln10); + +// find log10(1+x)/x polynomial with minimal relative error +// (minimal relative error polynomial for log10(1+x) is the same * x) +deg = deg-1; // because of /x + +// f = log(1+x)/x; using taylor series +f = 0; +for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; +f = f/ln10; + +// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| +approx = proc(poly,d) { + return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); +}; + +// first coeff is fixed, iteratively find optimal double prec coeffs +poly = invln10; +for i from 1 to deg do { + p = roundcoefficients(approx(poly,i), [|SG ...|]); + poly = poly + x^i*coeff(p,0); +}; +display = hexadecimal; +print("invln10:", invln10); +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 0 to deg do single(coeff(poly,i)); + +display = decimal; +print("in [",a,b,"]"); diff --git a/contrib/arm-optimized-routines/math/tools/v_log2f.sollya b/contrib/arm-optimized-routines/math/tools/v_log2f.sollya new file mode 100644 index 000000000000..337d4830a2ae --- /dev/null +++ b/contrib/arm-optimized-routines/math/tools/v_log2f.sollya @@ -0,0 +1,38 @@ +// polynomial used for __v_log2f(x) +// +// Copyright (c) 2022-2024, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 9; // poly degree +a = -1/3; +b = 1/3; + +ln2 = evaluate(log(2),0); +invln2 = single(1/ln2); + +// find log2(1+x)/x polynomial with minimal relative error +// (minimal relative error polynomial for log2(1+x) is the same * x) +deg = deg-1; // because of /x + +// f = log2(1+x)/x; using taylor series +f = 0; +for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; +f = f * invln2; + +// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| +approx = proc(poly,d) { + return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); +}; + +// first coeff is fixed, iteratively find optimal double prec coeffs +poly = invln2; +for i from 1 to deg do { + p = roundcoefficients(approx(poly,i), [|SG ...|]); + poly = poly + x^i*coeff(p,0); +}; + +display = hexadecimal; +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 0 to deg do coeff(poly,i); diff --git a/contrib/arm-optimized-routines/math/tools/v_sin.sollya b/contrib/arm-optimized-routines/math/tools/v_sin.sollya index 65cc9957c624..63b9d65a1ac3 100644 --- a/contrib/arm-optimized-routines/math/tools/v_sin.sollya +++ b/contrib/arm-optimized-routines/math/tools/v_sin.sollya @@ -1,7 +1,7 @@ // polynomial for approximating sin(x) // // Copyright (c) 2019, Arm Limited. -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception deg = 15; // polynomial degree a = -pi/2; // interval |