aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSteve Kargl <kargl@FreeBSD.org>2024-08-29 19:44:48 +0000
committerDimitry Andric <dim@FreeBSD.org>2024-09-08 07:37:52 +0000
commit33c82f11c2674beb50304b14248ff46372def520 (patch)
tree3ccd440bd8b92c3ed83edbdeceeea9b02a675f86
parent0b683336b211e74b6d7f21ecf517c6591370b793 (diff)
Improve accuracy of asinf(3) and acosf(3)
This uses a better rational approximation to improve the accuracy of both functions. For exhaustive testing of asinf(3) in the interval, the current libm gives: % ./tlibm asin -fPED -x 0x1p-12f -X 1 Interval tested for asinf: [0.000244141,1] ulp <= 0.5: 97.916% 98564994 | 97.916% 98564994 0.5 < ulp < 0.6: 2.038% 2051023 | 99.953% 100616017 0.6 < ulp < 0.7: 0.047% 47254 | 100.000% 100663271 0.7 < ulp < 0.8: 0.000% 25 | 100.000% 100663296 Max ulp: 0.729891 at 5.00732839e-01 which isn't too bad given that much of the computation is actually done in double floating point. With the new rational approximation, exhaustive testing yields: % ./tlibm asin -fPED -x 0x1p-12f -X 1 Interval tested for asinf: [0.000244141,1] ulp <= 0.5: 99.711% 100372643 | 99.711% 100372643 0.5 < ulp < 0.6: 0.288% 290357 | 100.000% 100663000 0.6 < ulp < 0.7: 0.000% 296 | 100.000% 100663296 Max ulp: 0.636344 at 5.09706438e-01 Similarly, for exhaustive testing of asinf(3) in the interval, the current libm gives: % ./tlibm acos -fPED -x -1 -X -0x1p-12f Interval tested for acosf: [-1,-0.000244141] ulp <= 0.5: 97.008% 97651921 | 97.008% 97651921 0.5 < ulp < 0.6: 2.441% 2457242 | 99.450% 100109163 0.6 < ulp < 0.7: 0.472% 475503 | 99.922% 100584666 0.7 < ulp < 0.8: 0.071% 71309 | 99.993% 100655975 0.8 < ulp < 0.9: 0.007% 7319 | 100.000% 100663294 0.9 < ulp < 1.0: 0.000% 2 | 100.000% 100663296 Max ulp: 0.914007 at -5.01484931e-01 % ./tlibm acos -fPED -x 0x1p-12f -X 1 Interval tested for acosf: [0.000244141,1] ulp <= 0.5: 97.317% 97962530 | 97.317% 97962530 0.5 < ulp < 0.6: 2.340% 2355182 | 99.657% 100317712 0.6 < ulp < 0.7: 0.314% 316134 | 99.971% 100633846 0.7 < ulp < 0.8: 0.029% 29450 | 100.000% 100663296 Max ulp: 0.796035 at 4.99814630e-01 With the new rational approximation, exhaustive testing yields: % ./tlibm acos -fPED -x -1 -X -0x1p-12f Interval tested for acosf: [-1,-0.000244141] ulp <= 0.5: 97.010% 97653245 | 97.010% 97653245 0.5 < ulp < 0.6: 2.442% 2458373 | 99.452% 100111618 0.6 < ulp < 0.7: 0.473% 476012 | 99.925% 100587630 0.7 < ulp < 0.8: 0.068% 68603 | 99.993% 100656233 0.8 < ulp < 0.9: 0.007% 7063 | 100.000% 100663296 Max ulp: 0.896189 at -5.04511118e-01 % ./tlibm acos -fPED -x 0x1p-12f -X 1 Interval tested for acosf: [0.000244141,1] ulp <= 0.5: 97.650% 98298175 | 97.650% 98298175 0.5 < ulp < 0.6: 2.028% 2041709 | 99.679% 100339884 0.6 < ulp < 0.7: 0.292% 293555 | 99.970% 100633439 0.7 < ulp < 0.8: 0.030% 29857 | 100.000% 100663296 Max ulp: 0.775875 at 4.91849005e-01 PR: 281001 MFC after: 1 week (cherry picked from commit 41e016289f77deb88b0ef1ec3f7b2ab3515ac7c8)
-rw-r--r--lib/msun/src/e_acosf.c20
-rw-r--r--lib/msun/src/e_asinf.c22
2 files changed, 27 insertions, 15 deletions
diff --git a/lib/msun/src/e_acosf.c b/lib/msun/src/e_acosf.c
index 42ba126d1baa..ede552e7055a 100644
--- a/lib/msun/src/e_acosf.c
+++ b/lib/msun/src/e_acosf.c
@@ -22,11 +22,17 @@ pi = 3.1415925026e+00, /* 0x40490fda */
pio2_hi = 1.5707962513e+00; /* 0x3fc90fda */
static volatile float
pio2_lo = 7.5497894159e-08; /* 0x33a22168 */
+
+/*
+ * The coefficients for the rational approximation were generated over
+ * 0x1p-12f <= x <= 0.5f. The maximum error satisfies log2(e) < -30.084.
+ */
static const float
-pS0 = 1.6666586697e-01,
-pS1 = -4.2743422091e-02,
-pS2 = -8.6563630030e-03,
-qS1 = -7.0662963390e-01;
+pS0 = 1.66666672e-01f, /* 0x3e2aaaab */
+pS1 = -1.19510300e-01f, /* 0xbdf4c1d1 */
+pS2 = 5.47002675e-03f, /* 0x3bb33de9 */
+qS1 = -1.16706085e+00f, /* 0xbf956240 */
+qS2 = 2.90115148e-01f; /* 0x3e9489f9 */
float
acosf(float x)
@@ -46,13 +52,13 @@ acosf(float x)
if(ix<=0x32800000) return pio2_hi+pio2_lo;/*if|x|<2**-26*/
z = x*x;
p = z*(pS0+z*(pS1+z*pS2));
- q = one+z*qS1;
+ q = one+z*(qS1+z*qS2);
r = p/q;
return pio2_hi - (x - (pio2_lo-x*r));
} else if (hx<0) { /* x < -0.5 */
z = (one+x)*(float)0.5;
p = z*(pS0+z*(pS1+z*pS2));
- q = one+z*qS1;
+ q = one+z*(qS1+z*qS2);
s = sqrtf(z);
r = p/q;
w = r*s-pio2_lo;
@@ -66,7 +72,7 @@ acosf(float x)
SET_FLOAT_WORD(df,idf&0xfffff000);
c = (z-df*df)/(s+df);
p = z*(pS0+z*(pS1+z*pS2));
- q = one+z*qS1;
+ q = one+z*(qS1+z*qS2);
r = p/q;
w = r*s+c;
return (float)2.0*(df+w);
diff --git a/lib/msun/src/e_asinf.c b/lib/msun/src/e_asinf.c
index a2ee1a166f03..8d1aca27df83 100644
--- a/lib/msun/src/e_asinf.c
+++ b/lib/msun/src/e_asinf.c
@@ -18,12 +18,18 @@
static const float
one = 1.0000000000e+00, /* 0x3F800000 */
-huge = 1.000e+30,
- /* coefficient for R(x^2) */
-pS0 = 1.6666586697e-01,
-pS1 = -4.2743422091e-02,
-pS2 = -8.6563630030e-03,
-qS1 = -7.0662963390e-01;
+huge = 1.000e+30;
+
+/*
+ * The coefficients for the rational approximation were generated over
+ * 0x1p-12f <= x <= 0.5f. The maximum error satisfies log2(e) < -30.084.
+ */
+static const float
+pS0 = 1.66666672e-01f, /* 0x3e2aaaab */
+pS1 = -1.19510300e-01f, /* 0xbdf4c1d1 */
+pS2 = 5.47002675e-03f, /* 0x3bb33de9 */
+qS1 = -1.16706085e+00f, /* 0xbf956240 */
+qS2 = 2.90115148e-01f; /* 0x3e9489f9 */
static const double
pio2 = 1.570796326794896558e+00;
@@ -46,7 +52,7 @@ asinf(float x)
}
t = x*x;
p = t*(pS0+t*(pS1+t*pS2));
- q = one+t*qS1;
+ q = one+t*(qS1+t*qS2);
w = p/q;
return x+x*w;
}
@@ -54,7 +60,7 @@ asinf(float x)
w = one-fabsf(x);
t = w*(float)0.5;
p = t*(pS0+t*(pS1+t*pS2));
- q = one+t*qS1;
+ q = one+t*(qS1+t*qS2);
s = sqrt(t);
w = p/q;
t = pio2-2.0*(s+s*w);